Skip to content

Commit

Permalink
♻️ improve project structure & performance
Browse files Browse the repository at this point in the history
  • Loading branch information
hmiladhia authored Oct 8, 2022
2 parents 1867b40 + 94890f8 commit 420f3f8
Show file tree
Hide file tree
Showing 26 changed files with 865 additions and 717 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ repos:
- id: mypy
files: ^nbmanips/
# exclude utils.py for now
exclude: 'nbmanips/utils\.py$'
exclude: 'nbmanips/notebook/utils\.py$'
args: [--no-strict-optional, --ignore-missing-imports, --follow-imports, silent]
2 changes: 1 addition & 1 deletion nbmanips/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.4.0
2.0.0
10 changes: 10 additions & 0 deletions nbmanips/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from functools import wraps


def partial(func, *args, **keywords):
    """Return a wrapper around ``func`` with extra arguments pre-bound.

    This is not a drop-in replacement for ``functools.partial``:

    * the bound positional ``args`` are appended *after* the positional
      arguments supplied at call time;
    * the bound ``keywords`` win over keywords supplied at call time.

    The wrapper carries ``func``'s metadata (via ``functools.wraps``).
    """

    @wraps(func)
    def new_func(*f_args, **f_keywords):
        # Start from the call-time keywords, then let the bound ones override.
        merged = dict(f_keywords)
        merged.update(keywords)
        positional = (*f_args, *args)
        return func(*positional, **merged)

    return new_func
10 changes: 10 additions & 0 deletions nbmanips/cell/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from .cell_output import CellOutput
from .cells import Cell, CodeCell, MarkdownCell, RawCell
from .output_parsers import HtmlParser, ImageParser, TextParser

# Register one parser instance on CellOutput under each of the
# 'text', 'text/html' and 'image' keys.
CellOutput.register_parser('text', TextParser())
CellOutput.register_parser('text/html', HtmlParser())
CellOutput.register_parser('image', ImageParser())


# Names exported by a star-import of this module.
__all__ = ['Cell', 'CellOutput', 'MarkdownCell', 'CodeCell', 'RawCell']
28 changes: 3 additions & 25 deletions nbmanips/cell_output.py → nbmanips/cell/cell_output.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,7 @@
import html
from typing import Dict, Optional, Union
from typing import Dict, Optional

from nbmanips.cell_utils import HtmlParser, ImageParser, ParserBase, TextParser
from nbmanips.utils import total_size


def _get_output_types(output_type: Union[set, dict, str]) -> set:
if isinstance(output_type, str):
if '/' in output_type:
return {output_type, output_type.split('/')[0]}
return {output_type}

output_types = set()
for output in output_type:
output_types |= _get_output_types(output)
return output_types


def _to_html(text):
return html.escape(text).encode('ascii', 'xmlcharrefreplace').decode('ascii')
from .cell_utils import _get_output_types, _to_html, total_size
from .output_parsers import ParserBase


class CellOutput:
Expand Down Expand Up @@ -284,8 +267,3 @@ class ExecuteResult(DataOutput, output_type='execute_result'):
@property
def execution_count(self):
    """The ``'execution_count'`` entry of this output's content, or ``None`` if absent."""
    content = self.content
    return content.get('execution_count')


# Register one parser instance on CellOutput under each of the
# 'text', 'text/html' and 'image' keys.
CellOutput.register_parser('text', TextParser())
CellOutput.register_parser('text/html', HtmlParser())
CellOutput.register_parser('image', ImageParser())
131 changes: 64 additions & 67 deletions nbmanips/cell_utils.py → nbmanips/cell/cell_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import json
import re
import shutil
from abc import ABCMeta, abstractmethod
import urllib.parse
import warnings
from mimetypes import guess_type
from pathlib import Path
from textwrap import wrap
from typing import Union

from nbmanips.color import supports_color
from nbmanips.cell.color import supports_color

try:
import pygments
Expand All @@ -23,16 +27,19 @@
except ImportError:
colorama = None

try:
from html2text import html2text
except ImportError:
html2text = None

try:
from img2text import img_to_ascii
except ImportError:
img_to_ascii = None
# -- Constants --
# --- Attachment Constants ---
# Markdown image syntax ``![alt](path)``; named groups: ALT_TEXT and PATH.
MD_IMG_REGEX = r'!\[(?P<ALT_TEXT>.*?)]\((?P<PATH>.*?)\)'
# ``str.format`` template for a markdown image that points at an attachment;
# fields: ALT_TEXT and attachment_name.
MD_IMG_EXPRESSION = r'![{ALT_TEXT}](attachment:{attachment_name})'
# HTML ``<img ...>`` tag; PATH captures the ``src`` value (quoted or unquoted),
# PREFIX the text between ``<img`` and ``src``, SUFFIX the text after the value.
HTML_IMG_REGEX = (
r'<img\s(?P<PREFIX>.*?)'
r'src\s*=\s*\"?(?P<PATH>(?<=\")[^\"]*(?=\")|(?:[^\"\s]|(?<=\\)\s)*[^\s\\/])\"?'
r'(?P<SUFFIX>.*?)>'
)
# ``str.format`` template for an ``<img>`` tag whose ``src`` is an attachment;
# fields: PREFIX, attachment_name and SUFFIX.
HTML_IMG_EXPRESSION = r'<img {PREFIX}src="attachment:{attachment_name}"{SUFFIX}>'

# -- Styles --
styles = {
'single': '││┌─┐└─┘',
'double': '║║╔═╗╚═╝',
Expand Down Expand Up @@ -119,60 +126,50 @@ def get_mime_type(path):
return guess_type(path)[0]


class ParserBase(metaclass=ABCMeta):
    """Abstract base class for output parsers.

    Subclasses must implement :meth:`parse`.
    """

    @abstractmethod
    def parse(self, content, **kwargs):
        """Convert ``content``; the base implementation returns it unchanged.

        Extra keyword options are accepted and ignored here.
        """
        return content

    @property
    def default_state(self):
        """Always ``True`` for the base parser.

        NOTE(review): presumably signals whether the parser is enabled by
        default -- confirm against the code that reads this property.
        """
        return True


class TextParser(ParserBase):
    """Parser for textual content: the content is passed through untouched."""

    def parse(self, content, **kwargs):
        """Return ``content`` as-is; extra keyword options are ignored."""
        return content


class ImageParser(ParserBase):
    """Parser that converts image content to text through ``img_to_ascii``.

    ``img_to_ascii`` comes from the optional ``img2text`` import; when that
    import failed the name is ``None`` and parsing is unavailable.
    """

    def parse(
        self,
        content,
        width=80,
        colorful=COLOR_SUPPORTED,
        bright=COLOR_SUPPORTED,
        reverse=True,
        **kwargs,
    ):
        """Render ``content`` with ``img_to_ascii`` (called with ``base64=True``).

        ``width``, ``colorful``, ``bright``, ``reverse`` and any extra keyword
        options are forwarded to ``img_to_ascii``.

        Raises:
            ModuleNotFoundError: if ``img_to_ascii`` is not callable, i.e. the
                optional dependency could not be imported.
        """
        # Guard first: without the optional dependency there is nothing to call.
        if not callable(img_to_ascii):
            raise ModuleNotFoundError(
                'You need to pip install img2text for readable option'
            )

        return img_to_ascii(
            content,
            base64=True,
            colorful=colorful,
            reverse=reverse,
            width=width,
            bright=bright,
            **kwargs,
        )

    @property
    def default_state(self):
        """``True`` when ``img_to_ascii`` was imported, ``False`` otherwise."""
        available = img_to_ascii is not None
        return available


class HtmlParser(ParserBase):
    """Parser that converts HTML content to text through ``html2text``.

    ``html2text`` comes from the optional ``html2text`` import; when that
    import failed the name is ``None`` and parsing is unavailable.
    """

    def parse(self, content, width=78, **kwargs):
        """Render ``content`` with ``html2text``.

        ``width`` is forwarded as ``bodywidth``; any extra keyword options are
        forwarded unchanged.

        Raises:
            ModuleNotFoundError: if ``html2text`` is not callable, i.e. the
                optional dependency could not be imported.
        """
        if not callable(html2text):
            # The package to install is ``html2text`` (the module imported
            # above); the previous message misspelled it as ``html2txt``.
            raise ModuleNotFoundError(
                'You need to pip install html2text for readable option'
            )

        return html2text(content, bodywidth=width, **kwargs)

    @property
    def default_state(self):
        """``True`` when ``html2text`` was imported, ``False`` otherwise."""
        return html2text is not None
def _get_output_types(output_type: Union[set, dict, str]) -> set:
if isinstance(output_type, str):
if '/' in output_type:
return {output_type, output_type.split('/')[0]}
return {output_type}

output_types = set()
for output in output_type:
output_types |= _get_output_types(output)
return output_types


def _to_html(text):
import html

return html.escape(text).encode('ascii', 'xmlcharrefreplace').decode('ascii')


def get_assets_path(nb, assets_path=None):
    """Resolve the directory used to look up assets.

    * An explicit ``assets_path`` is returned as a ``Path``.
    * Otherwise, if ``nb`` has a truthy ``_original_path`` attribute, the
      parent directory of that path is returned.
    * Otherwise the current working directory is returned.
    """
    if assets_path is not None:
        return Path(assets_path)

    origin = getattr(nb, '_original_path', None)
    if not origin:
        return Path.cwd()
    return Path(origin).parent


def burn_attachment(match, cell, assets_path: Path, expr):
    """Attach the file referenced by a regex match to ``cell`` and rewrite the reference.

    Args:
        match: regex match exposing a ``PATH`` named group.
        cell: object whose ``attach(path, attachment_name=...)`` is called.
        assets_path: directory against which the matched path is resolved.
        expr: ``str.format`` template filled with the remaining named groups
            plus ``attachment_name``.

    Returns:
        The formatted ``expr`` when the file exists and was attached.
        The matched text is returned unchanged when the path already starts
        with ``attachment:`` or when the file cannot be found (a warning is
        emitted in the latter case).
    """
    raw_path = match.group('PATH')
    untouched = match.group(0)

    # Already an attachment reference: nothing to do.
    if raw_path.startswith('attachment:'):
        return untouched

    # The reference may be URL-encoded (e.g. %20), the file name is not.
    resolved = assets_path / urllib.parse.unquote(raw_path)
    if not resolved.exists():
        warnings.warn(f"Couldn't find '{raw_path}'")
        return untouched

    groups = match.groupdict()
    attachment_name = groups.pop('PATH').replace(' ', '%20')
    cell.attach(str(resolved), attachment_name=attachment_name)
    return expr.format(attachment_name=attachment_name, **groups)


def total_size(o):
    """Return the number of bytes in the UTF-8 encoded JSON dump of ``o``."""
    serialized = json.dumps(o)
    encoded = serialized.encode('utf-8')
    return len(encoded)
39 changes: 30 additions & 9 deletions nbmanips/cell.py → nbmanips/cell/cells.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import base64
import re
import uuid
from copy import deepcopy
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

Expand All @@ -14,12 +11,15 @@
pygments = None
get_lexer_by_name = None

from bs4 import BeautifulSoup
from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath
from nbmanips.cell.cell_utils import (
FORMATTER,
get_mime_type,
monochrome,
printable_cell,
total_size,
)

from nbmanips.cell_output import CellOutput
from nbmanips.cell_utils import FORMATTER, get_mime_type, monochrome, printable_cell
from nbmanips.utils import total_size
from .cell_output import CellOutput


class Cell:
Expand Down Expand Up @@ -72,6 +72,8 @@ def outputs(self):
return map(CellOutput, self.cell.get('outputs', []))

def get_copy(self, new_id=None):
from copy import deepcopy

cell = self.__class__(deepcopy(self.cell), None)
if new_id is not None:
cell.id = new_id
Expand Down Expand Up @@ -123,14 +125,25 @@ def contains(self, text, case=True, output=False, regex=False, flags=0):
search_target += '\n' + self.output

if not regex:
text = re.escape(text)
if not case:
text = text.lower()
search_target = search_target.lower()

return text in search_target

if case is False:
flags = flags | re.IGNORECASE
else:
flags = flags & ~re.IGNORECASE
return bool(re.search(text, search_target, flags=flags))

def has_match(self, regex, output=False):
    """Tell whether ``regex`` matches anywhere in the cell.

    Args:
        regex: object with a ``search`` method (e.g. a compiled pattern).
        output: when truthy, ``self.output`` is appended to ``self.source``
            (separated by a newline) before searching.

    Returns:
        ``True`` if ``regex.search`` finds a match, ``False`` otherwise.
    """
    haystack = self.source + ('\n' + self.output) if output else self.source
    found = regex.search(haystack)
    return bool(found)

def erase_output(self, output_types: Optional[Union[str, set]] = None):
"""
erase output of cells that have a given output_type
Expand Down Expand Up @@ -242,6 +255,8 @@ def remove_tag(self, tag: str):

@staticmethod
def generate_id_candidate():
import uuid

return uuid.uuid4().hex[:8]

def __new__(cls, content, *args, **kwargs):
Expand Down Expand Up @@ -325,6 +340,8 @@ def attachments(self):
return self.cell.setdefault('attachments', {})

def attach(self, path: Union[str, Path], attachment_name: Optional[str] = None):
import base64

mime_type = get_mime_type(str(path))
path = Path(path)
attachment_name = attachment_name or path.name
Expand All @@ -334,13 +351,17 @@ def attach(self, path: Union[str, Path], attachment_name: Optional[str] = None):

@property
def html(self):
    """Render ``self.source`` with nbconvert's ``MarkdownWithMath``.

    The renderer is an ``IPythonRenderer`` created with ``escape=False``,
    this cell's attachments and ``exclude_anchor_links=True``.
    """
    # Imported lazily: nbconvert is only needed when the rendering is requested.
    from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath

    renderer_options = {
        'escape': False,
        'attachments': self.attachments,
        'exclude_anchor_links': True,
    }
    markdown = MarkdownWithMath(renderer=IPythonRenderer(**renderer_options))
    return markdown.render(self.source)

@property
def soup(self):
    """Return a ``BeautifulSoup`` built from ``self.html`` with ``self._bs4_parser``."""
    # Imported lazily: bs4 is only needed when the soup is requested.
    from bs4 import BeautifulSoup

    markup = self.html
    return BeautifulSoup(markup, self._bs4_parser)


Expand Down
File renamed without changes.
72 changes: 72 additions & 0 deletions nbmanips/cell/output_parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from abc import ABCMeta, abstractmethod

from nbmanips.cell.cell_utils import COLOR_SUPPORTED

try:
from html2text import html2text
except ImportError:
html2text = None

try:
from img2text import img_to_ascii
except ImportError:
img_to_ascii = None


class ParserBase(metaclass=ABCMeta):
    """Abstract base class for output parsers.

    Subclasses must implement :meth:`parse`.
    """

    @abstractmethod
    def parse(self, content, **kwargs):
        """Convert ``content``; the base implementation returns it unchanged.

        Extra keyword options are accepted and ignored here.
        """
        return content

    @property
    def default_state(self):
        """Always ``True`` for the base parser.

        NOTE(review): presumably signals whether the parser is enabled by
        default -- confirm against the code that reads this property.
        """
        return True


class TextParser(ParserBase):
    """Parser for textual content: the content is passed through untouched."""

    def parse(self, content, **kwargs):
        """Return ``content`` as-is; extra keyword options are ignored."""
        return content


class ImageParser(ParserBase):
    """Parser that converts image content to text through ``img_to_ascii``.

    ``img_to_ascii`` comes from the optional ``img2text`` import; when that
    import failed the name is ``None`` and parsing is unavailable.
    """

    def parse(
        self,
        content,
        width=80,
        colorful=COLOR_SUPPORTED,
        bright=COLOR_SUPPORTED,
        reverse=True,
        **kwargs,
    ):
        """Render ``content`` with ``img_to_ascii`` (called with ``base64=True``).

        ``width``, ``colorful``, ``bright``, ``reverse`` and any extra keyword
        options are forwarded to ``img_to_ascii``.

        Raises:
            ModuleNotFoundError: if ``img_to_ascii`` is not callable, i.e. the
                optional dependency could not be imported.
        """
        # Guard first: without the optional dependency there is nothing to call.
        if not callable(img_to_ascii):
            raise ModuleNotFoundError(
                'You need to pip install img2text for readable option'
            )

        return img_to_ascii(
            content,
            base64=True,
            colorful=colorful,
            reverse=reverse,
            width=width,
            bright=bright,
            **kwargs,
        )

    @property
    def default_state(self):
        """``True`` when ``img_to_ascii`` was imported, ``False`` otherwise."""
        available = img_to_ascii is not None
        return available


class HtmlParser(ParserBase):
    """Parser that converts HTML content to text through ``html2text``.

    ``html2text`` comes from the optional ``html2text`` import; when that
    import failed the name is ``None`` and parsing is unavailable.
    """

    def parse(self, content, width=78, **kwargs):
        """Render ``content`` with ``html2text``.

        ``width`` is forwarded as ``bodywidth``; any extra keyword options are
        forwarded unchanged.

        Raises:
            ModuleNotFoundError: if ``html2text`` is not callable, i.e. the
                optional dependency could not be imported.
        """
        if not callable(html2text):
            # The package to install is ``html2text`` (the module imported
            # above); the previous message misspelled it as ``html2txt``.
            raise ModuleNotFoundError(
                'You need to pip install html2text for readable option'
            )

        return html2text(content, bodywidth=width, **kwargs)

    @property
    def default_state(self):
        """``True`` when ``html2text`` was imported, ``False`` otherwise."""
        return html2text is not None
Loading

0 comments on commit 420f3f8

Please sign in to comment.