Skip to content

Commit

Permalink
plugins: make internal stuff private, and more refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
ilius committed Jan 17, 2025
1 parent f7e0013 commit 4f68daf
Show file tree
Hide file tree
Showing 27 changed files with 244 additions and 229 deletions.
60 changes: 30 additions & 30 deletions pyglossary/plugins/appledict/_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,21 @@
log = logging.getLogger("pyglossary")


re_brhr = re.compile("<(BR|HR)>", re.IGNORECASE)
re_nonprintable = re.compile("[\x00-\x07\x0e-\x1f]")
re_img = re.compile("<IMG (.*?)>", re.IGNORECASE)
_re_brhr = re.compile("<(BR|HR)>", re.IGNORECASE)
_re_nonprintable = re.compile("[\x00-\x07\x0e-\x1f]")
_re_img = re.compile("<IMG (.*?)>", re.IGNORECASE)

re_div_margin_em = re.compile(r'<div style="margin-left:(\d)em">')
sub_div_margin_em = r'<div class="m\1">'
_re_div_margin_em = re.compile(r'<div style="margin-left:(\d)em">')
_sub_div_margin_em = r'<div class="m\1">'

re_div_margin_em_ex = re.compile(
_re_div_margin_em_ex = re.compile(
r'<div class="ex" style="margin-left:(\d)em;color:steelblue">',
)
sub_div_margin_em_ex = r'<div class="m\1 ex">'
_sub_div_margin_em_ex = r'<div class="m\1 ex">'

re_href = re.compile(r"""href=(["'])(.*?)\1""")
_re_href = re.compile(r"""href=(["'])(.*?)\1""")

re_margin = re.compile(r"margin-left:(\d)em")
_re_margin = re.compile(r"margin-left:(\d)em")


def prepare_content(
Expand All @@ -78,7 +78,7 @@ def prepare_content(
content = prepare_content_without_soup(title, body)

content = content.replace("&nbsp;", "&#160;")
content = re_nonprintable.sub("", content)
content = _re_nonprintable.sub("", content)
return content # noqa: RET504


Expand All @@ -87,9 +87,9 @@ def prepare_content_without_soup(
body: str,
) -> str:
# roughly analogous to what the BeautifulSoup-based path is supposed to do
body = re_div_margin_em.sub(sub_div_margin_em, body)
body = re_div_margin_em_ex.sub(sub_div_margin_em_ex, body)
body = re_href.sub(href_sub, body)
body = _re_div_margin_em.sub(_sub_div_margin_em, body)
body = _re_div_margin_em_ex.sub(_sub_div_margin_em_ex, body)
body = _re_href.sub(_href_sub, body)

body = (
body.replace(
Expand All @@ -116,17 +116,17 @@ def prepare_content_without_soup(

# nice header to display
content = f"<h1>{title}</h1>{body}" if title else body
content = re_brhr.sub(r"<\g<1> />", content)
content = re_img.sub(r"<img \g<1>/>", content)
content = _re_brhr.sub(r"<\g<1> />", content)
content = _re_img.sub(r"<img \g<1>/>", content)
return content # noqa: RET504


def _prepare_href(tag: bs4.element.Tag) -> None:
href = tag["href"]
href = cleanup_link_target(href)
href = _cleanup_link_target(href)

if href.startswith("sound:"):
fix_sound_link(href, tag)
_fix_sound_link(href, tag)

elif href.startswith(("phonetics", "help:phonetics")):
# for oxford9
Expand All @@ -136,7 +136,7 @@ def _prepare_href(tag: bs4.element.Tag) -> None:
src_name = tag.audio["name"].replace("#", "_")
tag.audio["src"] = f"{src_name}.mp3"

elif not link_is_url(href):
elif not _link_is_url(href):
tag["href"] = f"x-dictionary:d:{href}"


Expand Down Expand Up @@ -187,20 +187,20 @@ def prepare_content_with_soup( # noqa: PLR0912
tag["d:priority"] = "2"

for tag in soup(lambda x: "color:steelblue" in x.get("style", "")):
remove_style(tag, "color:steelblue")
_remove_style(tag, "color:steelblue")
if "ex" not in tag.get("class", []):
tag["class"] = tag.get("class", []) + ["ex"]

for tag in soup(is_green):
remove_style(tag, "color:green")
for tag in soup(_is_green):
_remove_style(tag, "color:green")
if "p" not in tag.get("class", ""):
tag["class"] = tag.get("class", []) + ["c"]

for tag in soup(True):
if "style" in tag.attrs:
m = re_margin.search(tag["style"])
m = _re_margin.search(tag["style"])
if m:
remove_style(tag, m.group(0))
_remove_style(tag, m.group(0))
tag["class"] = tag.get("class", []) + ["m" + m.group(1)]

for tag in soup(lambda x: "xhtml:" in x.name):
Expand Down Expand Up @@ -234,16 +234,16 @@ def prepare_content_with_soup( # noqa: PLR0912
return toStr(soup.encode_contents())


def cleanup_link_target(href: str) -> str:
def _cleanup_link_target(href: str) -> str:
    """Return ``href`` without a leading ``bword://`` scheme, if it has one."""
    scheme = "bword://"
    if href.startswith(scheme):
        return href[len(scheme):]
    return href


def href_sub(x: re.Match) -> str:
def _href_sub(x: re.Match) -> str:
href = x.groups()[1]
if href.startswith("http"):
return x.group()

href = cleanup_link_target(href)
href = _cleanup_link_target(href)

return "href=" + quoteattr(
"x-dictionary:d:"
Expand All @@ -254,23 +254,23 @@ def href_sub(x: re.Match) -> str:
)


def is_green(x: dict) -> bool:
def _is_green(x: dict) -> bool:
    """Return True when the ``style`` entry of ``x`` contains ``color:green``.

    A missing ``style`` entry is treated as an empty string.
    """
    style = x.get("style", "")
    return "color:green" in style


def remove_style(tag: dict, line: str) -> None:
def _remove_style(tag: dict, line: str) -> None:
    """Remove the text ``line`` from ``tag["style"]``, in place.

    Every occurrence of ``line`` is deleted from the style string. The
    declarations that remain are re-joined with ``;`` so they stay valid
    CSS; empty or whitespace-only fragments left behind by the removal
    are dropped. When nothing remains, the ``style`` entry is deleted.

    :param tag: mapping-like element that has a ``"style"`` entry
    :param line: style text to remove, e.g. ``"color:green"``
    :raises KeyError: if ``tag`` has no ``"style"`` entry
    """
    remaining = tag["style"].replace(line, "")
    # Joining with "" would fuse neighbouring declarations together
    # ("a:1;b:2" -> "a:1b:2"), so keep the ";" separator between them.
    declarations = [part.strip() for part in remaining.split(";")]
    style = ";".join(part for part in declarations if part)
    if style:
        tag["style"] = style
    else:
        del tag["style"]


def fix_sound_link(href: str, tag: dict[str, Any]) -> None:
def _fix_sound_link(href: str, tag: dict[str, Any]) -> None:
    """Rewrite a ``sound:`` link into a JavaScript audio-playback link.

    Sets ``tag["href"]`` to a ``javascript:`` URL that plays the target
    that follows the ``sound:`` scheme.

    :param href: link target, expected to start with ``sound:`` or ``sound://``
    :param tag: mapping-like element whose ``"href"`` entry is overwritten
    """
    # Links are routed here on a bare "sound:" prefix, so strip the scheme
    # and the optional "//" separately rather than slicing a fixed
    # len("sound://") characters, which would eat the start of "sound:x".
    target = href.removeprefix("sound:").removeprefix("//")
    tag["href"] = f'javascript:new Audio("{target}").play();'


def link_is_url(href: str) -> bool:
def _link_is_url(href: str) -> bool:
for prefix in (
"http:",
"https:",
Expand Down
27 changes: 15 additions & 12 deletions pyglossary/plugins/appledict/_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,30 @@
import string
from typing import TYPE_CHECKING

from ._normalize import title as normalize_title
from ._normalize import title_long as normalize_title_long
from ._normalize import title_short as normalize_title_short

if TYPE_CHECKING:
from collections.abc import Callable, Iterator
from typing import Any

from . import _normalize

__all__ = ["_normalize", "id_generator", "indexes_generator", "quote_string"]
__all__ = ["id_generator", "indexes_generator", "quote_string"]

log = logging.getLogger("pyglossary")

digs = string.digits + string.ascii_letters
_digs = string.digits + string.ascii_letters


def base36(x: int) -> str:
def _base36(x: int) -> str:
"""
Simplified version of int2base
http://stackoverflow.com/questions/2267362/convert-integer-to-a-string-in-a-given-numeric-base-in-python#2267446.
"""
digits: list[str] = []
while x:
digits.append(digs[x % 36])
digits.append(_digs[x % 36])
x //= 36
digits.reverse()
return "".join(digits)
Expand All @@ -53,7 +56,7 @@ def id_generator() -> Iterator[str]:
cnt = 1

while True:
yield "_" + base36(cnt)
yield "_" + _base36(cnt)
cnt += 1


Expand All @@ -77,11 +80,11 @@ def indexes_generator(
indexer = None
"""Callable[[Sequence[str], str], Sequence[str]]"""
if indexes_lang:
from . import indexes as idxs
from .indexes import languages

indexer = idxs.languages.get(indexes_lang, None)
indexer = languages.get(indexes_lang, None)
if not indexer:
keys_str = ", ".join(idxs.languages)
keys_str = ", ".join(languages)
msg = (
"extended indexes not supported for the"
f" specified language: {indexes_lang}.\n"
Expand All @@ -106,9 +109,9 @@ def generate_indexes(

normal_indexes = set()
for idx in indexes:
normal = _normalize.title(idx, BeautifulSoup)
normal_indexes.add(_normalize.title_long(normal))
normal_indexes.add(_normalize.title_short(normal))
normal = normalize_title(idx, BeautifulSoup)
normal_indexes.add(normalize_title_long(normal))
normal_indexes.add(normalize_title_short(normal))
normal_indexes.discard(title)

s = f"<d:index d:value={quoted_title} d:title={quoted_title}/>"
Expand Down
30 changes: 16 additions & 14 deletions pyglossary/plugins/appledict/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,22 @@
import re
from typing import Any

re_spaces = re.compile(r"[ \t\n]{2,}")
re_title = re.compile('<[^<]+?>|"|[<>]|\xef\xbb\xbf')
re_title_short = re.compile(r"\[.*?\]")
re_whitespace = re.compile("(\t|\n|\r)")
__all__ = ["title", "title_long", "title_short"]

_re_spaces = re.compile(r"[ \t\n]{2,}")
_re_title = re.compile('<[^<]+?>|"|[<>]|\xef\xbb\xbf')
_re_title_short = re.compile(r"\[.*?\]")
_re_whitespace = re.compile("(\t|\n|\r)")

# FIXME: rename all/most functions here, add a 'fix_' prefix


def spaces(s: str) -> str:
def _spaces(s: str) -> str:
"""
Strip off leading and trailing whitespaces and
replace contiguous whitespaces with just one space.
"""
return re_spaces.sub(" ", s.strip())
return _re_spaces.sub(" ", s.strip())


_brackets_sub = (
Expand Down Expand Up @@ -78,7 +80,7 @@ def spaces(s: str) -> str:
)


def brackets(s: str) -> str:
def _brackets(s: str) -> str:
r"""
Replace all crazy brackets with square ones [].
Expand All @@ -91,17 +93,17 @@ def brackets(s: str) -> str:
if "{" in s:
for exp, sub in _brackets_sub:
s = exp.sub(sub, s)
return spaces(s)
return _spaces(s)


def truncate(text: str, length: int = 449) -> str:
def _truncate(text: str, length: int = 449) -> str:
"""
Truncate a string to the given length
:param str text:
:return: truncated text
:rtype: str.
"""
content = re_whitespace.sub(" ", text)
content = _re_whitespace.sub(" ", text)
if len(text) > length:
# find the next space after max_len chars (do not break inside a word)
pos = content[:length].rfind(" ")
Expand All @@ -123,10 +125,10 @@ def title(title: str, BeautifulSoup: Any) -> str:
# FIXME: html or lxml? gives warning unless it's lxml
).get_text(strip=True)
else:
title = re_title.sub("", title)
title = _re_title.sub("", title)
title = title.replace("&", "&amp;")
title = brackets(title)
title = truncate(title, 1126)
title = _brackets(title)
title = _truncate(title, 1126)
return title # noqa: RET504


Expand All @@ -151,4 +153,4 @@ def title_short(s: str) -> str:
title_short("str[ing]") -> str.
"""
return spaces(re_title_short.sub("", s))
return _spaces(_re_title_short.sub("", s))
10 changes: 1 addition & 9 deletions pyglossary/plugins/appledict/indexes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@
if TYPE_CHECKING:
from collections.abc import Callable, Sequence

from pyglossary.core import log

__all__ = ["languages", "log"]
__all__ = ["languages"]

languages: dict[str, Callable[[Sequence[str], str], set[str]]] = {}
"""
Expand All @@ -39,12 +37,6 @@
:param content: cleaned entry content
:return: iterable of indexes (str).
use
```
from . import languages
# or
from appledict.indexes import languages
```
"""

here = os.path.dirname(os.path.abspath(__file__))
Expand Down
2 changes: 1 addition & 1 deletion pyglossary/plugins/appledict/indexes/zh.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

from typing import TYPE_CHECKING

from . import languages, log
from . import languages

if TYPE_CHECKING:
from collections.abc import Sequence
Expand Down
4 changes: 2 additions & 2 deletions pyglossary/plugins/appledict/jing/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

sys.path.append(os.path.abspath(os.path.dirname(__file__))) # noqa: E402

from . import main
from .main import main

log = logging.getLogger("root")
console_output_handler = logging.StreamHandler(sys.stderr)
Expand All @@ -18,4 +18,4 @@
log.addHandler(console_output_handler)
log.setLevel(logging.INFO)

sys.exit(main.main())
sys.exit(main())
Loading

0 comments on commit 4f68daf

Please sign in to comment.