plugins: make internal stuff private, and more refactoring

ilius · Jan 17, 2025 · commit 4f68daf
1 parent: f7e0013
Showing 27 changed files with 244 additions and 229 deletions.
diff --git a/pyglossary/plugins/appledict/_content.py b/pyglossary/plugins/appledict/_content.py
@@ -39,21 +39,21 @@
 log = logging.getLogger("pyglossary")
 
 
-re_brhr = re.compile("<(BR|HR)>", re.IGNORECASE)
-re_nonprintable = re.compile("[\x00-\x07\x0e-\x1f]")
-re_img = re.compile("<IMG (.*?)>", re.IGNORECASE)
+_re_brhr = re.compile("<(BR|HR)>", re.IGNORECASE)
+_re_nonprintable = re.compile("[\x00-\x07\x0e-\x1f]")
+_re_img = re.compile("<IMG (.*?)>", re.IGNORECASE)
 
-re_div_margin_em = re.compile(r'<div style="margin-left:(\d)em">')
-sub_div_margin_em = r'<div class="m\1">'
+_re_div_margin_em = re.compile(r'<div style="margin-left:(\d)em">')
+_sub_div_margin_em = r'<div class="m\1">'
 
-re_div_margin_em_ex = re.compile(
+_re_div_margin_em_ex = re.compile(
 	r'<div class="ex" style="margin-left:(\d)em;color:steelblue">',
 )
-sub_div_margin_em_ex = r'<div class="m\1 ex">'
+_sub_div_margin_em_ex = r'<div class="m\1 ex">'
 
-re_href = re.compile(r"""href=(["'])(.*?)\1""")
+_re_href = re.compile(r"""href=(["'])(.*?)\1""")
 
-re_margin = re.compile(r"margin-left:(\d)em")
+_re_margin = re.compile(r"margin-left:(\d)em")
 
 
 def prepare_content(
@@ -78,7 +78,7 @@ def prepare_content(
 		content = prepare_content_without_soup(title, body)
 
 	content = content.replace("&nbsp;", "&#160;")
-	content = re_nonprintable.sub("", content)
+	content = _re_nonprintable.sub("", content)
 	return content  # noqa: RET504
 
 
@@ -87,9 +87,9 @@ def prepare_content_without_soup(
 	body: str,
 ) -> str:
 	# somewhat analogue to what BeautifulSoup suppose to do
-	body = re_div_margin_em.sub(sub_div_margin_em, body)
-	body = re_div_margin_em_ex.sub(sub_div_margin_em_ex, body)
-	body = re_href.sub(href_sub, body)
+	body = _re_div_margin_em.sub(_sub_div_margin_em, body)
+	body = _re_div_margin_em_ex.sub(_sub_div_margin_em_ex, body)
+	body = _re_href.sub(_href_sub, body)
 
 	body = (
 		body.replace(
@@ -116,17 +116,17 @@ def prepare_content_without_soup(
 
 	# nice header to display
 	content = f"<h1>{title}</h1>{body}" if title else body
-	content = re_brhr.sub(r"<\g<1> />", content)
-	content = re_img.sub(r"<img \g<1>/>", content)
+	content = _re_brhr.sub(r"<\g<1> />", content)
+	content = _re_img.sub(r"<img \g<1>/>", content)
 	return content  # noqa: RET504
 
 
 def _prepare_href(tag: bs4.element.Tag) -> None:
 	href = tag["href"]
-	href = cleanup_link_target(href)
+	href = _cleanup_link_target(href)
 
 	if href.startswith("sound:"):
-		fix_sound_link(href, tag)
+		_fix_sound_link(href, tag)
 
 	elif href.startswith(("phonetics", "help:phonetics")):
 		# for oxford9
@@ -136,7 +136,7 @@ def _prepare_href(tag: bs4.element.Tag) -> None:
 			src_name = tag.audio["name"].replace("#", "_")
 			tag.audio["src"] = f"{src_name}.mp3"
 
-	elif not link_is_url(href):
+	elif not _link_is_url(href):
 		tag["href"] = f"x-dictionary:d:{href}"
 
 
@@ -187,20 +187,20 @@ def prepare_content_with_soup(  # noqa: PLR0912
 		tag["d:priority"] = "2"
 
 	for tag in soup(lambda x: "color:steelblue" in x.get("style", "")):
-		remove_style(tag, "color:steelblue")
+		_remove_style(tag, "color:steelblue")
 		if "ex" not in tag.get("class", []):
 			tag["class"] = tag.get("class", []) + ["ex"]
 
-	for tag in soup(is_green):
-		remove_style(tag, "color:green")
+	for tag in soup(_is_green):
+		_remove_style(tag, "color:green")
 		if "p" not in tag.get("class", ""):
 			tag["class"] = tag.get("class", []) + ["c"]
 
 	for tag in soup(True):
 		if "style" in tag.attrs:
-			m = re_margin.search(tag["style"])
+			m = _re_margin.search(tag["style"])
 			if m:
-				remove_style(tag, m.group(0))
+				_remove_style(tag, m.group(0))
 				tag["class"] = tag.get("class", []) + ["m" + m.group(1)]
 
 	for tag in soup(lambda x: "xhtml:" in x.name):
@@ -234,16 +234,16 @@ def prepare_content_with_soup(  # noqa: PLR0912
 	return toStr(soup.encode_contents())
 
 
-def cleanup_link_target(href: str) -> str:
+def _cleanup_link_target(href: str) -> str:
 	return href.removeprefix("bword://")
 
 
-def href_sub(x: re.Match) -> str:
+def _href_sub(x: re.Match) -> str:
 	href = x.groups()[1]
 	if href.startswith("http"):
 		return x.group()
 
-	href = cleanup_link_target(href)
+	href = _cleanup_link_target(href)
 
 	return "href=" + quoteattr(
 		"x-dictionary:d:"
@@ -254,23 +254,23 @@ def href_sub(x: re.Match) -> str:
 	)
 
 
-def is_green(x: dict) -> bool:
+def _is_green(x: dict) -> bool:
 	return "color:green" in x.get("style", "")
 
 
-def remove_style(tag: dict, line: str) -> None:
+def _remove_style(tag: dict, line: str) -> None:
 	s = "".join(tag["style"].replace(line, "").split(";"))
 	if s:
 		tag["style"] = s
 	else:
 		del tag["style"]
 
 
-def fix_sound_link(href: str, tag: dict[str, Any]) -> None:
+def _fix_sound_link(href: str, tag: dict[str, Any]) -> None:
 	tag["href"] = f'javascript:new Audio("{href[len("sound://") :]}").play();'
 
 
-def link_is_url(href: str) -> bool:
+def _link_is_url(href: str) -> bool:
 	for prefix in (
 		"http:",
 		"https:",

diff --git a/pyglossary/plugins/appledict/_dict.py b/pyglossary/plugins/appledict/_dict.py
@@ -23,27 +23,30 @@
 import string
 from typing import TYPE_CHECKING
 
+from ._normalize import title as normalize_title
+from ._normalize import title_long as normalize_title_long
+from ._normalize import title_short as normalize_title_short
+
 if TYPE_CHECKING:
 	from collections.abc import Callable, Iterator
 	from typing import Any
 
-from . import _normalize
 
-__all__ = ["_normalize", "id_generator", "indexes_generator", "quote_string"]
+__all__ = ["id_generator", "indexes_generator", "quote_string"]
 
 log = logging.getLogger("pyglossary")
 
-digs = string.digits + string.ascii_letters
+_digs = string.digits + string.ascii_letters
 
 
-def base36(x: int) -> str:
+def _base36(x: int) -> str:
 	"""
 	Simplified version of int2base
 	http://stackoverflow.com/questions/2267362/convert-integer-to-a-string-in-a-given-numeric-base-in-python#2267446.
 	"""
 	digits: list[str] = []
 	while x:
-		digits.append(digs[x % 36])
+		digits.append(_digs[x % 36])
 		x //= 36
 	digits.reverse()
 	return "".join(digits)
@@ -53,7 +56,7 @@ def id_generator() -> Iterator[str]:
 	cnt = 1
 
 	while True:
-		yield "_" + base36(cnt)
+		yield "_" + _base36(cnt)
 		cnt += 1
 
 
@@ -77,11 +80,11 @@ def indexes_generator(
 	indexer = None
 	"""Callable[[Sequence[str], str], Sequence[str]]"""
 	if indexes_lang:
-		from . import indexes as idxs
+		from .indexes import languages
 
-		indexer = idxs.languages.get(indexes_lang, None)
+		indexer = languages.get(indexes_lang, None)
 		if not indexer:
-			keys_str = ", ".join(idxs.languages)
+			keys_str = ", ".join(languages)
 			msg = (
 				"extended indexes not supported for the"
 				f" specified language: {indexes_lang}.\n"
@@ -106,9 +109,9 @@ def generate_indexes(
 
 		normal_indexes = set()
 		for idx in indexes:
-			normal = _normalize.title(idx, BeautifulSoup)
-			normal_indexes.add(_normalize.title_long(normal))
-			normal_indexes.add(_normalize.title_short(normal))
+			normal = normalize_title(idx, BeautifulSoup)
+			normal_indexes.add(normalize_title_long(normal))
+			normal_indexes.add(normalize_title_short(normal))
 		normal_indexes.discard(title)
 
 		s = f"<d:index d:value={quoted_title} d:title={quoted_title}/>"

diff --git a/pyglossary/plugins/appledict/_normalize.py b/pyglossary/plugins/appledict/_normalize.py
@@ -22,20 +22,22 @@
 import re
 from typing import Any
 
-re_spaces = re.compile(r"[ \t\n]{2,}")
-re_title = re.compile('<[^<]+?>|"|[<>]|\xef\xbb\xbf')
-re_title_short = re.compile(r"\[.*?\]")
-re_whitespace = re.compile("(\t|\n|\r)")
+__all__ = ["title", "title_long", "title_short"]
+
+_re_spaces = re.compile(r"[ \t\n]{2,}")
+_re_title = re.compile('<[^<]+?>|"|[<>]|\xef\xbb\xbf')
+_re_title_short = re.compile(r"\[.*?\]")
+_re_whitespace = re.compile("(\t|\n|\r)")
 
 # FIXME: rename all/most functions here, add a 'fix_' prefix
 
 
-def spaces(s: str) -> str:
+def _spaces(s: str) -> str:
 	"""
 	Strip off leading and trailing whitespaces and
 	replace contiguous whitespaces with just one space.
 	"""
-	return re_spaces.sub(" ", s.strip())
+	return _re_spaces.sub(" ", s.strip())
 
 
 _brackets_sub = (
@@ -78,7 +80,7 @@ def spaces(s: str) -> str:
 )
 
 
-def brackets(s: str) -> str:
+def _brackets(s: str) -> str:
 	r"""
 	Replace all crazy brackets with square ones [].
 
@@ -91,17 +93,17 @@ def brackets(s: str) -> str:
 	if "{" in s:
 		for exp, sub in _brackets_sub:
 			s = exp.sub(sub, s)
-	return spaces(s)
+	return _spaces(s)
 
 
-def truncate(text: str, length: int = 449) -> str:
+def _truncate(text: str, length: int = 449) -> str:
 	"""
 	Trunct a string to given length
 	:param str text:
 	:return: truncated text
 	:rtype: str.
 	"""
-	content = re_whitespace.sub(" ", text)
+	content = _re_whitespace.sub(" ", text)
 	if len(text) > length:
 		# find the next space after max_len chars (do not break inside a word)
 		pos = content[:length].rfind(" ")
@@ -123,10 +125,10 @@ def title(title: str, BeautifulSoup: Any) -> str:
 				# FIXME: html or lxml? gives warning unless it's lxml
 			).get_text(strip=True)
 	else:
-		title = re_title.sub("", title)
+		title = _re_title.sub("", title)
 		title = title.replace("&", "&amp;")
-	title = brackets(title)
-	title = truncate(title, 1126)
+	title = _brackets(title)
+	title = _truncate(title, 1126)
 	return title  # noqa: RET504
 
 
@@ -151,4 +153,4 @@ def title_short(s: str) -> str:
 	title_short("str[ing]") -> str.
 
 	"""
-	return spaces(re_title_short.sub("", s))
+	return _spaces(_re_title_short.sub("", s))
diff --git a/pyglossary/plugins/appledict/indexes/__init__.py b/pyglossary/plugins/appledict/indexes/__init__.py
@@ -25,9 +25,7 @@
 if TYPE_CHECKING:
 	from collections.abc import Callable, Sequence
 
-from pyglossary.core import log
-
-__all__ = ["languages", "log"]
+__all__ = ["languages"]
 
 languages: dict[str, Callable[[Sequence[str], str], set[str]]] = {}
 """
@@ -39,12 +37,6 @@
 	:param content: cleaned entry content
 	:return: iterable of indexes (str).
 
-use
-```
-	from . import languages
-	# or
-	from appledict.indexes import languages
-```
 """
 
 here = os.path.dirname(os.path.abspath(__file__))

diff --git a/pyglossary/plugins/appledict/indexes/zh.py b/pyglossary/plugins/appledict/indexes/zh.py
@@ -36,7 +36,7 @@
 
 from typing import TYPE_CHECKING
 
-from . import languages, log
+from . import languages
 
 if TYPE_CHECKING:
 	from collections.abc import Sequence

diff --git a/pyglossary/plugins/appledict/jing/__main__.py b/pyglossary/plugins/appledict/jing/__main__.py
@@ -6,7 +6,7 @@
 
 sys.path.append(os.path.abspath(os.path.dirname(__file__)))  # noqa: E402
 
-from . import main
+from .main import main
 
 log = logging.getLogger("root")
 console_output_handler = logging.StreamHandler(sys.stderr)
@@ -18,4 +18,4 @@
 log.addHandler(console_output_handler)
 log.setLevel(logging.INFO)
 
-sys.exit(main.main())
+sys.exit(main())