diff --git a/pyglossary/plugins/appledict/_content.py b/pyglossary/plugins/appledict/_content.py index 4ec0092ac..fb41cd863 100644 --- a/pyglossary/plugins/appledict/_content.py +++ b/pyglossary/plugins/appledict/_content.py @@ -39,21 +39,21 @@ log = logging.getLogger("pyglossary") -re_brhr = re.compile("<(BR|HR)>", re.IGNORECASE) -re_nonprintable = re.compile("[\x00-\x07\x0e-\x1f]") -re_img = re.compile("", re.IGNORECASE) +_re_brhr = re.compile("<(BR|HR)>", re.IGNORECASE) +_re_nonprintable = re.compile("[\x00-\x07\x0e-\x1f]") +_re_img = re.compile("", re.IGNORECASE) -re_div_margin_em = re.compile(r'
') -sub_div_margin_em = r'
' +_re_div_margin_em = re.compile(r'
') +_sub_div_margin_em = r'
' -re_div_margin_em_ex = re.compile( +_re_div_margin_em_ex = re.compile( r'
', ) -sub_div_margin_em_ex = r'
' +_sub_div_margin_em_ex = r'
' -re_href = re.compile(r"""href=(["'])(.*?)\1""") +_re_href = re.compile(r"""href=(["'])(.*?)\1""") -re_margin = re.compile(r"margin-left:(\d)em") +_re_margin = re.compile(r"margin-left:(\d)em") def prepare_content( @@ -78,7 +78,7 @@ def prepare_content( content = prepare_content_without_soup(title, body) content = content.replace(" ", " ") - content = re_nonprintable.sub("", content) + content = _re_nonprintable.sub("", content) return content # noqa: RET504 @@ -87,9 +87,9 @@ def prepare_content_without_soup( body: str, ) -> str: # somewhat analogue to what BeautifulSoup suppose to do - body = re_div_margin_em.sub(sub_div_margin_em, body) - body = re_div_margin_em_ex.sub(sub_div_margin_em_ex, body) - body = re_href.sub(href_sub, body) + body = _re_div_margin_em.sub(_sub_div_margin_em, body) + body = _re_div_margin_em_ex.sub(_sub_div_margin_em_ex, body) + body = _re_href.sub(_href_sub, body) body = ( body.replace( @@ -116,17 +116,17 @@ def prepare_content_without_soup( # nice header to display content = f"

{title}

{body}" if title else body - content = re_brhr.sub(r"<\g<1> />", content) - content = re_img.sub(r"/>", content) + content = _re_brhr.sub(r"<\g<1> />", content) + content = _re_img.sub(r"/>", content) return content # noqa: RET504 def _prepare_href(tag: bs4.element.Tag) -> None: href = tag["href"] - href = cleanup_link_target(href) + href = _cleanup_link_target(href) if href.startswith("sound:"): - fix_sound_link(href, tag) + _fix_sound_link(href, tag) elif href.startswith(("phonetics", "help:phonetics")): # for oxford9 @@ -136,7 +136,7 @@ def _prepare_href(tag: bs4.element.Tag) -> None: src_name = tag.audio["name"].replace("#", "_") tag.audio["src"] = f"{src_name}.mp3" - elif not link_is_url(href): + elif not _link_is_url(href): tag["href"] = f"x-dictionary:d:{href}" @@ -187,20 +187,20 @@ def prepare_content_with_soup( # noqa: PLR0912 tag["d:priority"] = "2" for tag in soup(lambda x: "color:steelblue" in x.get("style", "")): - remove_style(tag, "color:steelblue") + _remove_style(tag, "color:steelblue") if "ex" not in tag.get("class", []): tag["class"] = tag.get("class", []) + ["ex"] - for tag in soup(is_green): - remove_style(tag, "color:green") + for tag in soup(_is_green): + _remove_style(tag, "color:green") if "p" not in tag.get("class", ""): tag["class"] = tag.get("class", []) + ["c"] for tag in soup(True): if "style" in tag.attrs: - m = re_margin.search(tag["style"]) + m = _re_margin.search(tag["style"]) if m: - remove_style(tag, m.group(0)) + _remove_style(tag, m.group(0)) tag["class"] = tag.get("class", []) + ["m" + m.group(1)] for tag in soup(lambda x: "xhtml:" in x.name): @@ -234,16 +234,16 @@ def prepare_content_with_soup( # noqa: PLR0912 return toStr(soup.encode_contents()) -def cleanup_link_target(href: str) -> str: +def _cleanup_link_target(href: str) -> str: return href.removeprefix("bword://") -def href_sub(x: re.Match) -> str: +def _href_sub(x: re.Match) -> str: href = x.groups()[1] if href.startswith("http"): return x.group() - href = cleanup_link_target(href) + href = _cleanup_link_target(href) return "href=" + quoteattr( "x-dictionary:d:" @@ -254,11 +254,11 @@ def href_sub(x: re.Match) -> str: ) -def is_green(x: dict) -> bool: +def _is_green(x: dict) -> bool: return "color:green" in x.get("style", "") -def remove_style(tag: dict, line: str) -> None: +def _remove_style(tag: dict, line: str) -> None: s = "".join(tag["style"].replace(line, "").split(";")) if s: tag["style"] = s @@ -266,11 +266,11 @@ def remove_style(tag: dict, line: str) -> None: del tag["style"] -def fix_sound_link(href: str, tag: dict[str, Any]) -> None: +def _fix_sound_link(href: str, tag: dict[str, Any]) -> None: tag["href"] = f'javascript:new Audio("{href[len("sound://") :]}").play();' -def link_is_url(href: str) -> bool: +def _link_is_url(href: str) -> bool: for prefix in ( "http:", "https:", diff --git a/pyglossary/plugins/appledict/_dict.py b/pyglossary/plugins/appledict/_dict.py index 2be5bb018..8b5d9f786 100644 --- a/pyglossary/plugins/appledict/_dict.py +++ b/pyglossary/plugins/appledict/_dict.py @@ -23,27 +23,30 @@ import string from typing import TYPE_CHECKING +from ._normalize import title as normalize_title +from ._normalize import title_long as normalize_title_long +from ._normalize import title_short as normalize_title_short + if TYPE_CHECKING: from collections.abc import Callable, Iterator from typing import Any -from . import _normalize -__all__ = ["_normalize", "id_generator", "indexes_generator", "quote_string"] +__all__ = ["id_generator", "indexes_generator", "quote_string"] log = logging.getLogger("pyglossary") -digs = string.digits + string.ascii_letters +_digs = string.digits + string.ascii_letters -def base36(x: int) -> str: +def _base36(x: int) -> str: """ Simplified version of int2base http://stackoverflow.com/questions/2267362/convert-integer-to-a-string-in-a-given-numeric-base-in-python#2267446. """ digits: list[str] = [] while x: - digits.append(digs[x % 36]) + digits.append(_digs[x % 36]) x //= 36 digits.reverse() return "".join(digits) @@ -53,7 +56,7 @@ def id_generator() -> Iterator[str]: cnt = 1 while True: - yield "_" + base36(cnt) + yield "_" + _base36(cnt) cnt += 1 @@ -77,11 +80,11 @@ def indexes_generator( indexer = None """Callable[[Sequence[str], str], Sequence[str]]""" if indexes_lang: - from . import indexes as idxs + from .indexes import languages - indexer = idxs.languages.get(indexes_lang, None) + indexer = languages.get(indexes_lang, None) if not indexer: - keys_str = ", ".join(idxs.languages) + keys_str = ", ".join(languages) msg = ( "extended indexes not supported for the" f" specified language: {indexes_lang}.\n" @@ -106,9 +109,9 @@ def generate_indexes( normal_indexes = set() for idx in indexes: - normal = _normalize.title(idx, BeautifulSoup) - normal_indexes.add(_normalize.title_long(normal)) - normal_indexes.add(_normalize.title_short(normal)) + normal = normalize_title(idx, BeautifulSoup) + normal_indexes.add(normalize_title_long(normal)) + normal_indexes.add(normalize_title_short(normal)) normal_indexes.discard(title) s = f"" diff --git a/pyglossary/plugins/appledict/_normalize.py b/pyglossary/plugins/appledict/_normalize.py index 41f1a6f37..c162c8df9 100644 --- a/pyglossary/plugins/appledict/_normalize.py +++ b/pyglossary/plugins/appledict/_normalize.py @@ -22,20 +22,22 @@ import re from typing import Any -re_spaces = re.compile(r"[ \t\n]{2,}") -re_title = re.compile('<[^<]+?>|"|[<>]|\xef\xbb\xbf') -re_title_short = re.compile(r"\[.*?\]") -re_whitespace = re.compile("(\t|\n|\r)") +__all__ = ["title", "title_long", "title_short"] + +_re_spaces = re.compile(r"[ \t\n]{2,}") +_re_title = re.compile('<[^<]+?>|"|[<>]|\xef\xbb\xbf') +_re_title_short = re.compile(r"\[.*?\]") +_re_whitespace = re.compile("(\t|\n|\r)") # FIXME: rename all/most functions here, add a 'fix_' prefix -def spaces(s: str) -> str: +def _spaces(s: str) -> str: """ Strip off leading and trailing whitespaces and replace contiguous whitespaces with just one space. """ - return re_spaces.sub(" ", s.strip()) + return _re_spaces.sub(" ", s.strip()) _brackets_sub = ( @@ -78,7 +80,7 @@ def spaces(s: str) -> str: ) -def brackets(s: str) -> str: +def _brackets(s: str) -> str: r""" Replace all crazy brackets with square ones []. @@ -91,17 +93,17 @@ def brackets(s: str) -> str: if "{" in s: for exp, sub in _brackets_sub: s = exp.sub(sub, s) - return spaces(s) + return _spaces(s) -def truncate(text: str, length: int = 449) -> str: +def _truncate(text: str, length: int = 449) -> str: """ Trunct a string to given length :param str text: :return: truncated text :rtype: str. """ - content = re_whitespace.sub(" ", text) + content = _re_whitespace.sub(" ", text) if len(text) > length: # find the next space after max_len chars (do not break inside a word) pos = content[:length].rfind(" ") @@ -123,10 +125,10 @@ def title(title: str, BeautifulSoup: Any) -> str: # FIXME: html or lxml? gives warning unless it's lxml ).get_text(strip=True) else: - title = re_title.sub("", title) + title = _re_title.sub("", title) title = title.replace("&", "&") - title = brackets(title) - title = truncate(title, 1126) + title = _brackets(title) + title = _truncate(title, 1126) return title # noqa: RET504 @@ -151,4 +153,4 @@ def title_short(s: str) -> str: title_short("str[ing]") -> str. """ - return spaces(re_title_short.sub("", s)) + return _spaces(_re_title_short.sub("", s)) diff --git a/pyglossary/plugins/appledict/indexes/__init__.py b/pyglossary/plugins/appledict/indexes/__init__.py index ca3a2da1f..8554359ea 100644 --- a/pyglossary/plugins/appledict/indexes/__init__.py +++ b/pyglossary/plugins/appledict/indexes/__init__.py @@ -25,9 +25,7 @@ if TYPE_CHECKING: from collections.abc import Callable, Sequence -from pyglossary.core import log - -__all__ = ["languages", "log"] +__all__ = ["languages"] languages: dict[str, Callable[[Sequence[str], str], set[str]]] = {} """ @@ -39,12 +37,6 @@ :param content: cleaned entry content :return: iterable of indexes (str). -use -``` - from . import languages - # or - from appledict.indexes import languages -``` """ here = os.path.dirname(os.path.abspath(__file__)) diff --git a/pyglossary/plugins/appledict/indexes/zh.py b/pyglossary/plugins/appledict/indexes/zh.py index d1e3f7eec..6ec4d4728 100644 --- a/pyglossary/plugins/appledict/indexes/zh.py +++ b/pyglossary/plugins/appledict/indexes/zh.py @@ -36,7 +36,7 @@ from typing import TYPE_CHECKING -from . import languages, log +from . import languages if TYPE_CHECKING: from collections.abc import Sequence diff --git a/pyglossary/plugins/appledict/jing/__main__.py b/pyglossary/plugins/appledict/jing/__main__.py index de5b4ac8e..b3a1cc0a4 100644 --- a/pyglossary/plugins/appledict/jing/__main__.py +++ b/pyglossary/plugins/appledict/jing/__main__.py @@ -6,7 +6,7 @@ sys.path.append(os.path.abspath(os.path.dirname(__file__))) # noqa: E402 -from . import main +from .main import main log = logging.getLogger("root") console_output_handler = logging.StreamHandler(sys.stderr) @@ -18,4 +18,4 @@ log.addHandler(console_output_handler) log.setLevel(logging.INFO) -sys.exit(main.main()) +sys.exit(main()) diff --git a/pyglossary/plugins/appledict/writer.py b/pyglossary/plugins/appledict/writer.py index 5759bbdde..680820cf7 100644 --- a/pyglossary/plugins/appledict/writer.py +++ b/pyglossary/plugins/appledict/writer.py @@ -32,11 +32,12 @@ from ._content import prepare_content from ._dict import ( - _normalize, id_generator, indexes_generator, quote_string, ) +from ._normalize import title as normalize_title +from ._normalize import title_long as normalize_title_long if TYPE_CHECKING: import io @@ -52,7 +53,7 @@ BeautifulSoup = None -def loadBeautifulSoup() -> None: +def _loadBeautifulSoup() -> None: global BeautifulSoup try: import bs4 as BeautifulSoup @@ -70,13 +71,13 @@ def loadBeautifulSoup() -> None: ) -def abspath_or_None(path: str | None) -> str | None: +def _abspath_or_None(path: str | None) -> str | None: if not path: return None return os.path.abspath(os.path.expanduser(path)) -def write_header( +def _write_header( toFile: io.TextIOBase, front_back_matter: str | None, ) -> None: @@ -95,7 +96,7 @@ def write_header( toFile.write(_file.read()) -def format_default_prefs(default_prefs: dict[str, Any] | None) -> str: +def _format_default_prefs(default_prefs: dict[str, Any] | None) -> str: """ :type default_prefs: dict or None @@ -118,7 +119,7 @@ def format_default_prefs(default_prefs: dict[str, Any] | None) -> str: ).strip() -def write_css(fname: str, css_file: str) -> None: +def _write_css(fname: str, css_file: str) -> None: with open(fname, mode="wb") as toFile: if css_file: with open(css_file, mode="rb") as fromFile: @@ -214,7 +215,7 @@ def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912, PLR0915 if clean_html: if BeautifulSoup is None: - loadBeautifulSoup() + _loadBeautifulSoup() if BeautifulSoup is None: log.warning( "clean_html option passed but BeautifulSoup not found. " @@ -228,10 +229,10 @@ def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912, PLR0915 fileNameBase = basename(dirname).replace(".", "_") filePathBase = join(dirname, fileNameBase) # before chdir (outside indir block) - css = abspath_or_None(css) - xsl = abspath_or_None(xsl) - prefs_html = abspath_or_None(prefs_html) - front_back_matter = abspath_or_None(front_back_matter) + css = _abspath_or_None(css) + xsl = _abspath_or_None(xsl) + prefs_html = _abspath_or_None(prefs_html) + front_back_matter = _abspath_or_None(front_back_matter) generate_id = id_generator() generate_indexes = indexes_generator(indexes) @@ -241,7 +242,7 @@ def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912, PLR0915 os.mkdir(myResDir) with open(filePathBase + ".xml", mode="w", encoding="utf-8") as toFile: - write_header(toFile, front_back_matter) + _write_header(toFile, front_back_matter) while True: entry = yield if entry is None: @@ -254,8 +255,8 @@ def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912, PLR0915 word, alts = words[0], words[1:] defi = entry.defi - long_title = _normalize.title_long( - _normalize.title(word, BeautifulSoup), + long_title = normalize_title_long( + normalize_title(word, BeautifulSoup), ) if not long_title: continue @@ -284,7 +285,7 @@ def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912, PLR0915 if prefs_html: shutil.copy(prefs_html, myResDir) - write_css(filePathBase + ".css", css) + _write_css(filePathBase + ".css", css) with open(join(dirname, "Makefile"), mode="w", encoding="utf-8") as toFile: toFile.write( @@ -332,7 +333,7 @@ def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912, PLR0915 DCSDictionaryCopyright=copyright_, DCSDictionaryManufacturerName=glos.author, DCSDictionaryXSL=basename(xsl) if xsl else "", - DCSDictionaryDefaultPrefs=format_default_prefs(default_prefs), + DCSDictionaryDefaultPrefs=_format_default_prefs(default_prefs), DCSDictionaryPrefsHTML=basename(prefs_html) if prefs_html else "", DCSDictionaryFrontMatterReferenceID=frontMatterReferenceID, ), diff --git a/pyglossary/plugins/appledict_bin/appledict_file_tools.py b/pyglossary/plugins/appledict_bin/appledict_file_tools.py index e2dc7f83b..38eb67b10 100644 --- a/pyglossary/plugins/appledict_bin/appledict_file_tools.py +++ b/pyglossary/plugins/appledict_bin/appledict_file_tools.py @@ -35,7 +35,7 @@ # addressing of AppleDict binary files always ignores first 0x40 bytes -def readIntPair(buffer: io.BufferedIOBase) -> tuple[int, int]: +def _readIntPair(buffer: io.BufferedIOBase) -> tuple[int, int]: # to satisfy mymy, put them in vars with declared type a: int b: int @@ -61,7 +61,7 @@ def guessFileOffsetLimit(file: io.BufferedIOBase) -> tuple[int, int]: """Returns address offset to start parsing from and EOF address.""" file.seek(APPLEDICT_FILE_OFFSET) limit = readInt(file) - intPair = readIntPair(file) + intPair = _readIntPair(file) if intPair == (0, -1): # 0000 0000 FFFF FFFF return 0x20, limit diff --git a/pyglossary/plugins/appledict_bin/key_data.py b/pyglossary/plugins/appledict_bin/key_data.py index 7f1a359cc..2b1f904d4 100644 --- a/pyglossary/plugins/appledict_bin/key_data.py +++ b/pyglossary/plugins/appledict_bin/key_data.py @@ -14,15 +14,17 @@ # GNU General Public License for more details. from __future__ import annotations -import typing +from typing import TYPE_CHECKING -if typing.TYPE_CHECKING: +if TYPE_CHECKING: from collections.abc import Sequence + from typing import Any, TypeAlias -__all__ = ["KeyData", "RawKeyData"] +__all__ = ["KeyData", "RawKeyDataType"] -RawKeyData: typing.TypeAlias = "tuple[int, int, Sequence[str]]" -"""tuple(priority, parentalControl, keyTextFields)""" +if TYPE_CHECKING: + RawKeyDataType: TypeAlias = tuple[int, int, Sequence[str]] + """tuple(priority, parentalControl, keyTextFields)""" """ @@ -99,7 +101,7 @@ def __init__( # noqa: PLR0913 self.entryTitle = entryTitle self.anchor = anchor - def toDict(self) -> dict[str, typing.Any]: + def toDict(self) -> dict[str, Any]: return { "priority": self.priority, "parentalControl": self.parentalControl, @@ -110,7 +112,7 @@ def toDict(self) -> dict[str, typing.Any]: } @staticmethod - def fromRaw(rawKeyData: RawKeyData, keyTextFieldOrder: list[str]) -> KeyData: + def fromRaw(rawKeyData: RawKeyDataType, keyTextFieldOrder: list[str]) -> KeyData: priority, parentalControl, keyTextFields = rawKeyData keyword = "" headword = "" diff --git a/pyglossary/plugins/appledict_bin/reader.py b/pyglossary/plugins/appledict_bin/reader.py index 2deaccfb8..a696f3d81 100644 --- a/pyglossary/plugins/appledict_bin/reader.py +++ b/pyglossary/plugins/appledict_bin/reader.py @@ -38,7 +38,7 @@ ) from .appledict_properties import from_metadata from .article_address import ArticleAddress -from .key_data import KeyData, RawKeyData +from .key_data import KeyData if TYPE_CHECKING: import io @@ -55,6 +55,7 @@ from pyglossary.lxml_types import Element from .appledict_properties import AppleDictProperties + from .key_data import RawKeyDataType from zlib import decompress @@ -95,7 +96,7 @@ def __init__(self, glos: ReaderGlossaryType) -> None: self._re_xmlns = re.compile(' xmlns:d="[^"<>]+"') self._titleById: dict[str, str] = {} self._wordCount = 0 - self._keyTextData: dict[ArticleAddress, list[RawKeyData]] = {} + self._keyTextData: dict[ArticleAddress, list[RawKeyDataType]] = {} self._cssName = "" @staticmethod @@ -516,7 +517,7 @@ def readKeyTextData( # noqa: PLR0912, PLR0915 Sets self._keyTextData when done. """ buff.seek(bufferOffset) - keyTextData: dict[ArticleAddress, list[RawKeyData]] = {} + keyTextData: dict[ArticleAddress, list[RawKeyDataType]] = {} while bufferOffset < bufferLimit: yield (bufferOffset, bufferLimit) buff.seek(bufferOffset) @@ -594,7 +595,7 @@ def readKeyTextData( # noqa: PLR0912, PLR0915 word_form = read_x_bytes_as_word(buff, word_form_len) keyTextFields.append(word_form) - entryKeyTextData: RawKeyData = ( + entryKeyTextData: RawKeyDataType = ( priority, parentalControl, tuple(keyTextFields), diff --git a/pyglossary/plugins/dict_org/writer.py b/pyglossary/plugins/dict_org/writer.py index de527b5fa..69db8b8aa 100644 --- a/pyglossary/plugins/dict_org/writer.py +++ b/pyglossary/plugins/dict_org/writer.py @@ -16,7 +16,7 @@ __all__ = ["Writer"] -def installToDictd(filename: str, dictzip: bool) -> None: +def _installToDictd(filename: str, dictzip: bool) -> None: """Filename is without extension (neither .index or .dict or .dict.dz).""" import shutil import subprocess @@ -73,7 +73,7 @@ def finish(self) -> None: if self._dictzip: runDictzip(f"{self._filename}.dict") if self._install: - installToDictd( + _installToDictd( self._filename, self._dictzip, ) diff --git a/pyglossary/plugins/dsl/reader.py b/pyglossary/plugins/dsl/reader.py index 4716b7ff8..6aa39ddc0 100644 --- a/pyglossary/plugins/dsl/reader.py +++ b/pyglossary/plugins/dsl/reader.py @@ -50,7 +50,7 @@ htmlEntityPattern = re.compile(r"&#?\w+;") -def unescape(text: str) -> str: +def _unescape(text: str) -> str: def fixup(m: re.Match) -> str: text = m.group(0) if text[:2] == "&#": @@ -76,15 +76,12 @@ def fixup(m: re.Match) -> str: return htmlEntityPattern.sub(fixup, text) -# }}} - - # precompiled regexs -re_wrapped_in_quotes = re.compile("^(\\'|\")(.*)(\\1)$") +_re_wrapped_in_quotes = re.compile("^(\\'|\")(.*)(\\1)$") -def unwrap_quotes(s: str) -> str: - return re_wrapped_in_quotes.sub("\\2", s) +def _unwrap_quotes(s: str) -> str: + return _re_wrapped_in_quotes.sub("\\2", s) class Reader: @@ -228,17 +225,17 @@ def detectEncoding(self) -> str: ) def setInfo(self, key: str, value: str) -> None: - self._glos.setInfo(key, unwrap_quotes(value)) + self._glos.setInfo(key, _unwrap_quotes(value)) def processHeaderLine(self, line: str) -> None: if line.startswith("#NAME"): - self.setInfo("name", unwrap_quotes(line[6:].strip())) + self.setInfo("name", _unwrap_quotes(line[6:].strip())) elif line.startswith("#INDEX_LANGUAGE"): - self._glos.sourceLangName = unwrap_quotes(line[16:].strip()) + self._glos.sourceLangName = _unwrap_quotes(line[16:].strip()) elif line.startswith("#CONTENTS_LANGUAGE"): - self._glos.targetLangName = unwrap_quotes(line[19:].strip()) + self._glos.targetLangName = _unwrap_quotes(line[19:].strip()) elif line.startswith("#INCLUDE"): - self.processInclude(unwrap_quotes(line[9:].strip())) + self.processInclude(_unwrap_quotes(line[9:].strip())) def processInclude(self, filename: str) -> None: reader = Reader(self._glos) diff --git a/pyglossary/plugins/dsl/transform.py b/pyglossary/plugins/dsl/transform.py index 8148053c5..240582168 100644 --- a/pyglossary/plugins/dsl/transform.py +++ b/pyglossary/plugins/dsl/transform.py @@ -14,7 +14,7 @@ __all__ = ["Transformer"] -re_comment_block = re.compile(r"\{\{([^}]*)\}\}") +_re_comment_block = re.compile(r"\{\{([^}]*)\}\}") class Result(NamedTuple): @@ -161,7 +161,7 @@ def closeTag(self, tag: str) -> None: def transform(self) -> tuple[Result | None, ErrorType]: # TODO: implement these 2 with lex functions - self.input = re_comment_block.sub("", self.input) + self.input = _re_comment_block.sub("", self.input) lex: LexType = lexRoot tr = cast("TransformerType", self) diff --git a/pyglossary/plugins/ebook_epub2/writer.py b/pyglossary/plugins/ebook_epub2/writer.py index e3d232666..20b7376d1 100644 --- a/pyglossary/plugins/ebook_epub2/writer.py +++ b/pyglossary/plugins/ebook_epub2/writer.py @@ -30,7 +30,7 @@ __all__ = ["Writer"] -def newUUID() -> str: +def _newUUID() -> str: import uuid return str(uuid.uuid4()).replace("-", "") @@ -172,7 +172,7 @@ class Writer(EbookWriter): def __init__(self, glos: WriterGlossaryType) -> None: glos.setInfo( "uuid", - os.getenv("EPUB_UUID") or glos.getInfo("uuid") or newUUID(), + os.getenv("EPUB_UUID") or glos.getInfo("uuid") or _newUUID(), ) EbookWriter.__init__( self, diff --git a/pyglossary/plugins/ebook_kobo/writer.py b/pyglossary/plugins/ebook_kobo/writer.py index 123cd77aa..e58f9db63 100644 --- a/pyglossary/plugins/ebook_kobo/writer.py +++ b/pyglossary/plugins/ebook_kobo/writer.py @@ -39,7 +39,7 @@ __all__ = ["Writer"] -def is_cyrillic_char(c: str) -> bool: +def _is_cyrillic_char(c: str) -> bool: # U+0400 - U+04FF: Cyrillic # U+0500 - U+052F: Cyrillic Supplement if "\u0400" <= c <= "\u052f": @@ -62,7 +62,7 @@ def is_cyrillic_char(c: str) -> bool: return c in {"\ufe2e", "\ufe2f", "\u1d2b", "\u1d78"} -def fixFilename(fname: str) -> str: +def _fixFilename(fname: str) -> str: return Path(fname.replace("/", "2F").replace("\\", "5C")).name @@ -98,7 +98,7 @@ def get_prefix(self, word: str) -> str: # noqa: PLR6301 return "11" if len(wo) > 1 and wo[1] == "\x00": wo = wo[:1] - if is_cyrillic_char(wo[0]): + if _is_cyrillic_char(wo[0]): return wo # if either of the first 2 chars are not unicode letters, return "11" for c in wo: @@ -129,7 +129,7 @@ def write_groups(self) -> Generator[None, EntryType, None]: def writeGroup(lastPrefix: str) -> None: nonlocal htmlContents - group_fname = fixFilename(lastPrefix) + group_fname = _fixFilename(lastPrefix) htmlContents += "" core.trace( log, diff --git a/pyglossary/plugins/edict2/conv.py b/pyglossary/plugins/edict2/conv.py index 5cd46eba4..bf1be6901 100644 --- a/pyglossary/plugins/edict2/conv.py +++ b/pyglossary/plugins/edict2/conv.py @@ -17,9 +17,19 @@ from pyglossary.lxml_types import T_htmlfile -line_reg = re.compile(r"^([^ ]+) ([^ ]+) \[([^\]]+)\] /(.+)/$") +__all__ = [ + "Article", + "parse_line_simp", + "parse_line_trad", + "render_article", + "render_syllables_color", + "render_syllables_no_color", +] -COLORS = { + +_re_line = re.compile(r"^([^ ]+) ([^ ]+) \[([^\]]+)\] /(.+)/$") + +_COLORS = { "": "black", "1": "red", "2": "orange", @@ -31,7 +41,7 @@ def parse_line_trad(line: str) -> tuple[str, str, str, list[str]] | None: line = line.strip() - match = line_reg.match(line) + match = _re_line.match(line) if match is None: return None trad, simp, pinyin, eng = match.groups() @@ -41,7 +51,7 @@ def parse_line_trad(line: str) -> tuple[str, str, str, list[str]] | None: def parse_line_simp(line: str) -> tuple[str, str, str, list[str]] | None: line = line.strip() - match = line_reg.match(line) + match = _re_line.match(line) if match is None: return None trad, simp, pinyin, eng = match.groups() @@ -82,12 +92,12 @@ def render_syllables_color( with hf.element("div", style="display: inline-block"): for index, syllable in enumerate(syllables): - with hf.element("font", color=COLORS[tones[index]]): + with hf.element("font", color=_COLORS[tones[index]]): hf.write(syllable) # @lru_cache(maxsize=128) -def convert_pinyin(pinyin: str) -> tuple[Sequence[str], Sequence[str]]: +def _convert_pinyin(pinyin: str) -> tuple[Sequence[str], Sequence[str]]: return tuple(zip(*map(convert, pinyin.split()), strict=False)) # type: ignore @@ -98,7 +108,7 @@ def render_article( names = article.names() # pinyin_tones = [convert(syl) for syl in pinyin.split()] - pinyin_list, tones = convert_pinyin(article.pinyin) + pinyin_list, tones = _convert_pinyin(article.pinyin) f = BytesIO() with ET.htmlfile(f, encoding="utf-8") as _hf: # noqa: PLR1702 diff --git a/pyglossary/plugins/edict2/pinyin.py b/pyglossary/plugins/edict2/pinyin.py index e5682acba..38d92bb5a 100644 --- a/pyglossary/plugins/edict2/pinyin.py +++ b/pyglossary/plugins/edict2/pinyin.py @@ -5,7 +5,7 @@ __all__ = ["convert"] -TONES = { +_TONES = { "a1": "ā", "a2": "á", "a3": "ǎ", @@ -33,7 +33,7 @@ } # using v for the umlauted u -VOWELS = ("a", "e", "o", "iu", "ui", "i", "u", "v") +_VOWELS = ("a", "e", "o", "iu", "ui", "i", "u", "v") def convert(word: str) -> tuple[str, str]: @@ -45,9 +45,9 @@ def convert(word: str) -> tuple[str, str]: if tone not in {"1", "2", "3", "4"}: return word, "" - for vowel in VOWELS: + for vowel in _VOWELS: if vowel in pinyin: vowel1 = vowel[-1] - return pinyin.replace(vowel1, TONES[vowel1 + tone]), tone + return pinyin.replace(vowel1, _TONES[vowel1 + tone]), tone return pinyin, tone diff --git a/pyglossary/plugins/edict2/reader.py b/pyglossary/plugins/edict2/reader.py index 1f31b1d0f..34e7473c0 100644 --- a/pyglossary/plugins/edict2/reader.py +++ b/pyglossary/plugins/edict2/reader.py @@ -5,7 +5,14 @@ from pyglossary.core import log from pyglossary.io_utils import nullTextIO -from . import conv +from .conv import ( + Article, + parse_line_simp, + parse_line_trad, + render_article, + render_syllables_color, + render_syllables_no_color, +) if TYPE_CHECKING: import io @@ -58,13 +65,11 @@ def __iter__(self) -> Iterator[EntryType]: glos = self._glos render_syllables = ( - conv.render_syllables_color + render_syllables_color if self._colorize_tones - else conv.render_syllables_no_color - ) - parse_line = ( - conv.parse_line_trad if self._traditional_title else conv.parse_line_simp + else render_syllables_no_color ) + parse_line = parse_line_trad if self._traditional_title else parse_line_simp while True: line = file.readline() @@ -79,9 +84,9 @@ def __iter__(self) -> Iterator[EntryType]: if parts is None: log.warning(f"bad line: {line!r}") continue - names, article_text = conv.render_article( + names, article_text = render_article( render_syllables, - conv.Article(*parts), + Article(*parts), ) entry = glos.newEntry( names, diff --git a/pyglossary/plugins/edict2/summarize.py b/pyglossary/plugins/edict2/summarize.py index 737ec0b89..b656b66ae 100644 --- a/pyglossary/plugins/edict2/summarize.py +++ b/pyglossary/plugins/edict2/summarize.py @@ -5,9 +5,9 @@ __all__ = ["summarize"] -parenthetical = re.compile(r"\([^)]+?\)") -punct_table = {ord(p): " " for p in string.punctuation if p not in "-'"} -stops = { +_parenthetical = re.compile(r"\([^)]+?\)") +_punct_table = {ord(p): " " for p in string.punctuation if p not in "-'"} +_stops = { "i", "me", "my", @@ -165,10 +165,10 @@ def summarize(phrase: str) -> str: - phrase = parenthetical.sub("", phrase) - phrase = phrase.translate(punct_table) + phrase = _parenthetical.sub("", phrase) + phrase = phrase.translate(_punct_table) words = phrase.split() - relevant_words = [word for word in words if word not in stops] + relevant_words = [word for word in words if word not in _stops] if not relevant_words: relevant_words = words return " ".join(relevant_words[:10]) diff --git a/pyglossary/plugins/edlin/writer.py b/pyglossary/plugins/edlin/writer.py index 97be7cfda..3be39e17c 100644 --- a/pyglossary/plugins/edlin/writer.py +++ b/pyglossary/plugins/edlin/writer.py @@ -18,7 +18,7 @@ __all__ = ["Writer"] -def makeDir(direc: str) -> None: +def _makeDir(direc: str) -> None: if not isdir(direc): os.makedirs(direc) @@ -79,7 +79,7 @@ def saveEntry( nextHash: str | None, ) -> None: dpath = join(self._filename, thisHash[:2]) - makeDir(dpath) + _makeDir(dpath) with open( join(dpath, thisHash[2:]), "w", diff --git a/pyglossary/plugins/freedict/reader.py b/pyglossary/plugins/freedict/reader.py index cdebee57e..900e0c3a6 100644 --- a/pyglossary/plugins/freedict/reader.py +++ b/pyglossary/plugins/freedict/reader.py @@ -27,14 +27,14 @@ __all__ = ["Reader"] -TEI = "{http://www.tei-c.org/ns/1.0}" -ENTRY = f"{TEI}entry" -INCLUDE = "{http://www.w3.org/2001/XInclude}include" -NAMESPACE = {None: "http://www.tei-c.org/ns/1.0"} +_TEI = "{http://www.tei-c.org/ns/1.0}" +_ENTRY = f"{_TEI}entry" +_INCLUDE = "{http://www.w3.org/2001/XInclude}include" +_NAMESPACE = {None: "http://www.tei-c.org/ns/1.0"} @dataclass(slots=True) -class ParsedSense: +class _ParsedSense: transCits: list[Element] defs: list[Element] grams: list[Element] @@ -66,7 +66,7 @@ class Reader(ReaderUtils): gramClass = "grammar" supportedTags: set[str] = { - f"{TEI}{tag}" + f"{_TEI}{tag}" for tag in ( "entry", "form", # entry.form @@ -162,7 +162,7 @@ def writeTransCit( assert isinstance(children, list) quotes: list[Element] = [] - sense = ET.Element(f"{TEI}sense") + sense = ET.Element(f"{_TEI}sense") for child in children: if isinstance(child, str): child = child.strip() # noqa: PLW2901 @@ -174,15 +174,15 @@ def writeTransCit( if child.__class__.__name__ == "_Comment": continue - if child.tag == f"{TEI}quote": + if child.tag == f"{_TEI}quote": quotes.append(child) continue - if child.tag in {f"{TEI}gramGrp", f"{TEI}usg", f"{TEI}note"}: + if child.tag in {f"{_TEI}gramGrp", f"{_TEI}usg", f"{_TEI}note"}: sense.append(child) continue - if child.tag == f"{TEI}cit": + if child.tag == f"{_TEI}cit": # TODO continue @@ -222,7 +222,7 @@ def writeChild(item: str | Element, depth: int) -> None: hf.write(item) return - if item.tag == f"{TEI}ref": + if item.tag == f"{_TEI}ref": if item.text: if count > 0: hf.write(self.getCommaSep(item.text)) @@ -287,20 +287,20 @@ def writeRichText( if isinstance(child, str): hf.write(child) continue - if child.tag == f"{TEI}ref": + if child.tag == f"{_TEI}ref": self.writeRef(hf, child) continue - if child.tag == f"{TEI}br": + if child.tag == f"{_TEI}br": hf.write(ET.Element("br")) continue - if child.tag == f"{TEI}p": + if child.tag == f"{_TEI}p": with hf.element("p", **child.attrib): self.writeRichText(hf, child) continue - if child.tag == f"{TEI}div": + if child.tag == f"{_TEI}div": self.writeWithDirection(hf, child, "div") continue - if child.tag == f"{TEI}span": + if child.tag == f"{_TEI}span": self.writeWithDirection(hf, child, "span") continue @@ -316,7 +316,7 @@ def writeNote( def parseSenseSense( # noqa: PLR0912 self, sense: Element, - ) -> ParsedSense: + ) -> _ParsedSense: # this element can be 1st-level (directly under ) # or 2nd-level transCits: list[Element] = [] @@ -329,7 +329,7 @@ def parseSenseSense( # noqa: PLR0912 exampleCits: list[Element] = [] langs: list[Element] = [] for child in sense.iterchildren(): - if child.tag == f"{TEI}cit": + if child.tag == f"{_TEI}cit": if child.attrib.get("type", "trans") == "trans": transCits.append(child) elif child.attrib.get("type") == "example": @@ -338,11 +338,11 @@ def parseSenseSense( # noqa: PLR0912 log.warning(f"unknown cit type: {self.tostring(child)}") continue - if child.tag == f"{TEI}def": + if child.tag == f"{_TEI}def": defs.append(child) continue - if child.tag == f"{TEI}note": + if child.tag == f"{_TEI}note": type_ = child.attrib.get("type") if not type_: notes.append(child) @@ -355,31 +355,31 @@ def parseSenseSense( # noqa: PLR0912 notes.append(child) continue - if child.tag == f"{TEI}ref": + if child.tag == f"{_TEI}ref": refs.append(child) continue - if child.tag == f"{TEI}usg": + if child.tag == f"{_TEI}usg": if not child.text: log.warning(f"empty usg: {self.tostring(child)}") continue usages.append(child) continue - if child.tag == f"{TEI}lang": + if child.tag == f"{_TEI}lang": langs.append(child) continue - if child.tag in {f"{TEI}sense", f"{TEI}gramGrp"}: + if child.tag in {f"{_TEI}sense", f"{_TEI}gramGrp"}: continue - if child.tag == f"{TEI}xr": + if child.tag == f"{_TEI}xr": xrList.append(child) continue log.warning(f"unknown tag {child.tag} in ") - return ParsedSense( + return _ParsedSense( transCits=transCits, defs=defs, grams=grams, @@ -463,10 +463,10 @@ def writeSenseSense( # noqa: PLR0912 "style": f"padding: {self._example_padding}px 0px;", }, ): - for quote in cit.findall("quote", NAMESPACE): + for quote in cit.findall("quote", _NAMESPACE): self.writeWithDirection(hf, quote, "div") - for cit2 in cit.findall("cit", NAMESPACE): - for quote in cit2.findall("quote", NAMESPACE): + for cit2 in cit.findall("cit", _NAMESPACE): + for quote in cit2.findall("quote", _NAMESPACE): quote.attrib.update(cit2.attrib) self.writeWithDirection(hf, quote, "div") @@ -515,7 +515,7 @@ def writeGramGroupChildren( hf: T_htmlfile, elem: Element, ) -> None: - self.writeGramGroups(hf, elem.findall("gramGrp", NAMESPACE)) + self.writeGramGroups(hf, elem.findall("gramGrp", _NAMESPACE)) def writeSense( self, @@ -526,7 +526,7 @@ def writeSense( self.writeGramGroupChildren(hf, sense) self.makeList( hf, - sense.findall("sense", NAMESPACE), + sense.findall("sense", _NAMESPACE), self.writeSenseSense, single_prefix="", ) @@ -565,15 +565,15 @@ def normalizeGramGrpChild(self, elem: Element) -> str: # noqa: PLR0912 if not text: return "" text = text.strip() - if tag == f"{TEI}pos": + if tag == f"{_TEI}pos": return self.posMapping.get(text.lower(), text) - if tag == f"{TEI}gen": + if tag == f"{_TEI}gen": return self.genderMapping.get(text.lower(), text) - if tag in {f"{TEI}num", f"{TEI}number"}: + if tag in {f"{_TEI}num", f"{_TEI}number"}: return self.numberMapping.get(text.lower(), text) - if tag == f"{TEI}subc": + if tag == f"{_TEI}subc": return self.subcMapping.get(text.lower(), text) - if tag == f"{TEI}gram": + if tag == f"{_TEI}gram": type_ = elem.get("type") if type_: if type_ == "pos": @@ -590,10 +590,10 @@ def normalizeGramGrpChild(self, elem: Element) -> str: # noqa: PLR0912 log.warning(f" with no type: {self.tostring(elem)}") return text - if tag == f"{TEI}note": + if tag == f"{_TEI}note": return text - if tag == f"{TEI}colloc": + if tag == f"{_TEI}colloc": return "" log.warning( @@ -622,9 +622,9 @@ def br() -> Element: inflectedKeywords: list[str] = [] - for form in entry.findall("form", NAMESPACE): + for form in entry.findall("form", _NAMESPACE): inflected = form.get("type") == "infl" - for orth in form.findall("orth", NAMESPACE): + for orth in form.findall("orth", _NAMESPACE): if not orth.text: continue if inflected: @@ -636,10 +636,10 @@ def br() -> Element: pronList = [ pron.text.strip("/") - for pron in entry.findall("form/pron", NAMESPACE) + for pron in entry.findall("form/pron", _NAMESPACE) if pron.text ] - senseList = entry.findall("sense", NAMESPACE) + senseList = entry.findall("sense", _NAMESPACE) with ET.htmlfile(buff, encoding="utf-8") as hf: with hf.element("div"): @@ -680,7 +680,7 @@ def br() -> Element: ) def setWordCount(self, header: Element) -> None: - extent_elem = header.find(".//extent", NAMESPACE) + extent_elem = header.find(".//extent", _NAMESPACE) if extent_elem is None: log.warning( "did not find 'extent' tag in metedata, progress bar will not word", @@ -717,7 +717,7 @@ def setGlosInfo(self, key: str, value: str) -> None: self._glos.setInfo(key, unescape_unicode(value)) def setCopyright(self, header: Element) -> None: - elems = header.findall(".//availability//p", NAMESPACE) + elems = header.findall(".//availability//p", _NAMESPACE) if not elems: log.warning("did not find copyright") return @@ -727,14 +727,14 @@ def setCopyright(self, header: Element) -> None: log.debug(f"Copyright: {copyright_!r}") def setPublisher(self, header: Element) -> None: - elem = header.find(".//publisher", NAMESPACE) + elem = header.find(".//publisher", _NAMESPACE) if elem is None or not elem.text: log.warning("did not find publisher") return self.setGlosInfo("publisher", elem.text) def setCreationTime(self, header: Element) -> None: - elem = header.find(".//publicationStmt/date", NAMESPACE) + elem = header.find(".//publicationStmt/date", _NAMESPACE) if elem is None or not elem.text: return self.setGlosInfo("creationTime", elem.text) @@ -745,7 +745,7 @@ def replaceRefLink(self, text: str) -> str: def setDescription(self, header: Element) -> None: elems: list[Element] = [] for tag in ("sourceDesc", "projectDesc"): - elems += header.findall(f".//{tag}//p", NAMESPACE) + elems += header.findall(f".//{tag}//p", _NAMESPACE) desc = self.stripParagList(elems) if not desc: return @@ -771,11 +771,11 @@ def setDescription(self, header: Element) -> None: def setMetadata(self, header: Element) -> None: self.setWordCount(header) - title = header.find(".//title", NAMESPACE) + title = header.find(".//title", _NAMESPACE) if title is not None and title.text: self.setGlosInfo("name", title.text) - edition = header.find(".//edition", NAMESPACE) + edition = header.find(".//edition", _NAMESPACE) if edition is not None and edition.text: self.setGlosInfo("edition", edition.text) @@ -846,7 +846,7 @@ def open( context = ET.iterparse( # type: ignore # noqa: PGH003 cfile, events=("end",), - tag=f"{TEI}teiHeader", + tag=f"{_TEI}teiHeader", ) for _, elem in context: self.setMetadata(elem) # type: ignore @@ -886,12 +886,12 @@ def __iter__(self) -> Iterator[EntryType]: context = ET.iterparse( # type: ignore # noqa: PGH003 self._file, events=("end",), - tag=(ENTRY, INCLUDE), + tag=(_ENTRY, _INCLUDE), ) for _, _elem in context: elem = cast("Element", _elem) - if elem.tag == INCLUDE: + if elem.tag == _INCLUDE: reader = self.loadInclude(elem) if reader is not None: yield from reader diff --git a/pyglossary/plugins/html_dir/writer.py b/pyglossary/plugins/html_dir/writer.py index aeae6dd45..6ac0d87c0 100644 --- a/pyglossary/plugins/html_dir/writer.py +++ b/pyglossary/plugins/html_dir/writer.py @@ -26,10 +26,10 @@ __all__ = ["Writer"] -nbsp = "\xa0" -# nbsp = " " +_nbsp = "\xa0" +# _nbsp = " " -darkStyle = """ +_darkStyle = """ body {{ background-color: #373737; color: #eee; @@ -295,7 +295,7 @@ def getEntryWebLink(entry: EntryType) -> str: if not entry_url_fmt: return "" url = entry_url_fmt.format(word=html.escape(entry.l_word[0])) - return f'{nbsp}🌏' + return f'{_nbsp}🌏' # from math import log2, ceil # maxPosHexLen = int(ceil(log2(max_file_size) / 4)) @@ -314,7 +314,7 @@ def getEntryWebLink(entry: EntryType) -> str: title = glos.getInfo("name") style = "" if self._dark: - style = darkStyle + style = _darkStyle cssLink = '' if self._css else "" @@ -345,7 +345,7 @@ def navBar() -> str: ) return ( '" ) @@ -446,7 +446,7 @@ def addLinks(text: str, pos: int) -> None: # entry_link_sym = "¶" entry_link_sym = "🔗" text = ( - f'
{title}{nbsp}{nbsp}' + f'
{title}{_nbsp}{_nbsp}' f'' f"{entry_link_sym}" f"{getEntryWebLink(entry)}" diff --git a/pyglossary/plugins/quickdic6/writer.py b/pyglossary/plugins/quickdic6/writer.py index 4daccc77c..e55e2ed54 100644 --- a/pyglossary/plugins/quickdic6/writer.py +++ b/pyglossary/plugins/quickdic6/writer.py @@ -28,10 +28,10 @@ __all__ = ["Writer"] -default_de_normalizer_rules = ( +_defaultNormalizerRulesDE = ( ":: Lower; 'ae' > 'ä'; 'oe' > 'ö'; 'ue' > 'ü'; 'ß' > 'ss'; " ) -default_normalizer_rules = ( +_defaultNormalizerRules = ( ":: Any-Latin; ' ' > ; :: Lower; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC ;" ) @@ -132,7 +132,7 @@ def write(self) -> Generator[None, EntryType, None]: short_name = long_name = iso = sourceLangCode normalizer_rules = self._normalizer_rules or ( - default_de_normalizer_rules if iso == "DE" else default_normalizer_rules + _defaultNormalizerRulesDE if iso == "DE" else _defaultNormalizerRules ) self._dic.add_index( short_name, diff --git a/pyglossary/plugins/stardict/reader.py b/pyglossary/plugins/stardict/reader.py index 1af1c328f..4ce908aa7 100644 --- a/pyglossary/plugins/stardict/reader.py +++ b/pyglossary/plugins/stardict/reader.py @@ -32,7 +32,7 @@ __all__ = ["Reader"] -def verifySameTypeSequence(s: str) -> bool: +def _verifySameTypeSequence(s: str) -> bool: if not s: return True # maybe should just check it's in ("h", "m", "x") @@ -41,8 +41,10 @@ def verifySameTypeSequence(s: str) -> bool: return len(s) == 1 -class XdxfTransformerType(Protocol): - def transformByInnerString(self, text: str) -> str: ... +if TYPE_CHECKING: + + class XdxfTransformerType(Protocol): + def transformByInnerString(self, text: str) -> str: ... class Reader: @@ -113,7 +115,7 @@ def open(self, filename: str) -> None: self._filename = realpath(self._filename) self.readIfoFile() sametypesequence = self._glos.getInfo("sametypesequence") - if not verifySameTypeSequence(sametypesequence): + if not _verifySameTypeSequence(sametypesequence): raise LookupError(f"Invalid {sametypesequence = }") self._indexData = self.readIdxFile() self._wordCount = len(self._indexData) diff --git a/pyglossary/plugins/stardict/sqlist.py b/pyglossary/plugins/stardict/sqlist.py index 0f758cf6d..cd0d0e65f 100644 --- a/pyglossary/plugins/stardict/sqlist.py +++ b/pyglossary/plugins/stardict/sqlist.py @@ -20,7 +20,7 @@ ] -class BaseSqList: +class _BaseSqList: def __init__( self, database: str, @@ -102,7 +102,7 @@ def __iter__(self) -> Iterator[EntryType]: yield row[1:] -class IdxSqList(BaseSqList): +class IdxSqList(_BaseSqList): @classmethod def getExtraColumns(cls) -> list[tuple[str, str]]: # list[(columnName, dataType)] @@ -111,7 +111,7 @@ def getExtraColumns(cls) -> list[tuple[str, str]]: ] -class SynSqList(BaseSqList): +class SynSqList(_BaseSqList): @classmethod def getExtraColumns(cls) -> list[tuple[str, str]]: # list[(columnName, dataType)] diff --git a/pyglossary/plugins/stardict/writer.py b/pyglossary/plugins/stardict/writer.py index 7fe14cbd2..7639ffb14 100644 --- a/pyglossary/plugins/stardict/writer.py +++ b/pyglossary/plugins/stardict/writer.py @@ -44,16 +44,16 @@ ) -# re_newline = re.compile("[\n\r]+") -re_newline = re.compile("\n\r?|\r\n?") +# _re_newline = re.compile("[\n\r]+") +_re_newline = re.compile("\n\r?|\r\n?") -def newlinesToSpace(text: str) -> str: - return re_newline.sub(" ", text) +def _newlinesToSpace(text: str) -> str: + return _re_newline.sub(" ", text) -def newlinesToBr(text: str) -> str: - return re_newline.sub("
", text) +def _newlinesToBr(text: str) -> str: + return _re_newline.sub("
", text) class Writer: @@ -355,7 +355,7 @@ def writeIdxFile(self, indexList: T_SdList[tuple[bytes, bytes]]) -> None: ) def getBookname(self) -> str: - bookname = newlinesToSpace(self._glos.getInfo("name")) + bookname = _newlinesToSpace(self._glos.getInfo("name")) sourceLang = self._sourceLang targetLang = self._targetLang if sourceLang and targetLang: @@ -374,7 +374,7 @@ def getDescription(self) -> str: publisher = glos.getInfo("publisher") if publisher: desc = f"Publisher: {publisher}\n{desc}" - return newlinesToBr(desc) + return _newlinesToBr(desc) def writeIfoFile( self, @@ -409,7 +409,7 @@ def writeIfoFile( value = glos.getInfo(key) if not value: continue - value = newlinesToSpace(value) + value = _newlinesToSpace(value) ifoDict[key] = value ifoDict["description"] = self.getDescription() diff --git a/pyglossary/plugins/wordnet/reader.py b/pyglossary/plugins/wordnet/reader.py index df881cd7b..ad905966b 100644 --- a/pyglossary/plugins/wordnet/reader.py +++ b/pyglossary/plugins/wordnet/reader.py @@ -38,12 +38,12 @@ # "(?:[^\\"]+|\\.)*" # some examples don't have closing quote which # make the subn with this expression hang -# quotedTextPattern = re.compile(r'"(?:[^"]+|\.)*["|\n]') +# _re_quotedText = re.compile(r'"(?:[^"]+|\.)*["|\n]') # make it a capturing group so that we can get rid of quotes -quotedTextPattern = re.compile(r'"([^"]+)"') +_re_quotedText = re.compile(r'"([^"]+)"') -refPattern = re.compile(r"`(\w+)'") +_re_ref = re.compile(r"`(\w+)'") class SynSet: @@ -225,11 +225,11 @@ def a(word: str) -> str: if not line or not line.strip(): continue synset = SynSet(line) - gloss_with_examples, _ = quotedTextPattern.subn( + gloss_with_examples, _ = _re_quotedText.subn( lambda x: f'{x.group(1)}', synset.gloss, ) - gloss_with_examples, _ = refPattern.subn( + gloss_with_examples, _ = _re_ref.subn( lambda x: a(x.group(1)), gloss_with_examples, )