diff --git a/simplemma/strategies/dictionaries/data/ast.plzma b/simplemma/strategies/dictionaries/data/ast.plzma index ee7ba10..8e11639 100644 Binary files a/simplemma/strategies/dictionaries/data/ast.plzma and b/simplemma/strategies/dictionaries/data/ast.plzma differ diff --git a/simplemma/strategies/dictionaries/data/bg.plzma b/simplemma/strategies/dictionaries/data/bg.plzma index ffa0752..f0ba170 100644 Binary files a/simplemma/strategies/dictionaries/data/bg.plzma and b/simplemma/strategies/dictionaries/data/bg.plzma differ diff --git a/simplemma/strategies/dictionaries/data/ca.plzma b/simplemma/strategies/dictionaries/data/ca.plzma index 169a000..dd51824 100644 Binary files a/simplemma/strategies/dictionaries/data/ca.plzma and b/simplemma/strategies/dictionaries/data/ca.plzma differ diff --git a/simplemma/strategies/dictionaries/data/cs.plzma b/simplemma/strategies/dictionaries/data/cs.plzma index 72f7a36..2d23f4d 100644 Binary files a/simplemma/strategies/dictionaries/data/cs.plzma and b/simplemma/strategies/dictionaries/data/cs.plzma differ diff --git a/simplemma/strategies/dictionaries/data/cy.plzma b/simplemma/strategies/dictionaries/data/cy.plzma index ebe06ba..451ae75 100644 Binary files a/simplemma/strategies/dictionaries/data/cy.plzma and b/simplemma/strategies/dictionaries/data/cy.plzma differ diff --git a/simplemma/strategies/dictionaries/data/da.plzma b/simplemma/strategies/dictionaries/data/da.plzma index fcd07e3..bf35d70 100644 Binary files a/simplemma/strategies/dictionaries/data/da.plzma and b/simplemma/strategies/dictionaries/data/da.plzma differ diff --git a/simplemma/strategies/dictionaries/data/de.plzma b/simplemma/strategies/dictionaries/data/de.plzma index 7ce4eb7..9763bc5 100644 Binary files a/simplemma/strategies/dictionaries/data/de.plzma and b/simplemma/strategies/dictionaries/data/de.plzma differ diff --git a/simplemma/strategies/dictionaries/data/el.plzma b/simplemma/strategies/dictionaries/data/el.plzma index bf1576b..a06b004 100644 Binary files a/simplemma/strategies/dictionaries/data/el.plzma and b/simplemma/strategies/dictionaries/data/el.plzma differ diff --git a/simplemma/strategies/dictionaries/data/en.plzma b/simplemma/strategies/dictionaries/data/en.plzma index 1848975..18bb3e3 100644 Binary files a/simplemma/strategies/dictionaries/data/en.plzma and b/simplemma/strategies/dictionaries/data/en.plzma differ diff --git a/simplemma/strategies/dictionaries/data/enm.plzma b/simplemma/strategies/dictionaries/data/enm.plzma index b1a2558..a3794a3 100644 Binary files a/simplemma/strategies/dictionaries/data/enm.plzma and b/simplemma/strategies/dictionaries/data/enm.plzma differ diff --git a/simplemma/strategies/dictionaries/data/es.plzma b/simplemma/strategies/dictionaries/data/es.plzma index e21a44d..b768362 100644 Binary files a/simplemma/strategies/dictionaries/data/es.plzma and b/simplemma/strategies/dictionaries/data/es.plzma differ diff --git a/simplemma/strategies/dictionaries/data/et.plzma b/simplemma/strategies/dictionaries/data/et.plzma index f988e14..36dae58 100644 Binary files a/simplemma/strategies/dictionaries/data/et.plzma and b/simplemma/strategies/dictionaries/data/et.plzma differ diff --git a/simplemma/strategies/dictionaries/data/fa.plzma b/simplemma/strategies/dictionaries/data/fa.plzma index 2ed1405..04b4a5e 100644 Binary files a/simplemma/strategies/dictionaries/data/fa.plzma and b/simplemma/strategies/dictionaries/data/fa.plzma differ diff --git a/simplemma/strategies/dictionaries/data/fi.plzma b/simplemma/strategies/dictionaries/data/fi.plzma index 01ca226..9cacbc5 100644 Binary files a/simplemma/strategies/dictionaries/data/fi.plzma and b/simplemma/strategies/dictionaries/data/fi.plzma differ diff --git a/simplemma/strategies/dictionaries/data/fr.plzma b/simplemma/strategies/dictionaries/data/fr.plzma index 2168dd3..287a7ae 100644 Binary files a/simplemma/strategies/dictionaries/data/fr.plzma and b/simplemma/strategies/dictionaries/data/fr.plzma differ diff --git a/simplemma/strategies/dictionaries/data/ga.plzma b/simplemma/strategies/dictionaries/data/ga.plzma index bbeb3a0..42d125b 100644 Binary files a/simplemma/strategies/dictionaries/data/ga.plzma and b/simplemma/strategies/dictionaries/data/ga.plzma differ diff --git a/simplemma/strategies/dictionaries/data/gd.plzma b/simplemma/strategies/dictionaries/data/gd.plzma index ce03f89..10e69d1 100644 Binary files a/simplemma/strategies/dictionaries/data/gd.plzma and b/simplemma/strategies/dictionaries/data/gd.plzma differ diff --git a/simplemma/strategies/dictionaries/data/gl.plzma b/simplemma/strategies/dictionaries/data/gl.plzma index b2d23db..26a5bc9 100644 Binary files a/simplemma/strategies/dictionaries/data/gl.plzma and b/simplemma/strategies/dictionaries/data/gl.plzma differ diff --git a/simplemma/strategies/dictionaries/data/gv.plzma b/simplemma/strategies/dictionaries/data/gv.plzma index 1120f81..561b7d5 100644 Binary files a/simplemma/strategies/dictionaries/data/gv.plzma and b/simplemma/strategies/dictionaries/data/gv.plzma differ diff --git a/simplemma/strategies/dictionaries/data/hbs.plzma b/simplemma/strategies/dictionaries/data/hbs.plzma index 7a9bd3b..adef566 100644 Binary files a/simplemma/strategies/dictionaries/data/hbs.plzma and b/simplemma/strategies/dictionaries/data/hbs.plzma differ diff --git a/simplemma/strategies/dictionaries/data/hi.plzma b/simplemma/strategies/dictionaries/data/hi.plzma index e597250..766e4bb 100644 Binary files a/simplemma/strategies/dictionaries/data/hi.plzma and b/simplemma/strategies/dictionaries/data/hi.plzma differ diff --git a/simplemma/strategies/dictionaries/data/hu.plzma b/simplemma/strategies/dictionaries/data/hu.plzma index efbc23c..a26ed25 100644 Binary files a/simplemma/strategies/dictionaries/data/hu.plzma and b/simplemma/strategies/dictionaries/data/hu.plzma differ diff --git a/simplemma/strategies/dictionaries/data/hy.plzma b/simplemma/strategies/dictionaries/data/hy.plzma index fa7d70e..edc5050 100644 Binary files a/simplemma/strategies/dictionaries/data/hy.plzma and b/simplemma/strategies/dictionaries/data/hy.plzma differ diff --git a/simplemma/strategies/dictionaries/data/id.plzma b/simplemma/strategies/dictionaries/data/id.plzma index 95f14f0..75f6a6a 100644 Binary files a/simplemma/strategies/dictionaries/data/id.plzma and b/simplemma/strategies/dictionaries/data/id.plzma differ diff --git a/simplemma/strategies/dictionaries/data/is.plzma b/simplemma/strategies/dictionaries/data/is.plzma index 7a51c4a..b188fae 100644 Binary files a/simplemma/strategies/dictionaries/data/is.plzma and b/simplemma/strategies/dictionaries/data/is.plzma differ diff --git a/simplemma/strategies/dictionaries/data/it.plzma b/simplemma/strategies/dictionaries/data/it.plzma index e67d648..c0ecca8 100644 Binary files a/simplemma/strategies/dictionaries/data/it.plzma and b/simplemma/strategies/dictionaries/data/it.plzma differ diff --git a/simplemma/strategies/dictionaries/data/ka.plzma b/simplemma/strategies/dictionaries/data/ka.plzma index c03b93f..a2377e7 100644 Binary files a/simplemma/strategies/dictionaries/data/ka.plzma and b/simplemma/strategies/dictionaries/data/ka.plzma differ diff --git a/simplemma/strategies/dictionaries/data/la.plzma b/simplemma/strategies/dictionaries/data/la.plzma index a03c290..7711813 100644 Binary files a/simplemma/strategies/dictionaries/data/la.plzma and b/simplemma/strategies/dictionaries/data/la.plzma differ diff --git a/simplemma/strategies/dictionaries/data/lb.plzma b/simplemma/strategies/dictionaries/data/lb.plzma index 1e8c598..80460e6 100644 Binary files a/simplemma/strategies/dictionaries/data/lb.plzma and b/simplemma/strategies/dictionaries/data/lb.plzma differ diff --git a/simplemma/strategies/dictionaries/data/lt.plzma b/simplemma/strategies/dictionaries/data/lt.plzma index 923387b..28f0d62 100644 Binary files a/simplemma/strategies/dictionaries/data/lt.plzma and b/simplemma/strategies/dictionaries/data/lt.plzma differ diff --git a/simplemma/strategies/dictionaries/data/lv.plzma b/simplemma/strategies/dictionaries/data/lv.plzma index b703301..7298d8c 100644 Binary files a/simplemma/strategies/dictionaries/data/lv.plzma and b/simplemma/strategies/dictionaries/data/lv.plzma differ diff --git a/simplemma/strategies/dictionaries/data/mk.plzma b/simplemma/strategies/dictionaries/data/mk.plzma index b8dfc75..64ff201 100644 Binary files a/simplemma/strategies/dictionaries/data/mk.plzma and b/simplemma/strategies/dictionaries/data/mk.plzma differ diff --git a/simplemma/strategies/dictionaries/data/ms.plzma b/simplemma/strategies/dictionaries/data/ms.plzma index 8855264..58a5aa1 100644 Binary files a/simplemma/strategies/dictionaries/data/ms.plzma and b/simplemma/strategies/dictionaries/data/ms.plzma differ diff --git a/simplemma/strategies/dictionaries/data/nb.plzma b/simplemma/strategies/dictionaries/data/nb.plzma index 49e741b..5d2004f 100644 Binary files a/simplemma/strategies/dictionaries/data/nb.plzma and b/simplemma/strategies/dictionaries/data/nb.plzma differ diff --git a/simplemma/strategies/dictionaries/data/nl.plzma b/simplemma/strategies/dictionaries/data/nl.plzma index 07cf01f..8650bcb 100644 Binary files a/simplemma/strategies/dictionaries/data/nl.plzma and b/simplemma/strategies/dictionaries/data/nl.plzma differ diff --git a/simplemma/strategies/dictionaries/data/nn.plzma b/simplemma/strategies/dictionaries/data/nn.plzma index dee0dea..8f2fc0a 100644 Binary files a/simplemma/strategies/dictionaries/data/nn.plzma and b/simplemma/strategies/dictionaries/data/nn.plzma differ diff --git a/simplemma/strategies/dictionaries/data/pl.plzma b/simplemma/strategies/dictionaries/data/pl.plzma index 18cc902..ffc980b 100644 Binary files a/simplemma/strategies/dictionaries/data/pl.plzma and b/simplemma/strategies/dictionaries/data/pl.plzma differ diff --git a/simplemma/strategies/dictionaries/data/pt.plzma b/simplemma/strategies/dictionaries/data/pt.plzma index e823d62..801f107 100644 Binary files a/simplemma/strategies/dictionaries/data/pt.plzma and b/simplemma/strategies/dictionaries/data/pt.plzma differ diff --git a/simplemma/strategies/dictionaries/data/ro.plzma b/simplemma/strategies/dictionaries/data/ro.plzma index bf46ce1..9ab570f 100644 Binary files a/simplemma/strategies/dictionaries/data/ro.plzma and b/simplemma/strategies/dictionaries/data/ro.plzma differ diff --git a/simplemma/strategies/dictionaries/data/ru.plzma b/simplemma/strategies/dictionaries/data/ru.plzma index 83db6cf..0b27481 100644 Binary files a/simplemma/strategies/dictionaries/data/ru.plzma and b/simplemma/strategies/dictionaries/data/ru.plzma differ diff --git a/simplemma/strategies/dictionaries/data/se.plzma b/simplemma/strategies/dictionaries/data/se.plzma index ace5079..5376be3 100644 Binary files a/simplemma/strategies/dictionaries/data/se.plzma and b/simplemma/strategies/dictionaries/data/se.plzma differ diff --git a/simplemma/strategies/dictionaries/data/sk.plzma b/simplemma/strategies/dictionaries/data/sk.plzma index de6731b..ee989bb 100644 Binary files a/simplemma/strategies/dictionaries/data/sk.plzma and b/simplemma/strategies/dictionaries/data/sk.plzma differ diff --git a/simplemma/strategies/dictionaries/data/sl.plzma b/simplemma/strategies/dictionaries/data/sl.plzma index ee574b2..c5badea 100644 Binary files a/simplemma/strategies/dictionaries/data/sl.plzma and b/simplemma/strategies/dictionaries/data/sl.plzma differ diff --git a/simplemma/strategies/dictionaries/data/sq.plzma b/simplemma/strategies/dictionaries/data/sq.plzma index 8210e43..181e16e 100644 Binary files a/simplemma/strategies/dictionaries/data/sq.plzma and b/simplemma/strategies/dictionaries/data/sq.plzma differ diff --git a/simplemma/strategies/dictionaries/data/sv.plzma b/simplemma/strategies/dictionaries/data/sv.plzma index 3ba0383..2bf5281 100644 Binary files a/simplemma/strategies/dictionaries/data/sv.plzma and b/simplemma/strategies/dictionaries/data/sv.plzma differ diff --git a/simplemma/strategies/dictionaries/data/sw.plzma b/simplemma/strategies/dictionaries/data/sw.plzma index 491d472..573fc9a 100644 Binary files a/simplemma/strategies/dictionaries/data/sw.plzma and b/simplemma/strategies/dictionaries/data/sw.plzma differ diff --git a/simplemma/strategies/dictionaries/data/tl.plzma b/simplemma/strategies/dictionaries/data/tl.plzma index 039b099..f50d436 100644 Binary files a/simplemma/strategies/dictionaries/data/tl.plzma and b/simplemma/strategies/dictionaries/data/tl.plzma differ diff --git a/simplemma/strategies/dictionaries/data/tr.plzma b/simplemma/strategies/dictionaries/data/tr.plzma index ba22661..3e91722 100644 Binary files a/simplemma/strategies/dictionaries/data/tr.plzma and b/simplemma/strategies/dictionaries/data/tr.plzma differ diff --git a/simplemma/strategies/dictionaries/data/uk.plzma b/simplemma/strategies/dictionaries/data/uk.plzma index ef4ed28..0c64e01 100644 Binary files a/simplemma/strategies/dictionaries/data/uk.plzma and b/simplemma/strategies/dictionaries/data/uk.plzma differ diff --git a/simplemma/strategies/dictionaries/dictionary_factory.py b/simplemma/strategies/dictionaries/dictionary_factory.py index fef7903..101bdfc 100644 --- a/simplemma/strategies/dictionaries/dictionary_factory.py +++ b/simplemma/strategies/dictionaries/dictionary_factory.py @@ -15,7 +15,7 @@ from functools import lru_cache from os import listdir, path from pathlib import Path -from typing import Dict +from typing import ByteString, Dict if sys.version_info >= (3, 8): from typing import Protocol @@ -30,7 +30,7 @@ ] -def _load_dictionary_from_disk(langcode: str) -> Dict[str, str]: +def _load_dictionary_from_disk(langcode: str) -> Dict[ByteString, ByteString]: """ Load a dictionary from disk. @@ -68,7 +68,7 @@ class DictionaryFactory(Protocol): def get_dictionary( self, lang: str, - ) -> Dict[str, str]: + ) -> Dict[ByteString, ByteString]: """ Get the dictionary for a specific language. @@ -102,7 +102,7 @@ def __init__(self, cache_max_size: int = 8): cache_max_size (int): The maximum size of the cache for loaded dictionaries. Defaults to `8`. """ - self._data: Dict[str, Dict[str, str]] = {} + self._data: Dict[str, Dict[ByteString, ByteString]] = {} self._load_dictionary_from_disk = lru_cache(maxsize=cache_max_size)( _load_dictionary_from_disk ) @@ -110,7 +110,7 @@ def __init__(self, cache_max_size: int = 8): def get_dictionary( self, lang: str, - ) -> Dict[str, str]: + ) -> Dict[ByteString, ByteString]: """ Get the dictionary for a specific language. diff --git a/simplemma/strategies/dictionary_lookup.py b/simplemma/strategies/dictionary_lookup.py index 22544d4..a98d365 100644 --- a/simplemma/strategies/dictionary_lookup.py +++ b/simplemma/strategies/dictionary_lookup.py @@ -3,7 +3,7 @@ It provides lemmatization using dictionary lookup. """ -from typing import Optional +from typing import ByteString, Dict, Optional from .dictionaries.dictionary_factory import DefaultDictionaryFactory, DictionaryFactory from .lemmatization_strategy import LemmatizationStrategy @@ -26,6 +26,13 @@ def __init__( """ self._dictionary_factory = dictionary_factory + def _get( + self, token: str, dictionary: Dict[ByteString, ByteString] + ) -> Optional[str]: + "Convenience function to handle bytestring to string conversion." + result = dictionary.get(token.encode("utf-8")) + return result.decode("utf-8") if result else None # type: ignore[union-attr] + def get_lemma(self, token: str, lang: str) -> Optional[str]: """ Get Lemma using Dictionary Lookup @@ -43,8 +50,9 @@ def get_lemma(self, token: str, lang: str) -> Optional[str]: """ # Search the language data, reverse case to extend coverage. dictionary = self._dictionary_factory.get_dictionary(lang) - if token in dictionary: - return dictionary[token] + result = self._get(token, dictionary) + if result: + return result # Try upper or lowercase. token = token.lower() if token[0].isupper() else token.capitalize() - return dictionary.get(token) + return self._get(token, dictionary) diff --git a/simplemma/strategies/greedy_dictionary_lookup.py b/simplemma/strategies/greedy_dictionary_lookup.py index 0915402..ea372de 100644 --- a/simplemma/strategies/greedy_dictionary_lookup.py +++ b/simplemma/strategies/greedy_dictionary_lookup.py @@ -58,7 +58,7 @@ def get_lemma(self, token: str, lang: str) -> str: return token dictionary = self._dictionary_factory.get_dictionary(lang) - candidate = token + candidate = token.encode("utf-8") for _ in range(self._steps): if candidate not in dictionary: break @@ -73,4 +73,4 @@ def get_lemma(self, token: str, lang: str) -> str: candidate = new_candidate - return candidate + return candidate.decode("utf-8") diff --git a/simplemma/utils.py b/simplemma/utils.py index 1d81fa0..57d47cb 100644 --- a/simplemma/utils.py +++ b/simplemma/utils.py @@ -6,7 +6,7 @@ - [validate_lang_input][simplemma.utils.validate_lang_input]: Validates the language input and ensures it is a valid tuple. """ -from typing import Tuple, Union +from typing import ByteString, Tuple, Union def validate_lang_input(lang: Union[str, Tuple[str, ...]]) -> Tuple[str]: @@ -31,7 +31,9 @@ def validate_lang_input(lang: Union[str, Tuple[str, ...]]) -> Tuple[str]: return lang # type: ignore[return-value] -def levenshtein_dist(str1: str, str2: str) -> int: +def levenshtein_dist( + first: Union[ByteString, str], second: Union[ByteString, str] +) -> int: """ Calculate the Levenshtein distance between two strings. @@ -47,6 +49,8 @@ def levenshtein_dist(str1: str, str2: str) -> int: int: The Levenshtein distance between the two strings. """ + str1 = first.encode("utf-8") if isinstance(first, str) else first + str2 = second.encode("utf-8") if isinstance(second, str) else second # inspired by this noticeably faster code: # https://gist.github.com/p-hash/9e0f9904ce7947c133308fbe48fe032b if str1 == str2: diff --git a/tests/test_dictionary_pickler.py b/tests/test_dictionary_pickler.py index 95dd1bb..2fc806f 100644 --- a/tests/test_dictionary_pickler.py +++ b/tests/test_dictionary_pickler.py @@ -26,9 +26,9 @@ def test_logic() -> None: # different order mydict = dictionary_pickler._read_dict(testfile, "es", silent=True) assert len(mydict) == 5 - assert mydict["closeones"] == "closeone" + assert mydict[b"closeones"] == b"closeone" item = sorted(mydict.keys(), reverse=True)[0] - assert item == "valid-word" + assert item == b"valid-word" # file I/O assert dictionary_pickler._determine_path("lists", "de").endswith("de.txt") @@ -37,3 +37,4 @@ def test_logic() -> None: listpath = os.path.join(TEST_DIR, "data") os_handle, temp_outputfile = tempfile.mkstemp(suffix=".pkl", text=True) dictionary_pickler._pickle_dict("zz", listpath, temp_outputfile) + dictionary_pickler._pickle_dict("zz", listpath, in_place=True) diff --git a/tests/test_lemmatizer.py b/tests/test_lemmatizer.py index 4f31774..e911cf1 100644 --- a/tests/test_lemmatizer.py +++ b/tests/test_lemmatizer.py @@ -1,6 +1,6 @@ """Tests for `simplemma` package.""" -from typing import Dict +from typing import ByteString, Dict import pytest @@ -17,8 +17,8 @@ class CustomDictionaryFactory(DictionaryFactory): def get_dictionary( self, lang: str, - ) -> Dict[str, str]: - return {"testing": "the test works!!"} + ) -> Dict[ByteString, ByteString]: + return {b"testing": b"the test works!!"} assert ( Lemmatizer( diff --git a/training/dictionary_pickler.py b/training/dictionary_pickler.py index f00ed6d..15345d1 100644 --- a/training/dictionary_pickler.py +++ b/training/dictionary_pickler.py @@ -10,7 +10,7 @@ import re from operator import itemgetter from pathlib import Path -from typing import Dict, List, Optional +from typing import ByteString, Dict, List, Optional import simplemma from simplemma.strategies.defaultrules import DEFAULT_RULES @@ -49,7 +49,9 @@ def _determine_path(listpath: str, langcode: str) -> str: return str(Path(__file__).parent / filename) -def _read_dict(filepath: str, langcode: str, silent: bool) -> Dict[str, str]: +def _read_dict( + filepath: str, langcode: str, silent: bool +) -> Dict[ByteString, ByteString]: mydict: Dict[str, str] = {} myadditions: List[str] = [] i: int = 0 @@ -80,8 +82,8 @@ def _read_dict(filepath: str, langcode: str, silent: bool) -> Dict[str, str]: # print line if the rule is wrong if ( len(columns[1]) > 6 - and columns[1] != columns[0] and langcode in DEFAULT_RULES + and columns[1] != columns[0] ): rule = DEFAULT_RULES[langcode](columns[1]) if rule is not None and rule != columns[1]: @@ -119,18 +121,22 @@ def _read_dict(filepath: str, langcode: str, silent: bool) -> Dict[str, str]: for word in myadditions: mydict[word] = word LOGGER.debug("%s %s", langcode, i) - return dict(sorted(mydict.items())) + # sort and convert to bytestrings + return {k.encode("utf-8"): v.encode("utf-8") for k, v in sorted(mydict.items())} def _load_dict( langcode: str, listpath: str = "lists", silent: bool = True -) -> Dict[str, str]: +) -> Dict[ByteString, ByteString]: filepath = _determine_path(listpath, langcode) return _read_dict(filepath, langcode, silent) def _pickle_dict( - langcode: str, listpath: str = "lists", filepath: Optional[str] = None + langcode: str = "en", + listpath: str = "lists", + filepath: Optional[str] = None, + in_place: bool = False, ) -> None: mydict = _load_dict(langcode, listpath) # sort dictionary to help saving space during compression @@ -138,7 +144,12 @@ def _pickle_dict( mydict = dict(sorted(mydict.items(), key=itemgetter(1))) if filepath is None: filename = f"strategies/dictionaries/data/{langcode}.plzma" - filepath = str(Path(simplemma.__file__).parent / filename) + directory = ( + Path(simplemma.__file__).parent + if in_place + else Path(__file__).parent.parent / "simplemma" + ) + filepath = str(directory / filename) with lzma.open(filepath, "wb") as filehandle: # , filters=my_filters, preset=9 pickle.dump(mydict, filehandle, protocol=4) LOGGER.debug("%s %s", langcode, len(mydict)) @@ -146,5 +157,5 @@ def _pickle_dict( if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) - for listcode in SUPPORTED_LANGUAGES: + for listcode in sorted(SUPPORTED_LANGUAGES): _pickle_dict(listcode)