diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f81880c..ad913ee 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,30 +12,55 @@ on: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + + - name: Set up JDK 21 + uses: actions/setup-java@v3 + with: + distribution: 'temurin' + java-version: '21' + + - name: Create and activate virtualenv + run: | + python -m venv venv + source venv/bin/activate + python -m pip install --upgrade pip + - name: Install dependencies run: | - python -m pip install --upgrade pip setuptools wheel - pip install pytest pytest-xdist # Testing packages - python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537 - pip install -e . 
# Run pytest + source venv/bin/activate + pip install setuptools wheel build pytest pytest-xdist + python -m build --sdist --wheel + pip install dist/*.whl + + - name: Verify installed packages + run: | + source venv/bin/activate + pip list + - name: Import language_tool_python run: | + source venv/bin/activate printf "import language_tool_python\n" | python + - name: Test with pytest run: | + source venv/bin/activate pytest -vx --dist=loadfile -n auto - #- name: Run command-line tests - # run: | - # ./tests/test_local.bash # Test command-line with local server - # ./tests/test_remote.bash # Test command-line with remote server + + - name: Run additional tests in bash scripts for Ubuntu + run: | + source venv/bin/activate + bash tests/test_local.bash + bash tests/test_remote.bash diff --git a/.gitignore b/.gitignore index 631f6ee..b2776cb 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,12 @@ __pycache__/ build/ dist/ language_tool_python/LanguageTool-*/ +language_tool_python-*/ +pytest-cache-files-*/ +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ diff --git a/MANIFEST.in b/MANIFEST.in index b418c2f..01861f5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,12 @@ -exclude Makefile -include COPYING -include README.rst include run_doctest.py -include test.bash -include test.py -include test_remote.bash -include requirements.txt -exclude language_tool_python/Language-Tool-* +include tests/test_local.bash +include tests/test_remote.bash +prune pytest-cache-files-*/ +prune env/ +prune .env +prune .venv +prune env/ +prune venv/ +prune ENV/ +prune env.bak/ +prune venv.bak/ diff --git a/Makefile b/Makefile index 3f28e7a..6537b0b 100644 --- a/Makefile +++ b/Makefile @@ -15,4 +15,4 @@ check: ./language_tool_python \ $(wildcard ./language_tool_python/*.py) \ $(wildcard *.py) - python setup.py --long-description | rstcheck - + python extract_long_description.py | rstcheck - diff --git a/README.md b/README.md index b6fd50f..21e0065 100644 --- a/README.md +++ 
b/README.md @@ -264,7 +264,7 @@ The default download path is `~/.cache/language_tool_python/`. The LanguageTool ## Prerequisites -- [Python 3.6+](https://www.python.org) +- [Python 3.9+](https://www.python.org) - [LanguageTool](https://www.languagetool.org) (Java 8.0 or higher) The installation process should take care of downloading LanguageTool (it may diff --git a/build_and_publish.sh b/build_and_publish.sh index 5dd4625..1f322ea 100755 --- a/build_and_publish.sh +++ b/build_and_publish.sh @@ -1,4 +1,5 @@ -rm -rf build/ dist/ -python setup.py sdist bdist_wheel +rm -rf build/ dist/ *.egg-info/ +python -m pip install --upgrade pip setuptools wheel build +python -m build twine check dist/* twine upload dist/* --verbose \ No newline at end of file diff --git a/extract_long_description.py b/extract_long_description.py new file mode 100644 index 0000000..546aa53 --- /dev/null +++ b/extract_long_description.py @@ -0,0 +1,13 @@ +import toml +import os + +with open("pyproject.toml", "rb") as f: + pyproject = toml.loads(f.read().decode('utf-8')) + +readme_path = pyproject["project"]["readme"]["file"] + +if os.path.exists(readme_path): + with open(readme_path, "r", encoding="utf-8") as readme_file: + print(readme_file.read()) +else: + raise FileNotFoundError(f"{readme_path} not found.") diff --git a/language_tool_python/__main__.py b/language_tool_python/__main__.py index 54c3d4d..d68e0dd 100644 --- a/language_tool_python/__main__.py +++ b/language_tool_python/__main__.py @@ -4,12 +4,17 @@ import locale import re import sys +from importlib.metadata import PackageNotFoundError, version +import toml from .server import LanguageTool from .utils import LanguageToolError -import pkg_resources -__version__ = pkg_resources.require("language_tool_python")[0].version +try: + __version__ = version("language_tool_python") +except PackageNotFoundError: + with open("pyproject.toml", "rb") as f: + __version__ = toml.loads(f.read().decode('utf-8'))["project"]["version"] def parse_args(): @@ -33,6 +38,8 @@ def 
parse_args(): parser.add_argument('--enabled-only', action='store_true', help='disable all rules except those specified in ' '--enable') + parser.add_argument('-p', '--picky', action='store_true', + help='If set, additional rules will be activated.') parser.add_argument( '--version', action='version', version='%(prog)s {}'.format(__version__), @@ -77,14 +84,6 @@ def get_text(filename, encoding, ignore): return text -def print_unicode(text): - """Print in a portable manner.""" - if sys.version_info[0] < 3: - text = text.encode('utf-8') - - print(text) - - def main(): args = parse_args() @@ -109,10 +108,10 @@ def main(): if args.remote_port is not None: remote_server += ':{}'.format(args.remote_port) lang_tool = LanguageTool( + language=args.language, motherTongue=args.mother_tongue, remote_server=remote_server, ) - guess_language = None try: text = get_text(filename, encoding, ignore=args.ignore_lines) @@ -120,23 +119,6 @@ def main(): print('{}: {}'.format(filename, exception), file=sys.stderr) continue - if args.language: - if args.language.lower() == 'auto': - try: - from guess_language import guess_language - except ImportError: - print('guess_language is unavailable.', file=sys.stderr) - return 1 - else: - language = guess_language(text) - print('Detected language: {}'.format(language), - file=sys.stderr) - if not language: - return 1 - lang_tool.language = language - else: - lang_tool.language = args.language - if not args.spell_check: lang_tool.disable_spellchecking() @@ -144,9 +126,12 @@ def main(): lang_tool.enabled_rules.update(args.enable) lang_tool.enabled_rules_only = args.enabled_only + if args.picky: + lang_tool.picky = True + try: if args.apply: - print_unicode(lang_tool.correct(text)) + print(lang_tool.correct(text)) else: for match in lang_tool.check(text): rule_id = match.ruleId @@ -162,7 +147,7 @@ def main(): if replacement_text and not message.endswith(('.', '?')): message += '; suggestions: ' + replacement_text - print_unicode('{}: {}: 
{}'.format( + print('{}: {}: {}'.format( filename, rule_id, message)) diff --git a/language_tool_python/config_file.py b/language_tool_python/config_file.py index 7bc8bc0..1688007 100644 --- a/language_tool_python/config_file.py +++ b/language_tool_python/config_file.py @@ -5,12 +5,12 @@ import tempfile ALLOWED_CONFIG_KEYS = { - 'maxTextLength', 'maxTextHardLength', 'secretTokenKey', 'maxCheckTimeMillis', 'maxErrorsPerWordRate', - 'maxSpellingSuggestions', 'maxCheckThreads', 'cacheSize', 'cacheTTLSeconds', 'cacheSize', 'requestLimit', + 'maxTextLength', 'maxTextHardLength', 'maxCheckTimeMillis', 'maxErrorsPerWordRate', + 'maxSpellingSuggestions', 'maxCheckThreads', 'cacheSize', 'cacheTTLSeconds', 'requestLimit', 'requestLimitInBytes', 'timeoutRequestLimit', 'requestLimitPeriodInSeconds', 'languageModel', - 'word2vecModel', 'fasttextModel', 'fasttextBinary', 'maxWorkQueueSize', 'rulesFile', 'warmUp', + 'fasttextModel', 'fasttextBinary', 'maxWorkQueueSize', 'rulesFile', 'blockedReferrers', 'premiumOnly', 'disabledRuleIds', 'pipelineCaching', 'maxPipelinePoolSize', - 'pipelineCaching', 'pipelineExpireTimeInSeconds', 'pipelinePrewarming' + 'pipelineExpireTimeInSeconds', 'pipelinePrewarming' } class LanguageToolConfig: config: Dict[str, Any] @@ -19,6 +19,15 @@ def __init__(self, config: Dict[str, Any]): assert set(config.keys()) <= ALLOWED_CONFIG_KEYS, f"unexpected keys in config: {set(config.keys()) - ALLOWED_CONFIG_KEYS}" assert len(config), "config cannot be empty" self.config = config + + if 'disabledRuleIds' in self.config: + self.config['disabledRuleIds'] = ','.join(self.config['disabledRuleIds']) + if 'blockedReferrers' in self.config: + self.config['blockedReferrers'] = ','.join(self.config['blockedReferrers']) + for key in ["pipelineCaching", "premiumOnly", "pipelinePrewarming"]: + if key in self.config: + self.config[key] = str(bool(self.config[key])).lower() + self.path = self._create_temp_file() def _create_temp_file(self) -> str: diff --git 
a/language_tool_python/download_lt.py b/language_tool_python/download_lt.py index dee76da..b3e24b7 100755 --- a/language_tool_python/download_lt.py +++ b/language_tool_python/download_lt.py @@ -31,7 +31,7 @@ BASE_URL = os.environ.get('LTP_DOWNLOAD_HOST', 'https://www.languagetool.org/download/') FILENAME = 'LanguageTool-{version}.zip' -LTP_DOWNLOAD_VERSION = '6.4' +LTP_DOWNLOAD_VERSION = '6.5' JAVA_VERSION_REGEX = re.compile( r'^(?:java|openjdk) version "(?P<major1>\d+)(|\.(?P<major2>\d+)\.[^"]+)"', @@ -114,8 +114,9 @@ def http_get(url, out_file, proxies=None): total = int(content_length) if content_length is not None else None if req.status_code == 403: # Not found on AWS raise Exception('Could not find at URL {}.'.format(url)) + version = re.search(r'(\d+\.\d+)', url).group(1) progress = tqdm.tqdm(unit="B", unit_scale=True, total=total, - desc=f'Downloading LanguageTool {LTP_DOWNLOAD_VERSION}') + desc=f'Downloading LanguageTool {version}') for chunk in req.iter_content(chunk_size=1024): if chunk: # filter out keep-alive new chunks progress.update(len(chunk)) diff --git a/language_tool_python/server.py b/language_tool_python/server.py index 46237e1..dd4193d 100644 --- a/language_tool_python/server.py +++ b/language_tool_python/server.py @@ -10,6 +10,7 @@ import subprocess import threading import urllib.parse +import psutil from .config_file import LanguageToolConfig from .download_lt import download_lt, LTP_DOWNLOAD_VERSION @@ -20,7 +21,8 @@ parse_url, get_locale_language, get_language_tool_directory, get_server_cmd, FAILSAFE_LANGUAGE, startupinfo, - LanguageToolError, ServerError, PathError + LanguageToolError, ServerError, PathError, + kill_process_force ) @@ -67,6 +69,7 @@ def __init__( self._url = urllib.parse.urljoin(self._url, 'v2/') self._update_remote_server_config(self._url) elif not self._server_is_alive(): + self._stop_consume_event = threading.Event() self._start_server_on_free_port() if language is None: try: @@ -75,7 +78,7 @@ def __init__( language = 
FAILSAFE_LANGUAGE if newSpellings: self._new_spellings = newSpellings - self._register_spellings(self._new_spellings) + self._register_spellings() self._language = LanguageTag(language, self._get_languages()) self.motherTongue = motherTongue self.disabled_rules = set() @@ -84,6 +87,7 @@ def __init__( self.enabled_categories = set() self.enabled_rules_only = False self.preferred_variants = set() + self.picky = False def __enter__(self): return self @@ -158,6 +162,8 @@ def _create_params(self, text: str) -> Dict[str, str]: params['enabledCategories'] = ','.join(self.enabled_categories) if self.preferred_variants: params['preferredVariants'] = ','.join(self.preferred_variants) + if self.picky: + params['level'] = 'picky' return params def correct(self, text: str) -> str: @@ -188,33 +194,36 @@ def _get_valid_spelling_file_path() -> str: .format(spelling_file_path)) return spelling_file_path - def _register_spellings(self, spellings): + def _register_spellings(self): spelling_file_path = self._get_valid_spelling_file_path() - with ( - open(spelling_file_path, "a+", encoding='utf-8') - ) as spellings_file: - spellings_file.write( - "\n" + "\n".join([word for word in spellings]) - ) + with open(spelling_file_path, "r+", encoding='utf-8') as spellings_file: + existing_spellings = set(line.strip() for line in spellings_file.readlines()) + new_spellings = [word for word in self._new_spellings if word not in existing_spellings] + self._new_spellings = new_spellings + if new_spellings: + if len(existing_spellings) > 0: + spellings_file.write("\n") + spellings_file.write("\n".join(new_spellings)) if DEBUG_MODE: print("Registered new spellings at {}".format(spelling_file_path)) def _unregister_spellings(self): spelling_file_path = self._get_valid_spelling_file_path() - with ( - open(spelling_file_path, 'r+', encoding='utf-8') - ) as spellings_file: - spellings_file.seek(0, os.SEEK_END) - for _ in range(len(self._new_spellings)): - while spellings_file.read(1) != '\n': - 
spellings_file.seek(spellings_file.tell() - 2, os.SEEK_SET) - spellings_file.seek(spellings_file.tell() - 2, os.SEEK_SET) - spellings_file.seek(spellings_file.tell() + 1, os.SEEK_SET) - spellings_file.truncate() + + with open(spelling_file_path, 'r', encoding='utf-8') as spellings_file: + lines = spellings_file.readlines() + + updated_lines = [ + line for line in lines if line.strip() not in self._new_spellings + ] + if updated_lines and updated_lines[-1].endswith('\n'): + updated_lines[-1] = updated_lines[-1].strip() + + with open(spelling_file_path, 'w', encoding='utf-8', newline='\n') as spellings_file: + spellings_file.writelines(updated_lines) + if DEBUG_MODE: - print( - "Unregistered new spellings at {}".format(spelling_file_path) - ) + print(f"Unregistered new spellings at {spelling_file_path}") def _get_languages(self) -> set: """Get supported languages (by querying the server).""" @@ -334,7 +343,7 @@ def _start_local_server(self): if self._server: self._consumer_thread = threading.Thread( - target=lambda: _consume(self._server.stdout)) + target=lambda: self._consume(self._server.stdout)) self._consumer_thread.daemon = True self._consumer_thread.start() else: @@ -345,34 +354,70 @@ def _start_local_server(self): raise ServerError( 'Server running; don\'t start a server here.' ) + + def _consume(self, stdout): + """Consume/ignore the rest of the server output. + Without this, the server will end up hanging due to the buffer + filling up. 
+ """ + while not self._stop_consume_event.is_set() and stdout.readline(): + pass + def _server_is_alive(self): return self._server and self._server.poll() is None def _terminate_server(self): - LanguageToolError_message = '' - try: - self._server.terminate() - except OSError: - pass - try: - LanguageToolError_message = self._server.communicate()[1].strip() - except (IOError, ValueError): - pass - try: - self._server.stdout.close() - except IOError: - pass - try: - self._server.stdin.close() - except IOError: - pass - try: - self._server.stderr.close() - except IOError: - pass - self._server = None - return LanguageToolError_message + """ + Terminate the LanguageTool server process and its associated resources. + This method ensures the server process and any associated threads or child processes + are properly terminated and cleaned up. + """ + # Signal the consumer thread to stop consuming stdout + self._stop_consume_event.set() + if self._consumer_thread: + # Wait for the consumer thread to finish + self._consumer_thread.join(timeout=5) + + error_message = '' + if self._server: + try: + try: + # Get the main server process using psutil + proc = psutil.Process(self._server.pid) + except psutil.NoSuchProcess: + # If the process doesn't exist, set proc to None + proc = None + + # Attempt to terminate the process gracefully + self._server.terminate() + # Wait for the process to terminate and capture any stderr output + _, stderr = self._server.communicate(timeout=5) + + except subprocess.TimeoutExpired: + # If the process does not terminate within the timeout, force kill it + kill_process_force(proc=proc) + # Capture remaining stderr output after force termination + _, stderr = self._server.communicate() + + finally: + # Close all associated file descriptors (stdin, stdout, stderr) + if self._server.stdin: + self._server.stdin.close() + if self._server.stdout: + self._server.stdout.close() + if self._server.stderr: + self._server.stderr.close() + + # Release the 
server process object + self._server = None + + # Capture any error messages from stderr, if available + if stderr: + error_message = stderr.strip() + + # Return the error message, if any, for further logging or debugging + return error_message class LanguageToolPublicAPI(LanguageTool): @@ -386,14 +431,5 @@ def __init__(self, *args, **kwargs): @atexit.register def terminate_server(): """Terminate the server.""" - for proc in RUNNING_SERVER_PROCESSES: - proc.terminate() - - -def _consume(stdout): - """Consume/ignore the rest of the server output. - Without this, the server will end up hanging due to the buffer - filling up. - """ - while stdout.readline(): - pass + for pid in [p.pid for p in RUNNING_SERVER_PROCESSES]: + kill_process_force(pid=pid) diff --git a/language_tool_python/utils.py b/language_tool_python/utils.py index c455246..8007418 100644 --- a/language_tool_python/utils.py +++ b/language_tool_python/utils.py @@ -6,6 +6,7 @@ import subprocess import urllib.parse import urllib.request +import psutil from .config_file import LanguageToolConfig from .match import Match @@ -177,3 +178,23 @@ def get_jar_info() -> Tuple[str, str]: def get_locale_language(): """Get the language code for the current locale setting.""" return locale.getlocale()[0] or locale.getdefaultlocale()[0] + + +def kill_process_force(*, pid=None, proc=None): + """Kill a process and its children forcefully. + Useful when the process is unresponsive, particularly on Windows. 
+ Using psutil is more reliable than killing the process with subprocess.""" + assert any([pid, proc]), "Must pass either pid or proc" + try: + proc = psutil.Process(pid) if proc is None else proc + except psutil.NoSuchProcess: + return + for child in proc.children(recursive=True): + try: + child.kill() + except psutil.NoSuchProcess: + pass + try: + proc.kill() + except psutil.NoSuchProcess: + pass diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f03ad33 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +[project] +name = "language_tool_python" +version = "2.8.2" +requires-python = ">=3.9" +description = "Checks grammar using LanguageTool." +readme = { file = "README.md", content-type = "text/markdown" } +license = { file = "LICENSE" } +authors = [ + { name = "Jack Morris", email = "jxmorris12@gmail.com" } +] +urls = { Repository = "https://github.com/jxmorris12/language_tool_python.git" } + +dependencies = [ + "requests", + "tqdm", + "psutil", + "toml" +] + +[project.optional-dependencies] +dev = [ + "pytest", + "pytest-xdist", + "pytest-cov", + "pytest-runner" +] + +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 9dc910a..0000000 --- a/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -pip -requests -tqdm -wheel diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index 466a229..0000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,3 +0,0 @@ -pytest -pytest-cov -pytest-runner diff --git a/setup.py b/setup.py deleted file mode 100755 index 84f21bd..0000000 --- a/setup.py +++ /dev/null @@ -1,20 +0,0 @@ -from setuptools import setup, find_packages - -from os import path -this_directory = path.abspath(path.dirname(__file__)) -with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: - long_description = f.read() - -setup( - name='language_tool_python', - 
version='2.8.2', - description='Checks grammar using LanguageTool.', - long_description_content_type='text/markdown', - long_description=long_description, - author='Jack Morris', - author_email='jxmorris12@gmail.com', - url='https://github.com/jxmorris12/language_tool_python', - license='GNU GPL', - packages=find_packages(), - install_requires=open('requirements.txt').readlines(), -) diff --git a/tests/test_major_functionality.py b/tests/test_major_functionality.py index 20f4a65..90eec5c 100644 --- a/tests/test_major_functionality.py +++ b/tests/test_major_functionality.py @@ -95,7 +95,7 @@ def test_process_starts_and_stops_on_close(): def test_local_client_server_connection(): import language_tool_python - tool1 = language_tool_python.LanguageTool('en-US', host='0.0.0.0') + tool1 = language_tool_python.LanguageTool('en-US', host='127.0.0.1') url = 'http://{}:{}/'.format(tool1._host, tool1._port) tool2 = language_tool_python.LanguageTool('en-US', remote_server=url) assert len(tool2.check('helo darknes my old frend')) @@ -255,6 +255,17 @@ def test_session_only_new_spellings(): assert initial_checksum.hexdigest() == subsequent_checksum.hexdigest() +def test_disabled_rule_in_config(): + import language_tool_python + GRAMMAR_TOOL_CONFIG = { + 'disabledRuleIds': ['MORFOLOGIK_RULE_EN_US'] + } + with language_tool_python.LanguageTool('en-US', config=GRAMMAR_TOOL_CONFIG) as tool: + text = "He realised that the organization was in jeopardy." + matches = tool.check(text) + assert len(matches) == 0 + + def test_debug_mode(): from language_tool_python.server import DEBUG_MODE assert DEBUG_MODE is False