From ff5cf0c214bad9e357a16a1c85f8db0c3225c38d Mon Sep 17 00:00:00 2001 From: Adam Mitchell Date: Sat, 24 Sep 2022 00:20:27 -0500 Subject: [PATCH 1/2] adding support for language-indexing argument of config name for issue 32. --- language_indexing.egg-info/PKG-INFO | 10 +++ language_indexing.egg-info/SOURCES.txt | 83 +++++++++++++++++++ .../dependency_links.txt | 0 language_indexing.egg-info/entry_points.txt | 2 + language_indexing.egg-info/requires.txt | 6 ++ language_indexing.egg-info/top_level.txt | 2 + language_indexing/language_indexing.py | 26 +++++- language_indexing/language_indexing_config.py | 13 ++- language_indexing/write_user_config.py | 4 +- 9 files changed, 137 insertions(+), 9 deletions(-) create mode 100644 language_indexing.egg-info/PKG-INFO create mode 100644 language_indexing.egg-info/SOURCES.txt create mode 100644 language_indexing.egg-info/dependency_links.txt create mode 100644 language_indexing.egg-info/entry_points.txt create mode 100644 language_indexing.egg-info/requires.txt create mode 100644 language_indexing.egg-info/top_level.txt diff --git a/language_indexing.egg-info/PKG-INFO b/language_indexing.egg-info/PKG-INFO new file mode 100644 index 0000000..8efdaa2 --- /dev/null +++ b/language_indexing.egg-info/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 2.1 +Name: language-indexing +Version: 0.0.1 +Summary: UNKNOWN +Home-page: https://github.com/wikitongues/Language-Indexing +Author: Wikitongues +License: UNKNOWN +Platform: UNKNOWN + +UNKNOWN diff --git a/language_indexing.egg-info/SOURCES.txt b/language_indexing.egg-info/SOURCES.txt new file mode 100644 index 0000000..9ab6714 --- /dev/null +++ b/language_indexing.egg-info/SOURCES.txt @@ -0,0 +1,83 @@ +MANIFEST.in +README.md +setup.cfg +setup.py +language_indexing/__init__.py +language_indexing/crawler_process_factory.py +language_indexing/items.py +language_indexing/lang_attribute_parser.py +language_indexing/lang_to_iso_converter.py +language_indexing/language.py +language_indexing/language_indexing.py +language_indexing/language_indexing_config.py +language_indexing/language_indexing_runner.py +language_indexing/middlewares.py +language_indexing/pipelines.py +language_indexing/resource_language_service.py +language_indexing/resource_language_service_factory.py +language_indexing/settings.py +language_indexing/spider_input_factory.py +language_indexing/url_sanitizer.py +language_indexing/write_user_config.py +language_indexing.egg-info/PKG-INFO +language_indexing.egg-info/SOURCES.txt +language_indexing.egg-info/dependency_links.txt +language_indexing.egg-info/entry_points.txt +language_indexing.egg-info/requires.txt +language_indexing.egg-info/top_level.txt +language_indexing/config/__init__.py +language_indexing/config/config_keys.py +language_indexing/config/indexing.cfg +language_indexing/config/load_configs.py +language_indexing/config/logging_settings.py +language_indexing/data_store/__init__.py +language_indexing/data_store/external_resource_data_store.py +language_indexing/data_store/language_data_store.py +language_indexing/data_store/response_object.py +language_indexing/data_store/airtable/__init__.py +language_indexing/data_store/airtable/airtable_connection_info.py +language_indexing/data_store/airtable/airtable_external_resource_data_store.py +language_indexing/data_store/airtable/airtable_external_resource_data_store_factory.py +language_indexing/data_store/airtable/airtable_external_resource_extractor.py +language_indexing/data_store/airtable/airtable_external_resource_formatter.py +language_indexing/data_store/airtable/airtable_http_client.py +language_indexing/data_store/airtable/airtable_language_data_store.py +language_indexing/data_store/airtable/airtable_language_data_store_factory.py +language_indexing/data_store/airtable/airtable_language_extractor.py +language_indexing/data_store/airtable/airtable_table_info.py +language_indexing/data_store/airtable/fake_external_resource_data_store.py +language_indexing/data_store/airtable/fake_language_data_store.py +language_indexing/data_store/airtable/field_name.py +language_indexing/data_store/airtable/offset_utility.py +language_indexing/spiders/__init__.py +language_indexing/spiders/translated_site_spider.py +language_indexing/spiders/wikipedia_spider.py +language_indexing/spiders/input/__init__.py +language_indexing/spiders/input/translated_site_spider_input.py +language_indexing/spiders/input/wikipedia_spider_input.py +language_indexing/spiders/util/__init__.py +language_indexing/spiders/util/external_resource_parser.py +language_indexing/spiders/util/targeted_spider_util.py +language_indexing/spiders/util/wikipedia_util.py +language_indexing/util/__init__.py +language_indexing/util/path_util.py +test/__init__.py +test/test_airtable_external_resource_data_store.py +test/test_airtable_external_resource_extractor.py +test/test_airtable_external_resource_formatter.py +test/test_airtable_http_client.py +test/test_airtable_language_data_store.py +test/test_airtable_language_extractor.py +test/test_crawler_process_factory.py +test/test_external_resource_parser.py +test/test_lang_to_iso_converter.py +test/test_language_indexing_runner.py +test/test_load_configs.py +test/test_path_util.py +test/test_pipelines.py +test/test_resource_language_service.py +test/test_resource_language_service_factory.py +test/test_spider_input_factory.py +test/test_targeted_spider_util.py +test/test_url_sanitizer.py +test/test_wikipedia_util.py diff --git a/language_indexing.egg-info/dependency_links.txt b/language_indexing.egg-info/dependency_links.txt new file mode 100644 index 0000000..e69de29 diff --git a/language_indexing.egg-info/entry_points.txt b/language_indexing.egg-info/entry_points.txt new file mode 100644 index 0000000..33e2f0e --- /dev/null +++ b/language_indexing.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +language-indexing = language_indexing.language_indexing:main diff --git a/language_indexing.egg-info/requires.txt b/language_indexing.egg-info/requires.txt new file mode 100644 index 0000000..2ac93d9 --- /dev/null +++ b/language_indexing.egg-info/requires.txt @@ -0,0 +1,6 @@ +inflection +languagecodes +pre-commit +requests +responses +Scrapy diff --git a/language_indexing.egg-info/top_level.txt b/language_indexing.egg-info/top_level.txt new file mode 100644 index 0000000..112e8be --- /dev/null +++ b/language_indexing.egg-info/top_level.txt @@ -0,0 +1,2 @@ +language_indexing +test diff --git a/language_indexing/language_indexing.py b/language_indexing/language_indexing.py index 95ca021..a7aa33e 100644 --- a/language_indexing/language_indexing.py +++ b/language_indexing/language_indexing.py @@ -1,7 +1,9 @@ #!/usr/bin/env python # Entry point for the program, invoked from the console +import argparse import sys import types +from typing import Optional from .config import config_keys as keys from .config.load_configs import load_external_resource_airtable_config, load_languages_airtable_config @@ -11,15 +13,28 @@ def main() -> None: + + parser = argparse.ArgumentParser(description="Configuration file name") + parser.add_argument("-f", "--filename", help='an alternative config file name, such as "example_file_name.cfg".') + + arg = None + try: + arg = parser.parse_args() + if arg.filename[-4:] != ".cfg": + print('Input Error: The filename must include the file extension, ".cfg".') + raise Exception() + except (Exception): + print("Using default config filename.") + configs = types.SimpleNamespace() - ask_user_for_user_file_creation() + ask_user_for_user_file_creation(arg.filename) start = input("Begin the web crawling process? (Y/N) ") if start.lower() == "n": sys.exit(0) - configure(configs) + configure(configs, arg) sites = configs.main_config[keys.SITES_SECTION].__dict__.keys() @@ -43,7 +58,7 @@ def main() -> None: sys.exit(1) -def configure(configs: types.SimpleNamespace) -> None: +def configure(configs: types.SimpleNamespace, alt_config_filename: Optional[str] = None) -> None: # Instantiate configuration object configs.main_config = LanguageIndexingConfiguration() @@ -51,7 +66,10 @@ def configure(configs: types.SimpleNamespace) -> None: load_config(configs.main_config) # Read user config - load_config(configs.main_config, "user_config") + if alt_config_filename is None: + load_config(configs.main_config, "user_config") + else: + load_config(configs.main_config, alt_config_filename) configs.external_resource_data_store = load_external_resource_airtable_config(configs.main_config) diff --git a/language_indexing/language_indexing_config.py b/language_indexing/language_indexing_config.py index c507ab8..45f6717 100644 --- a/language_indexing/language_indexing_config.py +++ b/language_indexing/language_indexing_config.py @@ -21,19 +21,26 @@ def load_config(config: LanguageIndexingConfiguration, default_config_file_name: # Default case: when nothing is passed, program reads the default config if default_config_file_name is None: config_file = open(os.path.join(os.path.dirname(__file__), "config/indexing.cfg"), "r") - else: + elif default_config_file_name == "user_config": config_file = open(user_config_file()) + else: + config_file = open(user_config_file(default_config_file_name)) + readline(config, config_file) -def user_config_file() -> str: +def user_config_file(alt_config_file_name: Optional[str] = None) -> str: if platform == "windows" or platform == "win32": env = os.getenv("APPDATA") elif platform == "linux" or platform == "linux2" or platform == "darwin": env = os.getenv("HOME") else: raise Exception("This program is intended only for Mac," + "Linux, or Windows machines.") - return os.sep.join([env, "wikitongues-language-indexing.cfg"]) + + if alt_config_file_name is None: + return os.sep.join([env, "wikitongues-language-indexing.cfg"]) + else: + return os.sep.join([env, alt_config_file_name]) def readline(config: LanguageIndexingConfiguration, default_config: TextIOWrapper) -> None: diff --git a/language_indexing/write_user_config.py b/language_indexing/write_user_config.py index bf6a4e6..81f7d2c 100644 --- a/language_indexing/write_user_config.py +++ b/language_indexing/write_user_config.py @@ -3,8 +3,8 @@ from .language_indexing_config import user_config_file -def ask_user_for_user_file_creation() -> None: - user_file = user_config_file() +def ask_user_for_user_file_creation(alt_config_file_name: str) -> None: + user_file = user_config_file(alt_config_file_name) if os.path.isfile(user_file) is True: overwrite = input("The user file already exist, do you want to " + "overwrite the current config file? (Y/N) ") if overwrite.lower() == "y": From 4c8e7c560031570e5c6da6039a5f734253558835 Mon Sep 17 00:00:00 2001 From: Adam Mitchell Date: Sat, 24 Sep 2022 00:30:42 -0500 Subject: [PATCH 2/2] fixing case of null arg --- language_indexing/language_indexing.py | 5 +++-- language_indexing/write_user_config.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/language_indexing/language_indexing.py b/language_indexing/language_indexing.py index a7aa33e..90f6126 100644 --- a/language_indexing/language_indexing.py +++ b/language_indexing/language_indexing.py @@ -19,7 +19,8 @@ def main() -> None: arg = None try: - arg = parser.parse_args() + args = parser.parse_args() + arg = args.filename if arg.filename[-4:] != ".cfg": print('Input Error: The filename must include the file extension, ".cfg".') raise Exception() @@ -28,7 +29,7 @@ def main() -> None: configs = types.SimpleNamespace() - ask_user_for_user_file_creation(arg.filename) + ask_user_for_user_file_creation(arg) start = input("Begin the web crawling process? (Y/N) ") if start.lower() == "n": diff --git a/language_indexing/write_user_config.py b/language_indexing/write_user_config.py index 81f7d2c..09aabea 100644 --- a/language_indexing/write_user_config.py +++ b/language_indexing/write_user_config.py @@ -1,9 +1,10 @@ import os +from typing import Optional from .language_indexing_config import user_config_file -def ask_user_for_user_file_creation(alt_config_file_name: str) -> None: +def ask_user_for_user_file_creation(alt_config_file_name: Optional[str] = None) -> None: user_file = user_config_file(alt_config_file_name) if os.path.isfile(user_file) is True: overwrite = input("The user file already exist, do you want to " + "overwrite the current config file? (Y/N) ")