Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pass configuration filename as argument for Issue #32 #78

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions language_indexing.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Metadata-Version: 2.1
Name: language-indexing
Version: 0.0.1
Summary: UNKNOWN
Home-page: https://github.com/wikitongues/Language-Indexing
Author: Wikitongues
License: UNKNOWN
Platform: UNKNOWN

UNKNOWN
83 changes: 83 additions & 0 deletions language_indexing.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
MANIFEST.in
README.md
setup.cfg
setup.py
language_indexing/__init__.py
language_indexing/crawler_process_factory.py
language_indexing/items.py
language_indexing/lang_attribute_parser.py
language_indexing/lang_to_iso_converter.py
language_indexing/language.py
language_indexing/language_indexing.py
language_indexing/language_indexing_config.py
language_indexing/language_indexing_runner.py
language_indexing/middlewares.py
language_indexing/pipelines.py
language_indexing/resource_language_service.py
language_indexing/resource_language_service_factory.py
language_indexing/settings.py
language_indexing/spider_input_factory.py
language_indexing/url_sanitizer.py
language_indexing/write_user_config.py
language_indexing.egg-info/PKG-INFO
language_indexing.egg-info/SOURCES.txt
language_indexing.egg-info/dependency_links.txt
language_indexing.egg-info/entry_points.txt
language_indexing.egg-info/requires.txt
language_indexing.egg-info/top_level.txt
language_indexing/config/__init__.py
language_indexing/config/config_keys.py
language_indexing/config/indexing.cfg
language_indexing/config/load_configs.py
language_indexing/config/logging_settings.py
language_indexing/data_store/__init__.py
language_indexing/data_store/external_resource_data_store.py
language_indexing/data_store/language_data_store.py
language_indexing/data_store/response_object.py
language_indexing/data_store/airtable/__init__.py
language_indexing/data_store/airtable/airtable_connection_info.py
language_indexing/data_store/airtable/airtable_external_resource_data_store.py
language_indexing/data_store/airtable/airtable_external_resource_data_store_factory.py
language_indexing/data_store/airtable/airtable_external_resource_extractor.py
language_indexing/data_store/airtable/airtable_external_resource_formatter.py
language_indexing/data_store/airtable/airtable_http_client.py
language_indexing/data_store/airtable/airtable_language_data_store.py
language_indexing/data_store/airtable/airtable_language_data_store_factory.py
language_indexing/data_store/airtable/airtable_language_extractor.py
language_indexing/data_store/airtable/airtable_table_info.py
language_indexing/data_store/airtable/fake_external_resource_data_store.py
language_indexing/data_store/airtable/fake_language_data_store.py
language_indexing/data_store/airtable/field_name.py
language_indexing/data_store/airtable/offset_utility.py
language_indexing/spiders/__init__.py
language_indexing/spiders/translated_site_spider.py
language_indexing/spiders/wikipedia_spider.py
language_indexing/spiders/input/__init__.py
language_indexing/spiders/input/translated_site_spider_input.py
language_indexing/spiders/input/wikipedia_spider_input.py
language_indexing/spiders/util/__init__.py
language_indexing/spiders/util/external_resource_parser.py
language_indexing/spiders/util/targeted_spider_util.py
language_indexing/spiders/util/wikipedia_util.py
language_indexing/util/__init__.py
language_indexing/util/path_util.py
test/__init__.py
test/test_airtable_external_resource_data_store.py
test/test_airtable_external_resource_extractor.py
test/test_airtable_external_resource_formatter.py
test/test_airtable_http_client.py
test/test_airtable_language_data_store.py
test/test_airtable_language_extractor.py
test/test_crawler_process_factory.py
test/test_external_resource_parser.py
test/test_lang_to_iso_converter.py
test/test_language_indexing_runner.py
test/test_load_configs.py
test/test_path_util.py
test/test_pipelines.py
test/test_resource_language_service.py
test/test_resource_language_service_factory.py
test/test_spider_input_factory.py
test/test_targeted_spider_util.py
test/test_url_sanitizer.py
test/test_wikipedia_util.py
Empty file.
2 changes: 2 additions & 0 deletions language_indexing.egg-info/entry_points.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[console_scripts]
language-indexing = language_indexing.language_indexing:main
6 changes: 6 additions & 0 deletions language_indexing.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
inflection
languagecodes
pre-commit
requests
responses
Scrapy
2 changes: 2 additions & 0 deletions language_indexing.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
language_indexing
test
27 changes: 23 additions & 4 deletions language_indexing/language_indexing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/usr/bin/env python
# Entry point for the program, invoked from the console
import argparse
import sys
import types
from typing import Optional

from .config import config_keys as keys
from .config.load_configs import load_external_resource_airtable_config, load_languages_airtable_config
Expand All @@ -11,15 +13,29 @@


def main() -> None:

# Optional -f/--filename flag: lets the user point at an alternative user
# config file instead of the default one.
parser = argparse.ArgumentParser(description="Configuration file name")
parser.add_argument("-f", "--filename", help='an alternative config file name, such as "example_file_name.cfg".')

args = parser.parse_args()
# args.filename is already the string (or None when the flag was omitted), so
# the extension must be checked on it directly; looking up `.filename` on it
# again always raised AttributeError and silently skipped the validation.
arg = args.filename
if arg is None:
    print("Using default config filename.")
elif not arg.endswith(".cfg"):
    print('Input Error: The filename must include the file extension, ".cfg".')
    print("Using default config filename.")
    # Discard the rejected name so the default config file really is used.
    arg = None

configs = types.SimpleNamespace()

ask_user_for_user_file_creation()
ask_user_for_user_file_creation(arg)

start = input("Begin the web crawling process? (Y/N) ")
if start.lower() == "n":
sys.exit(0)

configure(configs)
configure(configs, arg)

sites = configs.main_config[keys.SITES_SECTION].__dict__.keys()

Expand All @@ -43,15 +59,18 @@ def main() -> None:
sys.exit(1)


def configure(configs: types.SimpleNamespace) -> None:
def configure(configs: types.SimpleNamespace, alt_config_filename: Optional[str] = None) -> None:
# Instantiate configuration object
configs.main_config = LanguageIndexingConfiguration()

# Read default config
load_config(configs.main_config)

# Read user config
load_config(configs.main_config, "user_config")
if alt_config_filename is None:
load_config(configs.main_config, "user_config")
else:
load_config(configs.main_config, alt_config_filename)

configs.external_resource_data_store = load_external_resource_airtable_config(configs.main_config)

Expand Down
13 changes: 10 additions & 3 deletions language_indexing/language_indexing_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,26 @@ def load_config(config: LanguageIndexingConfiguration, default_config_file_name:
# Default case: when nothing is passed, program reads the default config
if default_config_file_name is None:
config_file = open(os.path.join(os.path.dirname(__file__), "config/indexing.cfg"), "r")
else:
elif default_config_file_name == "user_config":
config_file = open(user_config_file())
else:
config_file = open(user_config_file(default_config_file_name))

readline(config, config_file)


def user_config_file(alt_config_file_name: Optional[str] = None) -> str:
    """Return the path of the user configuration file.

    The file is located directly inside a per-user base directory read from
    the environment: ``APPDATA`` on Windows, ``HOME`` on Linux and macOS.

    Args:
        alt_config_file_name: File name to use instead of the default
            ``wikitongues-language-indexing.cfg``. ``None`` selects the default.

    Returns:
        The base directory and the file name joined with ``os.sep``.

    Raises:
        RuntimeError: If the required environment variable is not set.
        Exception: If the current platform is not Windows, Linux, or macOS.
    """
    if platform in ("windows", "win32"):
        env_var = "APPDATA"
    elif platform in ("linux", "linux2", "darwin"):
        env_var = "HOME"
    else:
        raise Exception("This program is intended only for Mac, Linux, or Windows machines.")

    env = os.getenv(env_var)
    if env is None:
        # Without this guard the join below fails with an opaque TypeError.
        raise RuntimeError(f"Cannot locate the user config file: environment variable {env_var} is not set.")

    if alt_config_file_name is None:
        file_name = "wikitongues-language-indexing.cfg"
    else:
        file_name = alt_config_file_name
    return os.sep.join([env, file_name])


def readline(config: LanguageIndexingConfiguration, default_config: TextIOWrapper) -> None:
Expand Down
5 changes: 3 additions & 2 deletions language_indexing/write_user_config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import os
from typing import Optional

from .language_indexing_config import user_config_file


def ask_user_for_user_file_creation() -> None:
user_file = user_config_file()
def ask_user_for_user_file_creation(alt_config_file_name: Optional[str] = None) -> None:
user_file = user_config_file(alt_config_file_name)
if os.path.isfile(user_file) is True:
overwrite = input("The user file already exist, do you want to " + "overwrite the current config file? (Y/N) ")
if overwrite.lower() == "y":
Expand Down