From dd320f96b993030b812181dc98032c641d707e47 Mon Sep 17 00:00:00 2001
From: Krzysztof Swietlicki
Date: Tue, 7 Nov 2023 12:13:13 +0100
Subject: [PATCH] tenant_parser.py: add support for extra-config-paths

When creating/updating the repo_map, the 'extra-config-paths' list is
extracted from the tenant configuration and saved in the repo's 'tenants'
dictionary (next to 'jobs' and 'roles').

The extra-config-paths values are used to initialize the Scraper class;
the 'scrape_job_files' method extends its whitelist with them.

The 'test_integration' tests were extended to verify the new
functionality.
---
 tests/conftest.py                           | 10 ++-
 tests/scraper/test_integration.py           | 82 ++++++++++++++++++-
 tests/scraper/test_repo_parser.py           | 33 +++++++-
 .../repo_files/zuul-extra.d/extra-jobs.yaml |  4 +
 tests/testdata/test.foo.yaml                |  9 ++
 zubbi/scraper/main.py                       | 14 +++-
 zubbi/scraper/repo_parser.py                | 19 ++++-
 zubbi/scraper/scraper.py                    |  7 +-
 zubbi/scraper/tenant_parser.py              | 20 ++++-
 9 files changed, 184 insertions(+), 14 deletions(-)
 create mode 100644 tests/testdata/repo_files/zuul-extra.d/extra-jobs.yaml

diff --git a/tests/conftest.py b/tests/conftest.py
index bc879cc..b82aa5a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -76,7 +76,11 @@ def repo_data():
     # and the other one is used for the parser.
     repo = DummyRepo("my/project")
 
-    tenants = {"jobs": ["foo"], "roles": ["foo", "bar"]}
+    tenants = {
+        "jobs": ["foo"],
+        "roles": ["foo", "bar"],
+        "extra_config_paths": {"zuul-extra.d": ["bar"]},
+    }
 
     job_files = {
         "zuul.d/jobs.yaml": {
@@ -91,6 +95,10 @@ def repo_data():
             "content": raw_file("repo_files/zuul.d/jobs-parse-error.yaml"),
             "blame": [],
         },
+        "zuul-extra.d/extra-jobs.yaml": {
+            "content": raw_file("repo_files/zuul-extra.d/extra-jobs.yaml"),
+            "blame": [],
+        },
     }
 
     role_files = {
diff --git a/tests/scraper/test_integration.py b/tests/scraper/test_integration.py
index f44d0cd..dc583ef 100644
--- a/tests/scraper/test_integration.py
+++ b/tests/scraper/test_integration.py
@@ -29,6 +29,33 @@
     run: playbooks/non-existing-playbook.yaml
 """
 
+MOCKED_JOB_CONTENT_2 = """
+- job:
+    name: even-cooler-new-job
+    parent: super-base-job
+    description: |
+      This is another job for testing purposes.
+    run: playbooks/non-existing-super-playbook.yaml
+"""
+
+MOCKED_PROJECT_CONTENT = """
+- job:
+    name: super-duper-new-job
+    parent: lame-base-job
+    description: |
+      This is yet another job for testing purposes.
+    run: playbooks/non-existing-hyper-playbook.yaml
+
+- project:
+    name: my-simple-project
+    check:
+      jobs:
+        - noop
+    gate:
+      jobs:
+        - super-duper-new-job
+"""
+
 MOCKED_ROLE_DESCRIPTION = """
 Role description containing some reStructuredText expressions.
@@ -110,6 +137,19 @@ class MockGitHubRepository(GitHubRepository):
             "roles/foobar/README": "Simple text in a file without extension",
             "roles/empty-dir/REAMDE.whatever": "This file won't be checked out",
         },
+        "orga1/repo3": {
+            REPO_ROOT: {
+                "project-extra.yaml": MockContents(
+                    "project-extra.yaml", MockContents.FILE
+                ),
+                "zuul-extra.d": MockContents("zuul-extra.d", MockContents.DIR),
+            },
+            "project-extra.yaml": MOCKED_PROJECT_CONTENT,
+            "zuul-extra.d": {
+                "jobs.yaml": MockContents("zuul-extra.d/jobs.yaml", MockContents.FILE)
+            },
+            "zuul-extra.d/jobs.yaml": MOCKED_JOB_CONTENT_2,
+        },
         # Empty repositories
         "orga2/repo1": {},
         "orga2/repo3": {},
@@ -206,6 +246,39 @@ def test_scrape():
                 },
             },
         ),
+        "orga1/repo3": (
+            {
+                "project-extra.yaml": {
+                    "last_changed": "2018-09-17 15:15:15",
+                    "blame": [],
+                    "content": "\n- job:\n"
+                    "    name: super-duper-new-job\n"
+                    "    parent: lame-base-job\n"
+                    "    description: |\n"
+                    "      This is yet another job for testing purposes.\n"
+                    "    run: playbooks/non-existing-hyper-playbook.yaml\n"
+                    "\n- project:\n"
+                    "    name: my-simple-project\n"
+                    "    check:\n"
+                    "      jobs:\n"
+                    "        - noop\n"
+                    "    gate:\n"
+                    "      jobs:\n"
+                    "        - super-duper-new-job\n",
+                },
+                "zuul-extra.d/jobs.yaml": {
+                    "last_changed": "2018-09-17 15:15:15",
+                    "blame": [],
+                    "content": "\n- job:\n"
+                    "    name: even-cooler-new-job\n"
+                    "    parent: super-base-job\n"
+                    "    description: |\n"
+                    "      This is another job for testing purposes.\n"
+                    "    run: playbooks/non-existing-super-playbook.yaml\n",
+                },
+            },
+            {},
+        ),
         "orga2/repo1": ({}, {}),
         "orga2/repo3": ({}, {}),
     }
@@ -221,7 +294,14 @@ def test_scrape():
 
     for repo, tenants in repo_map.items():
         gh_repo = MockGitHubRepository(repo)
-        job_files, role_files = Scraper(gh_repo).scrape()
+        extra_config_paths = tenants["tenants"].get("extra_config_paths", {})
+        if repo == "orga1/repo2":
+            assert len(extra_config_paths) == 1
+        elif repo == "orga1/repo3":
+            assert len(extra_config_paths) == 2
+        else:
+            assert len(extra_config_paths) == 0
+        job_files, role_files = Scraper(gh_repo, extra_config_paths).scrape()
 
         assert (job_files, role_files) == expected[repo]
 
diff --git a/tests/scraper/test_repo_parser.py b/tests/scraper/test_repo_parser.py
index 761b65d..8434775 100644
--- a/tests/scraper/test_repo_parser.py
+++ b/tests/scraper/test_repo_parser.py
@@ -30,7 +30,12 @@ def test_parse(repo_data):
     repo, tenants, job_files, role_files = repo_data
 
     jobs, roles = RepoParser(
-        repo, tenants, job_files, role_files, scrape_time, is_reusable_repo=False
+        repo,
+        tenants,
+        job_files,
+        role_files,
+        scrape_time,
+        is_reusable_repo=False,
     ).parse()
 
     # We assume that we can access the resulting jobs and roles dictionary
@@ -39,6 +44,7 @@ def test_parse(repo_data):
     job_2 = jobs[1]
     job_3 = jobs[2]
     job_4 = jobs[3]
+    job_5 = jobs[4]
 
     role_1 = [r for r in roles if r["role_name"] == "foo"][0]
     role_2 = [r for r in roles if r["role_name"] == "bar"][0]
@@ -109,6 +115,23 @@ def test_parse(repo_data):
         "last_updated": None,
     }
 
+    expected_job_5 = {
+        "job_name": "awesome-job",
+        "repo": "my/project",
+        "tenants": ["bar"],
+        "description": "Job in custom directory, without a playbook or parent.\n",
+        "description_html": "<p>Job in custom directory, without a playbook or parent.</p>\n",
+        "parent": "base",
+        "url": "https://github/zuul-extra.d/extra-jobs.yaml",
+        "private": False,
+        "platforms": [],
+        "reusable": False,
+        "line_start": 1,
+        "line_end": 4,
+        "scrape_time": scrape_time,
+        "last_updated": None,
+    }
+
     expected_role_1 = {
         "role_name": "foo",
         "repo": "my/project",
@@ -198,6 +221,7 @@ def test_parse(repo_data):
     assert job_2.to_dict(skip_empty=False) == expected_job_2
     assert job_3.to_dict(skip_empty=False) == expected_job_3
     assert job_4.to_dict(skip_empty=False) == expected_job_4
+    assert job_5.to_dict(skip_empty=False) == expected_job_5
 
     assert role_1.to_dict(skip_empty=False) == expected_role_1
     assert role_2.to_dict(skip_empty=False) == expected_role_2
@@ -208,7 +232,12 @@ def test_parse_reusable_repo(repo_data):
     repo, tenants, job_files, role_files = repo_data
 
     jobs, roles = RepoParser(
-        repo, tenants, job_files, role_files, scrape_time, is_reusable_repo=True
+        repo,
+        tenants,
+        job_files,
+        role_files,
+        scrape_time,
+        is_reusable_repo=True,
     ).parse()
 
     # We assume that we can access the resulting jobs and roles dictionary
diff --git a/tests/testdata/repo_files/zuul-extra.d/extra-jobs.yaml b/tests/testdata/repo_files/zuul-extra.d/extra-jobs.yaml
new file mode 100644
index 0000000..3099a0c
--- /dev/null
+++ b/tests/testdata/repo_files/zuul-extra.d/extra-jobs.yaml
@@ -0,0 +1,4 @@
+- job:
+    name: awesome-job
+    description: |
+      Job in custom directory, without a playbook or parent.
diff --git a/tests/testdata/test.foo.yaml b/tests/testdata/test.foo.yaml
index 9645c94..11bb56c 100644
--- a/tests/testdata/test.foo.yaml
+++ b/tests/testdata/test.foo.yaml
@@ -7,9 +7,18 @@
           - orga1/repo1:
               exclude: [pipeline, project]
           - orga1/repo2
+          - orga1/repo3:
+              exclude:
+                - project
+                - pipeline
+              extra-config-paths:
+                - project-extra.yaml
+                - zuul-extra.d/
           - orga2/repo1
         untrusted-projects:
           - orga2/repo1: {shadow: orga1/repo2}
           - orga1/repo2:
               exclude: [project]
+              extra-config-paths:
+                - zuul-extra.d/
           - orga2/repo3
diff --git a/zubbi/scraper/main.py b/zubbi/scraper/main.py
index 039b9f4..1e0d64a 100644
--- a/zubbi/scraper/main.py
+++ b/zubbi/scraper/main.py
@@ -556,14 +556,22 @@ def _scrape_repo_map(
 
 
 def scrape_repo(repo, tenants, reusable_repos, scrape_time):
-    job_files, role_files = Scraper(repo).scrape()
+    job_files, role_files = Scraper(
+        repo,
+        tenants.get("extra_config_paths", {}),
+    ).scrape()
 
-    is_rusable_repo = repo.repo_name in reusable_repos
+    is_reusable_repo = repo.repo_name in reusable_repos
 
     jobs = []
     roles = []
     try:
         jobs, roles = RepoParser(
-            repo, tenants, job_files, role_files, scrape_time, is_rusable_repo
+            repo,
+            tenants,
+            job_files,
+            role_files,
+            scrape_time,
+            is_reusable_repo,
         ).parse()
     except Exception:
         LOGGER.exception("Unable to parse job or role definitions in repo '%s'", repo)
diff --git a/zubbi/scraper/repo_parser.py b/zubbi/scraper/repo_parser.py
index a7c832c..2304f5f 100644
--- a/zubbi/scraper/repo_parser.py
+++ b/zubbi/scraper/repo_parser.py
@@ -31,7 +31,13 @@ class RepoParser:
 
     def __init__(
-        self, repo, tenants, job_files, role_files, scrape_time, is_reusable_repo
+        self,
+        repo,
+        tenants,
+        job_files,
+        role_files,
+        scrape_time,
+        is_reusable_repo,
     ):
         self.repo = repo
         self.tenants = tenants
@@ -64,6 +70,15 @@ def parse_job_files(self):
         # LOGGER.debug(json.dumps(repo_jobs, indent=4))
         return repo_jobs
 
+    def _get_job_tenants(self, file_path):
+        extra_config_paths = self.tenants.get("extra_config_paths", {})
+        tenants = self.tenants["jobs"]
+        for extra_config_path in extra_config_paths:
+            if file_path.startswith(extra_config_path):
+                tenants = extra_config_paths[extra_config_path]
+                break
+        return tenants
+
     def parse_job_definitions(self, file_path, job_info):
         try:
             jobs_yaml = yaml.load(job_info["content"], Loader=ZuulSafeLoader)
@@ -83,7 +98,7 @@ def parse_job_definitions(self, file_path, job_info):
             job = ZuulJob(meta={"id": uuid})
             job.job_name = job_name
             job.repo = self.repo.name
-            job.tenants = self.tenants["jobs"]
+            job.tenants = self._get_job_tenants(file_path)
             job.private = self.repo.private
             job.scrape_time = self.scrape_time
             job.line_start = job_def["__line_start__"]
diff --git a/zubbi/scraper/scraper.py b/zubbi/scraper/scraper.py
index 4055fd2..b272b55 100644
--- a/zubbi/scraper/scraper.py
+++ b/zubbi/scraper/scraper.py
@@ -33,8 +33,11 @@
 
 
 class Scraper:
-    def __init__(self, repo):
+    def __init__(self, repo, extra_config_paths=None):
         self.repo = repo
+        self.extra_config_paths = (
+            list(extra_config_paths.keys()) if extra_config_paths else []
+        )
 
     def scrape(self):
         LOGGER.info("Scraping '%s'", self.repo.name)
@@ -55,7 +58,7 @@ def scrape_job_files(self):
 
         job_files = self.iterate_directory(
             REPO_ROOT,
-            whitelist=ZUUL_DIRECTORIES + ZUUL_FILES,
+            whitelist=ZUUL_DIRECTORIES + ZUUL_FILES + self.extra_config_paths,
             # NOTE (felix): As we provide this directly to the
             # str.endswith() method, the argument must be a str or a
             # tuple of strings, otherwise the following exception is
diff --git a/zubbi/scraper/tenant_parser.py b/zubbi/scraper/tenant_parser.py
index ca20b03..fc1c1ed 100644
--- a/zubbi/scraper/tenant_parser.py
+++ b/zubbi/scraper/tenant_parser.py
@@ -14,6 +14,7 @@
 
 import logging
 import os
+from collections import defaultdict
 
 import yaml
 
@@ -62,7 +63,7 @@ def parse(self):
             self.tenants.append(tenant_name)
 
     def _update_repo_map(self, project, connection_name, tenant):
-        project_name, exclude = self._extract_project(project)
+        project_name, exclude, extra_config_paths = self._extract_project(project)
 
         # Map the current tenant to the current repository
         repo_tenant_entry = self.repo_map.setdefault(
@@ -75,14 +76,27 @@ def _update_repo_map(self, project, connection_name, tenant):
             repo_tenant_entry["tenants"]["jobs"].append(tenant)
             repo_tenant_entry["tenants"]["roles"].append(tenant)
 
+        if extra_config_paths:
+            repo_tenant_extra_config_paths = repo_tenant_entry["tenants"].setdefault(
+                "extra_config_paths", defaultdict(list)
+            )
+            for extra_config_path in extra_config_paths:
+                repo_tenant_extra_config_paths[extra_config_path].append(tenant)
+
     def _extract_project(self, project):
         project_name = project
        exclude = []
+        extra_config_paths = []
         if type(project) is dict:
             # Get the first key of the dict containing the project name.
             project_name = list(project.keys())[0]
-            exclude = project.get("exclude", [])
-        return project_name, exclude
+            exclude = project[project_name].get("exclude", [])
+            # NOTE (swietlicki): directories in the extra-config-paths section
+            # contain a trailing slash, while inside Scraper.iterate_directory()
+            # the comparison is done against dir names without a trailing slash
+            for item in project[project_name].get("extra-config-paths", []):
+                extra_config_paths.append(item[:-1] if item.endswith("/") else item)
+        return project_name, exclude, extra_config_paths
 
     def _load_tenant_sources_from_file(self, sources_file):
         LOGGER.info("Parsing tenant sources file '%s'", sources_file)
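---

A note on the data shape: for the orga1/repo2 entry in test.foo.yaml
above, TenantParser._update_repo_map() builds a repo_map entry roughly
like the following sketch (assuming the tenant defined in that file is
named "foo"; _extract_project() has already stripped the trailing slash
from "zuul-extra.d/"):

    repo_map = {
        "orga1/repo2": {
            "tenants": {
                "jobs": ["foo"],
                "roles": ["foo"],
                # maps each extra config path to the tenants that
                # whitelisted it via 'extra-config-paths'
                "extra_config_paths": {"zuul-extra.d": ["foo"]},
            },
        },
    }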
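On the scraping side, only the path names matter. A sketch of how the
whitelist in scrape_job_files() grows (the ZUUL_DIRECTORIES and
ZUUL_FILES values below are illustrative placeholders, not the actual
constants from scraper.py):

    ZUUL_DIRECTORIES = ["zuul.d", ".zuul.d"]
    ZUUL_FILES = ["zuul.yaml", ".zuul.yaml"]

    extra_config_paths = {"project-extra.yaml": ["foo"], "zuul-extra.d": ["foo"]}
    whitelist = ZUUL_DIRECTORIES + ZUUL_FILES + list(extra_config_paths.keys())
    # iterate_directory() now also descends into zuul-extra.d/ and picks up
    # project-extra.yaml at the repository root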
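Jobs found below an extra config path are then attributed only to the
tenants that whitelisted that path; all other job files keep the
repo-wide job tenants. A standalone sketch of the lookup that
RepoParser._get_job_tenants() performs (the real method reads the same
data from self.tenants):

    def get_job_tenants(file_path, tenants):
        # First matching extra config path wins; otherwise fall back
        # to the repo-wide job tenants.
        for path, path_tenants in tenants.get("extra_config_paths", {}).items():
            if file_path.startswith(path):
                return path_tenants
        return tenants["jobs"]

    tenants = {"jobs": ["foo"], "extra_config_paths": {"zuul-extra.d": ["bar"]}}
    assert get_job_tenants("zuul-extra.d/extra-jobs.yaml", tenants) == ["bar"]
    assert get_job_tenants("zuul.d/jobs.yaml", tenants) == ["foo"]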