From dab25f37815919a1fd9d5b386ce7ca0c716a7b69 Mon Sep 17 00:00:00 2001
From: Krzysztof Swietlicki
Date: Tue, 7 Nov 2023 12:13:13 +0100
Subject: [PATCH] tenant_parser.py: add support for extra-config-paths

When creating/updating the repo_map, the 'extra-config-paths' list is
extracted from the tenant configuration and saved in the repo's 'tenants'
dictionary (next to 'jobs' and 'roles').

The extra-config-paths values are used to initialize the Scraper class;
the 'scrape_job_files' method extends its whitelist with these paths.

The 'test_integration' tests were extended to verify this new
functionality.
---
 tests/conftest.py                             | 10 ++-
 tests/scraper/test_integration.py             | 78 ++++++++++++++++++-
 tests/scraper/test_repo_parser.py             | 33 +++++++-
 .../repo_files/zuul-extra.d/extra-jobs.yaml   |  4 +
 tests/testdata/test.foo.yaml                  |  9 +++
 zubbi/scraper/main.py                         | 14 +++-
 zubbi/scraper/repo_parser.py                  | 21 ++++-
 zubbi/scraper/scraper.py                      |  7 +-
 zubbi/scraper/tenant_parser.py                | 28 ++++++-
 9 files changed, 190 insertions(+), 14 deletions(-)
 create mode 100644 tests/testdata/repo_files/zuul-extra.d/extra-jobs.yaml

diff --git a/tests/conftest.py b/tests/conftest.py
index bc879cc..b82aa5a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -76,7 +76,11 @@ def repo_data():
     # and the other one is used for the parser.
     repo = DummyRepo("my/project")
 
-    tenants = {"jobs": ["foo"], "roles": ["foo", "bar"]}
+    tenants = {
+        "jobs": ["foo"],
+        "roles": ["foo", "bar"],
+        "extra_config_paths": {"zuul-extra.d": ["bar"]},
+    }
 
     job_files = {
         "zuul.d/jobs.yaml": {
@@ -91,6 +95,10 @@
             "content": raw_file("repo_files/zuul.d/jobs-parse-error.yaml"),
             "blame": [],
         },
+        "zuul-extra.d/extra-jobs.yaml": {
+            "content": raw_file("repo_files/zuul-extra.d/extra-jobs.yaml"),
+            "blame": [],
+        },
     }
 
     role_files = {
diff --git a/tests/scraper/test_integration.py b/tests/scraper/test_integration.py
index f44d0cd..24c924b 100644
--- a/tests/scraper/test_integration.py
+++ b/tests/scraper/test_integration.py
@@ -29,6 +29,33 @@
     run: playbooks/non-existing-playbook.yaml
 """
 
+MOCKED_JOB_CONTENT_2 = """
+- job:
+    name: even-cooler-new-job
+    parent: super-base-job
+    description: |
+      This is another job for testing purposes.
+    run: playbooks/non-existing-super-playbook.yaml
+"""
+
+MOCKED_PROJECT_CONTENT = """
+- job:
+    name: super-duper-new-job
+    parent: lame-base-job
+    description: |
+      This is yet another job for testing purposes.
+    run: playbooks/non-existing-hyper-playbook.yaml
+
+- project:
+    name: my-simple-project
+    check:
+      jobs:
+        - noop
+    gate:
+      jobs:
+        - super-duper-new-job
+"""
+
 MOCKED_ROLE_DESCRIPTION = """
 Role description containing some reStructuredText expressions.
@@ -110,6 +137,19 @@ class MockGitHubRepository(GitHubRepository):
             "roles/foobar/README": "Simple text in a file without extension",
             "roles/empty-dir/REAMDE.whatever": "This file won't be checked out",
         },
+        "orga1/repo3": {
+            REPO_ROOT: {
+                "project-extra.yaml": MockContents(
+                    "project-extra.yaml", MockContents.FILE
+                ),
+                "zuul-extra.d": MockContents("zuul-extra.d", MockContents.DIR),
+            },
+            "project-extra.yaml": MOCKED_PROJECT_CONTENT,
+            "zuul-extra.d": {
+                "jobs.yaml": MockContents("zuul-extra.d/jobs.yaml", MockContents.FILE)
+            },
+            "zuul-extra.d/jobs.yaml": MOCKED_JOB_CONTENT_2,
+        },
         # Empty repositories
         "orga2/repo1": {},
         "orga2/repo3": {},
@@ -206,6 +246,39 @@ def test_scrape():
                 },
             },
         ),
+        "orga1/repo3": (
+            {
+                "project-extra.yaml": {
+                    "last_changed": "2018-09-17 15:15:15",
+                    "blame": [],
+                    "content": "\n- job:\n"
+                    "    name: super-duper-new-job\n"
+                    "    parent: lame-base-job\n"
+                    "    description: |\n"
+                    "      This is yet another job for testing purposes.\n"
+                    "    run: playbooks/non-existing-hyper-playbook.yaml\n"
+                    "\n- project:\n"
+                    "    name: my-simple-project\n"
+                    "    check:\n"
+                    "      jobs:\n"
+                    "        - noop\n"
+                    "    gate:\n"
+                    "      jobs:\n"
+                    "        - super-duper-new-job\n",
+                },
+                "zuul-extra.d/jobs.yaml": {
+                    "last_changed": "2018-09-17 15:15:15",
+                    "blame": [],
+                    "content": "\n- job:\n"
+                    "    name: even-cooler-new-job\n"
+                    "    parent: super-base-job\n"
+                    "    description: |\n"
+                    "      This is another job for testing purposes.\n"
+                    "    run: playbooks/non-existing-super-playbook.yaml\n",
+                },
+            },
+            {},
+        ),
         "orga2/repo1": ({}, {}),
         "orga2/repo3": ({}, {}),
     }
@@ -221,7 +294,10 @@ def test_scrape():
 
     for repo, tenants in repo_map.items():
         gh_repo = MockGitHubRepository(repo)
-        job_files, role_files = Scraper(gh_repo).scrape()
+        extra_config_paths = tenants["tenants"].get("extra_config_paths", {})
+        if repo == "orga1/repo3":
+            assert len(extra_config_paths) == 2
+        job_files, role_files = Scraper(gh_repo, extra_config_paths).scrape()
 
         assert (job_files, role_files) == expected[repo]
 
diff --git a/tests/scraper/test_repo_parser.py b/tests/scraper/test_repo_parser.py
index 761b65d..8434775 100644
--- a/tests/scraper/test_repo_parser.py
+++ b/tests/scraper/test_repo_parser.py
@@ -30,7 +30,12 @@ def test_parse(repo_data):
     repo, tenants, job_files, role_files = repo_data
 
     jobs, roles = RepoParser(
-        repo, tenants, job_files, role_files, scrape_time, is_reusable_repo=False
+        repo,
+        tenants,
+        job_files,
+        role_files,
+        scrape_time,
+        is_reusable_repo=False,
     ).parse()
 
     # We assume that we can access the resulting jobs and roles dictionary
@@ -39,6 +44,7 @@ def test_parse(repo_data):
     job_2 = jobs[1]
     job_3 = jobs[2]
     job_4 = jobs[3]
+    job_5 = jobs[4]
     role_1 = [r for r in roles if r["role_name"] == "foo"][0]
     role_2 = [r for r in roles if r["role_name"] == "bar"][0]
 
@@ -109,6 +115,23 @@ def test_parse(repo_data):
         "last_updated": None,
     }
 
+    expected_job_5 = {
+        "job_name": "awesome-job",
+        "repo": "my/project",
+        "tenants": ["bar"],
+        "description": "Job in custom directory, without a playbook or parent.\n",
+        "description_html": "<p>Job in custom directory, without a playbook or parent.</p>\n",
+        "parent": "base",
+        "url": "https://github/zuul-extra.d/extra-jobs.yaml",
+        "private": False,
+        "platforms": [],
+        "reusable": False,
+        "line_start": 1,
+        "line_end": 4,
+        "scrape_time": scrape_time,
+        "last_updated": None,
+    }
+
     expected_role_1 = {
         "role_name": "foo",
         "repo": "my/project",
@@ -198,6 +221,7 @@ def test_parse(repo_data):
     assert job_2.to_dict(skip_empty=False) == expected_job_2
     assert job_3.to_dict(skip_empty=False) == expected_job_3
     assert job_4.to_dict(skip_empty=False) == expected_job_4
+    assert job_5.to_dict(skip_empty=False) == expected_job_5
 
     assert role_1.to_dict(skip_empty=False) == expected_role_1
     assert role_2.to_dict(skip_empty=False) == expected_role_2
@@ -208,7 +232,12 @@ def test_parse_reusable_repo(repo_data):
     repo, tenants, job_files, role_files = repo_data
 
     jobs, roles = RepoParser(
-        repo, tenants, job_files, role_files, scrape_time, is_reusable_repo=True
+        repo,
+        tenants,
+        job_files,
+        role_files,
+        scrape_time,
+        is_reusable_repo=True,
     ).parse()
 
     # We assume that we can access the resulting jobs and roles dictionary
diff --git a/tests/testdata/repo_files/zuul-extra.d/extra-jobs.yaml b/tests/testdata/repo_files/zuul-extra.d/extra-jobs.yaml
new file mode 100644
index 0000000..3099a0c
--- /dev/null
+++ b/tests/testdata/repo_files/zuul-extra.d/extra-jobs.yaml
@@ -0,0 +1,4 @@
+- job:
+    name: awesome-job
+    description: |
+      Job in custom directory, without a playbook or parent.
diff --git a/tests/testdata/test.foo.yaml b/tests/testdata/test.foo.yaml
index 9645c94..11bb56c 100644
--- a/tests/testdata/test.foo.yaml
+++ b/tests/testdata/test.foo.yaml
@@ -7,9 +7,18 @@
           - orga1/repo1:
               exclude: [pipeline, project]
           - orga1/repo2
+          - orga1/repo3:
+              exclude:
+                - project
+                - pipeline
+              extra-config-paths:
+                - project-extra.yaml
+                - zuul-extra.d/
           - orga2/repo1
         untrusted-projects:
           - orga2/repo1: {shadow: orga1/repo2}
           - orga1/repo2:
               exclude: [project]
+              extra-config-paths:
+                - zuul-extra.d/
           - orga2/repo3
diff --git a/zubbi/scraper/main.py b/zubbi/scraper/main.py
index 039b9f4..1e0d64a 100644
--- a/zubbi/scraper/main.py
+++ b/zubbi/scraper/main.py
@@ -556,14 +556,22 @@ def _scrape_repo_map(
 
 
 def scrape_repo(repo, tenants, reusable_repos, scrape_time):
-    job_files, role_files = Scraper(repo).scrape()
+    job_files, role_files = Scraper(
+        repo,
+        tenants.get("extra_config_paths", {}),
+    ).scrape()
 
-    is_rusable_repo = repo.repo_name in reusable_repos
+    is_reusable_repo = repo.repo_name in reusable_repos
     jobs = []
     roles = []
     try:
         jobs, roles = RepoParser(
-            repo, tenants, job_files, role_files, scrape_time, is_rusable_repo
+            repo,
+            tenants,
+            job_files,
+            role_files,
+            scrape_time,
+            is_reusable_repo,
         ).parse()
     except Exception:
         LOGGER.exception("Unable to parse job or role definitions in repo '%s'", repo)
diff --git a/zubbi/scraper/repo_parser.py b/zubbi/scraper/repo_parser.py
index a7c832c..a883879 100644
--- a/zubbi/scraper/repo_parser.py
+++ b/zubbi/scraper/repo_parser.py
@@ -31,7 +31,13 @@ class RepoParser:
 
     def __init__(
-        self, repo, tenants, job_files, role_files, scrape_time, is_reusable_repo
+        self,
+        repo,
+        tenants,
+        job_files,
+        role_files,
+        scrape_time,
+        is_reusable_repo,
     ):
         self.repo = repo
         self.tenants = tenants
@@ -64,6 +70,17 @@ def parse_job_files(self):
         # LOGGER.debug(json.dumps(repo_jobs, indent=4))
         return repo_jobs
 
+    def _get_job_tenants(self, file_path):
+        extra_config_paths = self.tenants.get("extra_config_paths", {})
+        tenants = []
+        for extra_config_path in extra_config_paths.keys():
+            if file_path.startswith(extra_config_path):
+                tenants = extra_config_paths[extra_config_path]
+                break
+        if not tenants:
+            tenants = self.tenants["jobs"]
+        return tenants
+
     def parse_job_definitions(self, file_path, job_info):
         try:
             jobs_yaml = yaml.load(job_info["content"], Loader=ZuulSafeLoader)
@@ -83,7 +100,7 @@ def parse_job_definitions(self, file_path, job_info):
             job = ZuulJob(meta={"id": uuid})
             job.job_name = job_name
             job.repo = self.repo.name
-            job.tenants = self.tenants["jobs"]
+            job.tenants = self._get_job_tenants(file_path)
             job.private = self.repo.private
             job.scrape_time = self.scrape_time
             job.line_start = job_def["__line_start__"]
diff --git a/zubbi/scraper/scraper.py b/zubbi/scraper/scraper.py
index 4055fd2..b272b55 100644
--- a/zubbi/scraper/scraper.py
+++ b/zubbi/scraper/scraper.py
@@ -33,8 +33,11 @@ class Scraper:
-    def __init__(self, repo):
+    def __init__(self, repo, extra_config_paths=None):
         self.repo = repo
+        self.extra_config_paths = (
+            list(extra_config_paths.keys()) if extra_config_paths else []
+        )
 
     def scrape(self):
         LOGGER.info("Scraping '%s'", self.repo.name)
@@ -55,7 +58,7 @@ def scrape_job_files(self):
 
         job_files = self.iterate_directory(
             REPO_ROOT,
-            whitelist=ZUUL_DIRECTORIES + ZUUL_FILES,
+            whitelist=ZUUL_DIRECTORIES + ZUUL_FILES + self.extra_config_paths,
             # NOTE (felix): As we provide this directly to the
             # str.endswith() method, the argument must be a str or a
             # tuple of strings, otherwise the following exception is
diff --git a/zubbi/scraper/tenant_parser.py b/zubbi/scraper/tenant_parser.py
index ca20b03..7a11d30 100644
--- a/zubbi/scraper/tenant_parser.py
+++ b/zubbi/scraper/tenant_parser.py
@@ -62,7 +62,7 @@ def parse(self):
             self.tenants.append(tenant_name)
 
     def _update_repo_map(self, project, connection_name, tenant):
-        project_name, exclude = self._extract_project(project)
+        project_name, exclude, extra_config_paths = self._extract_project(project)
 
         # Map the current tenant to the current repository
        repo_tenant_entry = self.repo_map.setdefault(
@@ -75,14 +75,36 @@ def _update_repo_map(self, project, connection_name, tenant):
             repo_tenant_entry["tenants"]["jobs"].append(tenant)
         repo_tenant_entry["tenants"]["roles"].append(tenant)
 
+        if extra_config_paths:
+            if "extra_config_paths" not in repo_tenant_entry["tenants"]:
+                repo_tenant_entry["tenants"]["extra_config_paths"] = {}
+            for extra_config_path in extra_config_paths:
+                if (
+                    extra_config_path
+                    not in repo_tenant_entry["tenants"]["extra_config_paths"].keys()
+                ):
+                    repo_tenant_entry["tenants"]["extra_config_paths"][
+                        extra_config_path
+                    ] = []
+
+                repo_tenant_entry["tenants"]["extra_config_paths"][
+                    extra_config_path
+                ].append(tenant)
+
     def _extract_project(self, project):
         project_name = project
         exclude = []
+        extra_config_paths = []
         if type(project) is dict:
             # Get the first key of the dict containing the project name.
             project_name = list(project.keys())[0]
-            exclude = project.get("exclude", [])
-        return project_name, exclude
+            exclude = project[project_name].get("exclude", [])
+            # NOTE (swietlicki): directories in the extra-config-paths section
+            # contain a trailing slash, while Scraper.iterate_directory()
+            # compares against directory names without a trailing slash.
+            for item in project[project_name].get("extra-config-paths", []):
+                extra_config_paths.append(item[:-1] if item.endswith("/") else item)
+        return project_name, exclude, extra_config_paths
 
     def _load_tenant_sources_from_file(self, sources_file):
         LOGGER.info("Parsing tenant sources file '%s'", sources_file)
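
---

Reviewer note, not part of the patch: the self-contained Python sketch below
summarizes the data flow this change introduces, using the fixture values
from the tests above. The ZUUL_DIRECTORIES/ZUUL_FILES values and the
get_job_tenants() helper are illustrative assumptions modeled on zubbi's
Scraper and RepoParser._get_job_tenants(), not the actual implementation.

    # Sketch (hypothetical): how extra-config-paths flows from the tenant
    # config to job parsing.

    # tenant_parser._update_repo_map() stores each extra config path
    # (trailing slash stripped) together with the tenants that configured it:
    repo_tenant_entry = {
        "tenants": {
            "jobs": ["foo"],
            "roles": ["foo", "bar"],
            "extra_config_paths": {"zuul-extra.d": ["bar"]},
        }
    }

    # Scraper.__init__() keeps only the path names, and scrape_job_files()
    # appends them to its whitelist. The two constants are assumed stand-ins
    # for zubbi's ZUUL_DIRECTORIES and ZUUL_FILES:
    ZUUL_DIRECTORIES = ["zuul.d", ".zuul.d"]
    ZUUL_FILES = ["zuul.yaml", ".zuul.yaml"]
    extra_config_paths = repo_tenant_entry["tenants"]["extra_config_paths"]
    whitelist = ZUUL_DIRECTORIES + ZUUL_FILES + list(extra_config_paths.keys())

    # RepoParser._get_job_tenants() then resolves the tenants per job file:
    # files under an extra config path belong to that path's tenants; all
    # other files fall back to the repo-wide 'jobs' tenant list.
    def get_job_tenants(file_path, extra_config_paths, default_tenants):
        for path, tenants in extra_config_paths.items():
            if file_path.startswith(path):
                return tenants
        return default_tenants

    assert get_job_tenants(
        "zuul-extra.d/extra-jobs.yaml", extra_config_paths, ["foo"]
    ) == ["bar"]
    assert get_job_tenants("zuul.d/jobs.yaml", extra_config_paths, ["foo"]) == ["foo"]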