Commit
tenant_parser.py: add support for extra-config-paths
When creating or updating the repo_map, the 'extra-config-paths' list is extracted
from the tenant configuration and saved in the repo's 'tenants' dictionary (next to
'jobs' and 'roles').
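
For a repository that belongs to a tenant via such a path, the stored entry looks
like the fixture in tests/conftest.py (a sketch, abbreviated to a single repo's entry):

    tenants = {
        "jobs": ["foo"],
        "roles": ["foo", "bar"],
        # maps each extra config path to the tenants that load it
        "extra_config_paths": {"zuul-extra.d": ["bar"]},
    }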

The extra-config-paths values are used to initialize the Scraper class; the
'scrape_job_files' method extends its whitelist with them.
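
A minimal sketch of the resulting call flow in scrape_repo (names as in the
zubbi/scraper/main.py diff below):

    extra_config_paths = tenants.get("extra_config_paths", {})
    scraper = Scraper(repo, extra_config_paths)
    # scrape_job_files() now matches against
    # ZUUL_DIRECTORIES + ZUUL_FILES + list(extra_config_paths.keys())
    job_files, role_files = scraper.scrape()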

The 'test_integration' tests were extended to verify this new functionality.
Krzysztof Swietlicki authored and Krzysztof Swietlicki committed Dec 5, 2023
1 parent fe94579 commit dab25f3
Showing 9 changed files with 190 additions and 14 deletions.
10 changes: 9 additions & 1 deletion tests/conftest.py
@@ -76,7 +76,11 @@ def repo_data():
     # and the other one is used for the parser.
     repo = DummyRepo("my/project")
 
-    tenants = {"jobs": ["foo"], "roles": ["foo", "bar"]}
+    tenants = {
+        "jobs": ["foo"],
+        "roles": ["foo", "bar"],
+        "extra_config_paths": {"zuul-extra.d": ["bar"]},
+    }
 
     job_files = {
         "zuul.d/jobs.yaml": {
@@ -91,6 +95,10 @@ def repo_data():
             "content": raw_file("repo_files/zuul.d/jobs-parse-error.yaml"),
             "blame": [],
         },
+        "zuul-extra.d/extra-jobs.yaml": {
+            "content": raw_file("repo_files/zuul-extra.d/extra-jobs.yaml"),
+            "blame": [],
+        },
     }
 
     role_files = {
78 changes: 77 additions & 1 deletion tests/scraper/test_integration.py
@@ -29,6 +29,33 @@
     run: playbooks/non-existing-playbook.yaml
 """
 
+MOCKED_JOB_CONTENT_2 = """
+- job:
+    name: even-cooler-new-job
+    parent: super-base-job
+    description: |
+      This is another job for testing purposes.
+    run: playbooks/non-existing-super-playbook.yaml
+"""
+
+MOCKED_PROJECT_CONTENT = """
+- job:
+    name: super-duper-new-job
+    parent: lame-base-job
+    description: |
+      This is yet another job for testing purposes.
+    run: playbooks/non-existing-hyper-playbook.yaml
+
+- project:
+    name: my-simple-project
+    check:
+      jobs:
+        - noop
+    gate:
+      jobs:
+        - super-duper-new-job
+"""
+
 MOCKED_ROLE_DESCRIPTION = """
 Role description containing some reStructuredText expressions.
@@ -110,6 +137,19 @@ class MockGitHubRepository(GitHubRepository):
             "roles/foobar/README": "Simple text in a file without extension",
             "roles/empty-dir/REAMDE.whatever": "This file won't be checked out",
         },
+        "orga1/repo3": {
+            REPO_ROOT: {
+                "project-extra.yaml": MockContents(
+                    "project-extra.yaml", MockContents.FILE
+                ),
+                "zuul-extra.d": MockContents("zuul-extra.d", MockContents.DIR),
+            },
+            "project-extra.yaml": MOCKED_PROJECT_CONTENT,
+            "zuul-extra.d": {
+                "jobs.yaml": MockContents("zuul-extra.d/jobs.yaml", MockContents.FILE)
+            },
+            "zuul-extra.d/jobs.yaml": MOCKED_JOB_CONTENT_2,
+        },
         # Empty repositories
         "orga2/repo1": {},
         "orga2/repo3": {},
@@ -206,6 +246,39 @@ def test_scrape():
                 },
             },
         ),
+        "orga1/repo3": (
+            {
+                "project-extra.yaml": {
+                    "last_changed": "2018-09-17 15:15:15",
+                    "blame": [],
+                    "content": "\n- job:\n"
+                    "    name: super-duper-new-job\n"
+                    "    parent: lame-base-job\n"
+                    "    description: |\n"
+                    "      This is yet another job for testing purposes.\n"
+                    "    run: playbooks/non-existing-hyper-playbook.yaml\n"
+                    "\n- project:\n"
+                    "    name: my-simple-project\n"
+                    "    check:\n"
+                    "      jobs:\n"
+                    "        - noop\n"
+                    "    gate:\n"
+                    "      jobs:\n"
+                    "        - super-duper-new-job\n",
+                },
+                "zuul-extra.d/jobs.yaml": {
+                    "last_changed": "2018-09-17 15:15:15",
+                    "blame": [],
+                    "content": "\n- job:\n"
+                    "    name: even-cooler-new-job\n"
+                    "    parent: super-base-job\n"
+                    "    description: |\n"
+                    "      This is another job for testing purposes.\n"
+                    "    run: playbooks/non-existing-super-playbook.yaml\n",
+                },
+            },
+            {},
+        ),
         "orga2/repo1": ({}, {}),
         "orga2/repo3": ({}, {}),
     }
@@ -221,7 +294,10 @@ def test_scrape():
 
     for repo, tenants in repo_map.items():
         gh_repo = MockGitHubRepository(repo)
-        job_files, role_files = Scraper(gh_repo).scrape()
+        extra_config_paths = tenants["tenants"].get("extra_config_paths", {})
+        if repo == "orga1/repo3":
+            assert len(extra_config_paths) == 2
+        job_files, role_files = Scraper(gh_repo, extra_config_paths).scrape()
         assert (job_files, role_files) == expected[repo]
 
 
33 changes: 31 additions & 2 deletions tests/scraper/test_repo_parser.py
@@ -30,7 +30,12 @@ def test_parse(repo_data):
     repo, tenants, job_files, role_files = repo_data
 
     jobs, roles = RepoParser(
-        repo, tenants, job_files, role_files, scrape_time, is_reusable_repo=False
+        repo,
+        tenants,
+        job_files,
+        role_files,
+        scrape_time,
+        is_reusable_repo=False,
     ).parse()
 
     # We assume that we can access the resulting jobs and roles dictionary
@@ -39,6 +44,7 @@ def test_parse(repo_data):
     job_2 = jobs[1]
     job_3 = jobs[2]
     job_4 = jobs[3]
+    job_5 = jobs[4]
     role_1 = [r for r in roles if r["role_name"] == "foo"][0]
     role_2 = [r for r in roles if r["role_name"] == "bar"][0]
@@ -109,6 +115,23 @@ def test_parse(repo_data):
         "last_updated": None,
     }
 
+    expected_job_5 = {
+        "job_name": "awesome-job",
+        "repo": "my/project",
+        "tenants": ["bar"],
+        "description": "Job in custom directory, without a playbook or parent.\n",
+        "description_html": "<p>Job in custom directory, without a playbook or parent.</p>\n",
+        "parent": "base",
+        "url": "https://github/zuul-extra.d/extra-jobs.yaml",
+        "private": False,
+        "platforms": [],
+        "reusable": False,
+        "line_start": 1,
+        "line_end": 4,
+        "scrape_time": scrape_time,
+        "last_updated": None,
+    }
+
     expected_role_1 = {
         "role_name": "foo",
         "repo": "my/project",
@@ -198,6 +221,7 @@ def test_parse(repo_data):
     assert job_2.to_dict(skip_empty=False) == expected_job_2
     assert job_3.to_dict(skip_empty=False) == expected_job_3
     assert job_4.to_dict(skip_empty=False) == expected_job_4
+    assert job_5.to_dict(skip_empty=False) == expected_job_5
     assert role_1.to_dict(skip_empty=False) == expected_role_1
     assert role_2.to_dict(skip_empty=False) == expected_role_2
 
@@ -208,7 +232,12 @@ def test_parse_reusable_repo(repo_data):
     repo, tenants, job_files, role_files = repo_data
 
     jobs, roles = RepoParser(
-        repo, tenants, job_files, role_files, scrape_time, is_reusable_repo=True
+        repo,
+        tenants,
+        job_files,
+        role_files,
+        scrape_time,
+        is_reusable_repo=True,
     ).parse()
 
     # We assume that we can access the resulting jobs and roles dictionary
4 changes: 4 additions & 0 deletions tests/testdata/repo_files/zuul-extra.d/extra-jobs.yaml
@@ -0,0 +1,4 @@
+- job:
+    name: awesome-job
+    description: |
+      Job in custom directory, without a playbook or parent.
9 changes: 9 additions & 0 deletions tests/testdata/test.foo.yaml
@@ -7,9 +7,18 @@
           - orga1/repo1:
               exclude: [pipeline, project]
           - orga1/repo2
+          - orga1/repo3:
+              exclude:
+                - project
+                - pipeline
+              extra-config-paths:
+                - project-extra.yaml
+                - zuul-extra.d/
           - orga2/repo1
         untrusted-projects:
           - orga2/repo1: {shadow: orga1/repo2}
           - orga1/repo2:
              exclude: [project]
+             extra-config-paths:
+               - zuul-extra.d/
           - orga2/repo3
14 changes: 11 additions & 3 deletions zubbi/scraper/main.py
@@ -556,14 +556,22 @@ def _scrape_repo_map(
 
 
 def scrape_repo(repo, tenants, reusable_repos, scrape_time):
-    job_files, role_files = Scraper(repo).scrape()
+    job_files, role_files = Scraper(
+        repo,
+        tenants.get("extra_config_paths", {}),
+    ).scrape()
 
-    is_rusable_repo = repo.repo_name in reusable_repos
+    is_reusable_repo = repo.repo_name in reusable_repos
     jobs = []
     roles = []
     try:
         jobs, roles = RepoParser(
-            repo, tenants, job_files, role_files, scrape_time, is_rusable_repo
+            repo,
+            tenants,
+            job_files,
+            role_files,
+            scrape_time,
+            is_reusable_repo,
         ).parse()
     except Exception:
         LOGGER.exception("Unable to parse job or role definitions in repo '%s'", repo)
21 changes: 19 additions & 2 deletions zubbi/scraper/repo_parser.py
@@ -31,7 +31,13 @@
 
 class RepoParser:
     def __init__(
-        self, repo, tenants, job_files, role_files, scrape_time, is_reusable_repo
+        self,
+        repo,
+        tenants,
+        job_files,
+        role_files,
+        scrape_time,
+        is_reusable_repo,
     ):
         self.repo = repo
         self.tenants = tenants
@@ -64,6 +70,17 @@ def parse_job_files(self):
         # LOGGER.debug(json.dumps(repo_jobs, indent=4))
         return repo_jobs
 
+    def _get_job_tenants(self, file_path):
+        extra_config_paths = self.tenants.get("extra_config_paths", {})
+        tenants = []
+        for extra_config_path in extra_config_paths.keys():
+            if file_path.startswith(extra_config_path):
+                tenants = extra_config_paths[extra_config_path]
+                break
+        if not tenants:
+            tenants = self.tenants["jobs"]
+        return tenants
+
     def parse_job_definitions(self, file_path, job_info):
         try:
             jobs_yaml = yaml.load(job_info["content"], Loader=ZuulSafeLoader)
@@ -83,7 +100,7 @@ def parse_job_definitions(self, file_path, job_info):
                 job = ZuulJob(meta={"id": uuid})
                 job.job_name = job_name
                 job.repo = self.repo.name
-                job.tenants = self.tenants["jobs"]
+                job.tenants = self._get_job_tenants(file_path)
                 job.private = self.repo.private
                 job.scrape_time = self.scrape_time
                 job.line_start = job_def["__line_start__"]
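
Applied to the fixture data from tests/conftest.py above, _get_job_tenants resolves the
tenant list roughly as follows (a sketch, assuming a parser built from that fixture):

    parser = RepoParser(
        repo, tenants, job_files, role_files, scrape_time, is_reusable_repo=False
    )
    parser._get_job_tenants("zuul-extra.d/extra-jobs.yaml")  # ["bar"], matched extra config path
    parser._get_job_tenants("zuul.d/jobs.yaml")  # ["foo"], falls back to tenants["jobs"]
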
7 changes: 5 additions & 2 deletions zubbi/scraper/scraper.py
@@ -33,8 +33,11 @@
 
 
 class Scraper:
-    def __init__(self, repo):
+    def __init__(self, repo, extra_config_paths=None):
         self.repo = repo
+        self.extra_config_paths = (
+            list(extra_config_paths.keys()) if extra_config_paths else []
+        )
 
     def scrape(self):
         LOGGER.info("Scraping '%s'", self.repo.name)
@@ -55,7 +58,7 @@ def scrape_job_files(self):
 
         job_files = self.iterate_directory(
             REPO_ROOT,
-            whitelist=ZUUL_DIRECTORIES + ZUUL_FILES,
+            whitelist=ZUUL_DIRECTORIES + ZUUL_FILES + self.extra_config_paths,
             # NOTE (felix): As we provide this directly to the
             # str.endswith() method, the argument must be a str or a
             # tuple of strings, otherwise the following exception is
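
Because iterate_directory() hands the whitelist to str.endswith(), which accepts only a
str or a tuple of strings, the effective check is sketched below (assuming the list is
converted to a tuple before the call, as the NOTE above implies):

    whitelist = tuple(ZUUL_DIRECTORIES + ZUUL_FILES + self.extra_config_paths)
    "zuul-extra.d".endswith(whitelist)  # True once the extra config path is whitelisted
    "some-other-dir".endswith(whitelist)  # False, so the directory is skipped
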
28 changes: 25 additions & 3 deletions zubbi/scraper/tenant_parser.py
@@ -62,7 +62,7 @@ def parse(self):
             self.tenants.append(tenant_name)
 
     def _update_repo_map(self, project, connection_name, tenant):
-        project_name, exclude = self._extract_project(project)
+        project_name, exclude, extra_config_paths = self._extract_project(project)
 
         # Map the current tenant to the current repository
         repo_tenant_entry = self.repo_map.setdefault(
@@ -75,14 +75,36 @@ def _update_repo_map(self, project, connection_name, tenant):
         repo_tenant_entry["tenants"]["jobs"].append(tenant)
         repo_tenant_entry["tenants"]["roles"].append(tenant)
 
+        if extra_config_paths:
+            if "extra_config_paths" not in repo_tenant_entry["tenants"]:
+                repo_tenant_entry["tenants"]["extra_config_paths"] = {}
+            for extra_config_path in extra_config_paths:
+                if (
+                    extra_config_path
+                    not in repo_tenant_entry["tenants"]["extra_config_paths"].keys()
+                ):
+                    repo_tenant_entry["tenants"]["extra_config_paths"][
+                        extra_config_path
+                    ] = []
+
+                repo_tenant_entry["tenants"]["extra_config_paths"][
+                    extra_config_path
+                ].append(tenant)
+
     def _extract_project(self, project):
         project_name = project
         exclude = []
+        extra_config_paths = []
         if type(project) is dict:
             # Get the first key of the dict containing the project name.
             project_name = list(project.keys())[0]
-            exclude = project.get("exclude", [])
-        return project_name, exclude
+            exclude = project[project_name].get("exclude", [])
+            # NOTE (swietlicki): directories in the extra-config-paths section
+            # contain a trailing slash, while inside Scraper.iterate_directory()
+            # the comparison is done against dir names without a trailing slash
+            for item in project[project_name].get("extra-config-paths", []):
+                extra_config_paths.append(item[:-1] if item.endswith("/") else item)
+        return project_name, exclude, extra_config_paths
 
     def _load_tenant_sources_from_file(self, sources_file):
         LOGGER.info("Parsing tenant sources file '%s'", sources_file)
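
Applied to the orga1/repo3 entry from test.foo.yaml above, _extract_project behaves like
this sketch (trailing slashes are stripped so the stored paths match plain directory names):

    project = {
        "orga1/repo3": {
            "exclude": ["project", "pipeline"],
            "extra-config-paths": ["project-extra.yaml", "zuul-extra.d/"],
        }
    }
    name, exclude, extra_config_paths = self._extract_project(project)
    # name == "orga1/repo3"
    # extra_config_paths == ["project-extra.yaml", "zuul-extra.d"]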
