Skip to content

Commit

Permalink
Capture additional runner metadata dev branch (#65)
Browse files Browse the repository at this point in the history
Co-authored-by: Adnan Khan <[email protected]>
  • Loading branch information
jimmyscchang and AdnaneKhan authored Jan 29, 2024
1 parent 4b37e19 commit 8561d76
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 66 deletions.
5 changes: 5 additions & 0 deletions gato/enumerate/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ def print_repo_runner_info(repository: Repository):
f"{Output.bright(repository.accessible_runners[0].runner_name)}"
f" and the machine name was "
f"{Output.bright(repository.accessible_runners[0].machine_name)}"
f" and the runner type was "
f"{Output.bright(repository.accessible_runners[0].runner_type)}"
f" in the {Output.bright(repository.accessible_runners[0].runner_group)} group"
f" with the following labels: "
f"{Output.bright(', '.join(repository.accessible_runners[0].labels))}"
)

for runner in repository.accessible_runners:
Expand Down
12 changes: 8 additions & 4 deletions gato/enumerate/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ def __perform_runlog_enumeration(self, repository: Repository):
if wf_runs:
for wf_run in wf_runs:
runner = Runner(
wf_run['runner_name'], wf_run['machine_name'], non_ephemeral=wf_run['non_ephemeral']
wf_run['runner_name'],
wf_run['runner_type'],
wf_run['token_permissions'],
runner_group=wf_run['runner_group'],
machine_name=wf_run['machine_name'],
labels=wf_run['requested_labels'],
non_ephemeral=wf_run['non_ephemeral']
)

repository.add_accessible_runner(runner)
Expand Down Expand Up @@ -139,9 +145,7 @@ def enumerate_repository(self, repository: Repository, large_org_enum=False):

# If we are doing internal enum, get the logs, because coverage is
# more important here and it's ok if it takes time.
elif not repository.is_public() and self.__perform_runlog_enumeration(repository):
runner_detected = True
else:
elif not repository.is_public() or not large_org_enum:
runner_detected = self.__perform_runlog_enumeration(repository)

if runner_detected:
Expand Down
104 changes: 67 additions & 37 deletions gato/github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

logger = logging.getLogger(__name__)


class Api():
"""Class to serve as an abstraction layer to interact with the GitHub API.
It handles utilizing proxies, along with passing the PAT and handling any
Expand All @@ -21,6 +20,9 @@ class Api():

RUNNER_RE = re.compile(r'Runner name: \'([\w+-.]+)\'')
MACHINE_RE = re.compile(r'Machine name: \'([\w+-.]+)\'')
RUNNERGROUP_RE = re.compile(r'Runner group name: \'([\w+-.]+)\'')
RUNNERTYPE_RE = re.compile(r'([\w+-.]+)')

RUN_THRESHOLD = 90

def __init__(self, pat: str, version: str = "2022-11-28",
Expand Down Expand Up @@ -111,49 +113,77 @@ def __process_run_log(self, log_content: bytes, run_info: dict):
Returns:
dict: metadata about the run execution.
"""
log_package = None
log_package = dict()
token_permissions = dict()
runner_type = None
non_ephemeral = False
labels = None
runner_name = None
machine_name = None
runner_group = None

with zipfile.ZipFile(io.BytesIO(log_content)) as runres:
for zipinfo in runres.infolist():
# TODO use a lambda for this messy logic
if "checkout" in zipinfo.filename or "Checkout" in zipinfo.filename:
if zipinfo.filename.startswith('0_'):
with runres.open(zipinfo) as run_setup:
content = run_setup.read().decode()
if "Cleaning the repository" in content:
non_ephemeral = True

if log_package:
log_package['non_ephemeral'] = non_ephemeral
content_lines = content.split('\n')

if "Set up job" in zipinfo.filename:
with runres.open(zipinfo) as run_setup:
content = run_setup.read().decode()
if "Image Release: https://github.com/actions/runner-images" in content:
if "Image Release: https://github.com/actions/runner-images" in content or \
"Job is about to start running on the hosted runner: GitHub Actions" in content:
# Larger runners will appear to be self-hosted, but
# they will have the image name. Skip if we see this.
# If the log contains "Job is about to start running on the hosted runner",
# the runner is a GitHub-hosted runner, so we can skip it.
continue

if "Runner name" in content or \
"Machine name" in content:

# Need to replace windows style line
# return with linux..
matches = Api.RUNNER_RE.search(content)
runner_name = matches.group(1) if matches else None

matches = Api.MACHINE_RE.search(content)
hostname = matches.group(1) if matches else None

log_package = {
"setup_log": content,
"runner_name": runner_name,
"machine_name": hostname,
"run_id": run_info["id"],
"run_attempt": run_info["run_attempt"],
"non_ephemeral": non_ephemeral
}
return log_package
index = 0
while index < len(content_lines) and content_lines[index]:
line = content_lines[index]

if "Requested labels: " in line:
labels = line.split("Requested labels: ")[1].split(', ')

if "Runner name: " in line:
runner_name = line.split("Runner name: ")[1].replace("'", "")

if "Machine name: " in line:
machine_name = line.split("Machine name: ")[1].replace("'", "")

if "Runner group name:" in line:
runner_group = line.split("Runner group name: ")[1].replace("'", "")

if "Job is about to start running on" in line:
runner_type = line.split()[-1]
matches = Api.RUNNERTYPE_RE.search(runner_type)
runner_type = matches.group(1)

if "GITHUB_TOKEN Permission" in line:
while "##[endgroup]" not in content_lines[index+1]:
index += 1
scope = content_lines[index].split()[1].replace(':', '')
permission = content_lines[index].split()[2]
token_permissions[scope] = permission
log_package["token_permissions"] = token_permissions

if "Cleaning the repository" in line:
non_ephemeral = True
log_package["non_ephemeral"] = non_ephemeral

index += 1

log_package = {
"requested_labels": labels,
"runner_name": runner_name,
"machine_name": machine_name,
"runner_group": runner_group,
"runner_type": runner_type,
"run_id": run_info["id"],
"run_attempt": run_info["run_attempt"],
"non_ephemeral": non_ephemeral,
"token_permissions": token_permissions
}

return log_package

def __get_full_runlog(self, log_content: bytes, run_name: str):
"""Gets the full text of the runlog from the zip file by matching the
Expand Down Expand Up @@ -682,13 +712,12 @@ def retrieve_run_logs(self, repo_name: str, short_circuit: str = True):
if runs.status_code == 200:
logger.debug(f'Enumerating runs within {repo_name}')
for run in runs.json()['workflow_runs']:

# We are only interested in runs that actually executed.
if run['conclusion'] != 'success' and \
run['conclusion'] != 'failure':
continue

if short_circuit:
if short_circuit:
# If we are only looking for the presence of SH runners and
# not trying to determine ephemeral vs not from repeats, then
# we just need to look at each branch + wf combination once.
Expand All @@ -700,6 +729,7 @@ def retrieve_run_logs(self, repo_name: str, short_circuit: str = True):
run_log = self.call_get(
f'/repos/{repo_name}/actions/runs/{run["id"]}/'
f'attempts/{run["run_attempt"]}/logs')

if run_log.status_code == 200:
run_log = self.__process_run_log(run_log.content, run)
if run_log:
Expand Down Expand Up @@ -1175,4 +1205,4 @@ def commit_workflow(self, repo_name: str,
if self.__verify_result(r, 201) is False:
return None

return new_commit_sha
return new_commit_sha
11 changes: 10 additions & 1 deletion gato/models/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ class Runner:
def __init__(
self,
runner_name,
runner_type=None,
token_permissions=None,
runner_group=None,
machine_name=None,
os=None,
status=None,
Expand All @@ -25,6 +28,9 @@ def __init__(
"""
self.runner_name = runner_name
self.machine_name = machine_name
self.runner_group = runner_group
self.runner_type = runner_type
self.token_permissions = token_permissions
self.os = os
self.status = status
self.labels = labels
Expand All @@ -37,10 +43,13 @@ def toJSON(self):
"name": self.runner_name,
"machine_name": self.machine_name if self.machine_name
else "Unknown",
"runner_type": self.runner_type if self.runner_type else "Unknown",
"runner_group_name": self.runner_group if self.runner_group else "Unknown",
"token_permissions": self.token_permissions,
"os": self.os if self.os else "Unknown",
"status": self.status if self.status else "Unknown",
"labels": [label for label in self.labels],
"non_ephemeral": self.non_ephemeral
}

return representation
return representation
Binary file modified unit_test/files/run_log.zip
Binary file not shown.
6 changes: 3 additions & 3 deletions unit_test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,18 +465,18 @@ def test_retrieve_run_logs(mock_get):
zip_bytes = run_log.read()
mock_get.return_value.content = zip_bytes

abstraction_layer = Api( test_pat, "2022-11-28")
abstraction_layer = Api(test_pat, "2022-11-28")
logs = abstraction_layer.retrieve_run_logs("testOrg/testRepo")

assert len(logs) == 1
assert list(logs)[0]['runner_name'] == 'ghrunner-test'
assert list(logs)[0]['runner_name'] == 'runner-30'

logs = abstraction_layer.retrieve_run_logs(
"testOrg/testRepo", short_circuit=False
)

assert len(logs) == 1
assert list(logs)[0]['runner_name'] == 'ghrunner-test'
assert list(logs)[0]['runner_name'] == 'runner-30'


@patch("gato.github.api.requests.get")
Expand Down
35 changes: 20 additions & 15 deletions unit_test/test_enumerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,21 @@

Output(False, True)

# Canned value used as the mocked `retrieve_run_logs` return value in the
# enumeration tests: a list holding one run-log record with the runner
# metadata keys (type, group, requested labels, token permissions).
BASE_MOCK_RUNNER = [
    {
        "machine_name": "unittest1",
        "runner_name": "much_unit_such_test",
        "runner_type": "organization",
        "non_ephemeral": False,
        "token_permissions": {"Actions": "write"},
        "runner_group": "Default",
        "requested_labels": ["self-hosted", "Linux", "X64"],
    },
]

@pytest.fixture(scope="session", autouse=True)
def load_test_files(request):
Expand Down Expand Up @@ -102,9 +117,7 @@ def test_enumerate_repo_admin(mock_api, capsys):
"scopes": ['repo', 'workflow']
}

mock_api.return_value.retrieve_run_logs.return_value = [
{"machine_name": "unittest1", "runner_name": "much_unit_such_test", "non_ephemeral": False}
]
mock_api.return_value.retrieve_run_logs.return_value = BASE_MOCK_RUNNER

repo_data = json.loads(json.dumps(TEST_REPO_DATA))
repo_data['permissions']['admin'] = True
Expand Down Expand Up @@ -142,9 +155,7 @@ def test_enumerate_repo_admin_no_wf(mock_api, capsys):
"scopes": ['repo']
}

mock_api.return_value.retrieve_run_logs.return_value = [
{"machine_name": "unittest1", "runner_name": "much_unit_such_test", "non_ephemeral": False}
]
mock_api.return_value.retrieve_run_logs.return_value = BASE_MOCK_RUNNER

repo_data = json.loads(json.dumps(TEST_REPO_DATA))
repo_data['permissions']['admin'] = True
Expand Down Expand Up @@ -182,9 +193,7 @@ def test_enumerate_repo_no_wf_no_admin(mock_api, capsys):
"scopes": ['repo']
}

mock_api.return_value.retrieve_run_logs.return_value = [
{"machine_name": "unittest1", "runner_name": "much_unit_such_test", "non_ephemeral": False}
]
mock_api.return_value.retrieve_run_logs.return_value = BASE_MOCK_RUNNER

repo_data = json.loads(json.dumps(TEST_REPO_DATA))
repo_data['permissions']['admin'] = False
Expand Down Expand Up @@ -221,9 +230,7 @@ def test_enumerate_repo_no_wf_maintain(mock_api, capsys):
"scopes": ['repo', 'workflow']
}

mock_api.return_value.retrieve_run_logs.return_value = [
{"machine_name": "unittest1", "runner_name": "much_unit_such_test", "non_ephemeral": False}
]
mock_api.return_value.retrieve_run_logs.return_value = BASE_MOCK_RUNNER

repo_data = json.loads(json.dumps(TEST_REPO_DATA))

Expand Down Expand Up @@ -261,9 +268,7 @@ def test_enumerate_repo_only(mock_api, capsys):
"scopes": ['repo', 'workflow']
}

mock_api.return_value.retrieve_run_logs.return_value = [
{"machine_name": "unittest1", "runner_name": "much_unit_such_test", "non_ephemeral": False}
]
mock_api.return_value.retrieve_run_logs.return_value = BASE_MOCK_RUNNER

repo_data = json.loads(json.dumps(TEST_REPO_DATA))

Expand Down
36 changes: 30 additions & 6 deletions unit_test/test_repo_enumerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,21 @@ def test_enumerate_repo():
"scopes": ['repo', 'workflow']
}

mock_api.retrieve_run_logs.return_value = [
{"machine_name": "unittest1", "runner_name": "much_unit_such_test", "non_ephemeral": False}
]
mock_api.retrieve_run_logs.return_value = [{
"machine_name": "unittest1",
"runner_name": "much_unit_such_test",
"runner_type": "organization",
"non_ephemeral": False,
"token_permissions": {
"Actions": "write"
},
"runner_group": "Default",
"requested_labels": [
"self-hosted",
"Linux",
"X64"
]
}]

repo_data = json.loads(json.dumps(TEST_REPO_DATA))
test_repo = Repository(repo_data)
Expand Down Expand Up @@ -74,9 +86,21 @@ def test_enumerate_repo_admin():
"scopes": ['repo', 'workflow']
}

mock_api.retrieve_run_logs.return_value = [
{"machine_name": "unittest1", "runner_name": "much_unit_such_test", "non_ephemeral": False}
]
mock_api.retrieve_run_logs.return_value = [{
"machine_name": "unittest1",
"runner_name": "much_unit_such_test",
"runner_type": "organization",
"non_ephemeral": False,
"token_permissions": {
"Actions": "write"
},
"runner_group": "Default",
"requested_labels": [
"self-hosted",
"Linux",
"X64"
]
}]

repo_data = json.loads(json.dumps(TEST_REPO_DATA))
repo_data['permissions']['admin'] = True
Expand Down

0 comments on commit 8561d76

Please sign in to comment.