Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jimmyscchang committed Jan 22, 2024
1 parent baff978 commit 075d2a5
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 46 deletions.
5 changes: 5 additions & 0 deletions gato/enumerate/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ def print_repo_runner_info(repository: Repository):
f"{Output.bright(repository.accessible_runners[0].runner_name)}"
f" and the machine name was "
f"{Output.bright(repository.accessible_runners[0].machine_name)}"
f" and the runner type was "
f"{Output.bright(repository.accessible_runners[0].runner_type)}"
f" in the {Output.bright(repository.accessible_runners[0].runner_group)} group"
f" with the following labels: "
f"{Output.bright(', '.join(repository.accessible_runners[0].labels))}"
)

for runner in repository.accessible_runners:
Expand Down
20 changes: 8 additions & 12 deletions gato/enumerate/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ def __perform_runlog_enumeration(self, repository: Repository):
if wf_runs:
for wf_run in wf_runs:
runner = Runner(
wf_run['runner_name'], wf_run['machine_name'], non_ephemeral=wf_run['non_ephemeral']
wf_run['runner_name'],
wf_run['runner_type'],
wf_run['token_permissions'],
runner_group=wf_run['runner_group'],
machine_name=wf_run['machine_name'],
labels=wf_run['requested_labels'],
non_ephemeral=wf_run['non_ephemeral']
)

repository.add_accessible_runner(runner)
Expand Down Expand Up @@ -132,17 +138,7 @@ def enumerate_repository(self, repository: Repository, large_org_enum=False):
runner_detected = True

if not self.skip_log:
# If we are enumerating an organization, only enumerate run logs if
# the workflow suggests a self-hosted runner.
if large_org_enum and runner_detected:
self.__perform_runlog_enumeration(repository)

# If we are doing internal enum, get the logs, because coverage is
# more important here and it's ok if it takes time.
elif not repository.is_public() and self.__perform_runlog_enumeration(repository):
runner_detected = True
else:
runner_detected = self.__perform_runlog_enumeration(repository)
runner_detected = self.__perform_runlog_enumeration(repository)

if runner_detected:
# Only display permissions (beyond having none) if runner is
Expand Down
109 changes: 75 additions & 34 deletions gato/github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
import zipfile
import re
import io
import json #Jimmy

from gato.cli import Output
from datetime import datetime, timezone, timedelta

logger = logging.getLogger(__name__)
logging.root.setLevel(logging.DEBUG)



class Api():
Expand All @@ -21,6 +24,9 @@ class Api():

RUNNER_RE = re.compile(r'Runner name: \'([\w+-.]+)\'')
MACHINE_RE = re.compile(r'Machine name: \'([\w+-.]+)\'')
RUNNERGROUP_RE = re.compile(r'Runner group name: \'([\w+-.]+)\'')
RUNNERTYPE_RE = re.compile(r'([\w+-.]+)')

RUN_THRESHOLD = 90

def __init__(self, pat: str, version: str = "2022-11-28",
Expand Down Expand Up @@ -111,49 +117,79 @@ def __process_run_log(self, log_content: bytes, run_info: dict):
Returns:
dict: metadata about the run execution.
"""
log_package = None
log_package = dict()
token_permissions = dict()
runner_type = None
non_ephemeral = False
labels = None
runner_name = None
machine_name = None
runner_group = None


with zipfile.ZipFile(io.BytesIO(log_content)) as runres:
for zipinfo in runres.infolist():
# TODO use a lambda for this messy logic
if "checkout" in zipinfo.filename or "Checkout" in zipinfo.filename:
if zipinfo.filename.startswith('0_'):
with runres.open(zipinfo) as run_setup:
content = run_setup.read().decode()
if "Cleaning the repository" in content:
non_ephemeral = True
content_lines = content.split('\n')

if log_package:
log_package['non_ephemeral'] = non_ephemeral

if "Set up job" in zipinfo.filename:
with runres.open(zipinfo) as run_setup:
content = run_setup.read().decode()
if "Image Release: https://github.com/actions/runner-images" in content:
if "Image Release: https://github.com/actions/runner-images" in content or \
"Job is about to start running on the hosted runner: GitHub Actions" in content:
# Larger runners will appear to be self-hosted, but
# they will have the image name. Skip if we see this.
# If the log contains "Job is about to start running on the hosted runner: GitHub Actions",
# the runner is a GitHub-hosted runner, so we can skip it.
continue

if "Runner name" in content or \
"Machine name" in content:

# Need to replace Windows-style line
# endings with Linux-style ones.
matches = Api.RUNNER_RE.search(content)
runner_name = matches.group(1) if matches else None

matches = Api.MACHINE_RE.search(content)
hostname = matches.group(1) if matches else None

log_package = {
"setup_log": content,
"runner_name": runner_name,
"machine_name": hostname,
"run_id": run_info["id"],
"run_attempt": run_info["run_attempt"],
"non_ephemeral": non_ephemeral
}
return log_package

index = 0
while index < len(content_lines) and content_lines[index]:
line = content_lines[index]

if "Requested labels: " in line:
labels = line.split("Requested labels: ")[1].split(', ')

if "Runner name: " in line:
runner_name = line.split("Runner name: ")[1]

if "Machine name: " in line:
machine_name = line.split("Machine name: ")[1]

if "Runner group name:" in line:
runner_group = line.split("Runner group name: ")[1]

if "Job is about to start running on" in line:
runner_type = line.split()[-1]
matches = Api.RUNNERTYPE_RE.search(runner_type)
runner_type = matches.group(1)

if "GITHUB_TOKEN Permission" in line:
while "##[endgroup]" not in content_lines[index+1]:
index += 1
scope = content_lines[index].split()[1]
permission = content_lines[index].split()[2]
token_permissions[scope] = permission
log_package["token_permissions"] = token_permissions

if "Cleaning the repository" in line:
non_ephemeral = True
log_package["non_ephemeral"] = non_ephemeral

index += 1

log_package = {
"requested_labels": labels,
"runner_name": runner_name,
"machine_name": machine_name,
"runner_group": runner_group,
"runner_type": runner_type,
"run_id": run_info["id"],
"run_attempt": run_info["run_attempt"],
"non_ephemeral": non_ephemeral,
"token_permissions": token_permissions
}

return log_package

def __get_full_runlog(self, log_content: bytes, run_name: str):
"""Gets the full text of the runlog from the zip file by matching the
Expand Down Expand Up @@ -667,7 +703,8 @@ def retrieve_run_logs(self, repo_name: str, short_circuit: str = True):
start_date = datetime.now() - timedelta(days = 60)
runs = self.call_get(
f'/repos/{repo_name}/actions/runs', params={
"per_page": "30",
# "per_page": "30",
"per_page": "10",
"status":"completed",
"exclude_pull_requests": "true",
"created":f">{start_date.isoformat()}"
Expand All @@ -678,10 +715,13 @@ def retrieve_run_logs(self, repo_name: str, short_circuit: str = True):
# the machine_name:runner_name.
run_logs = {}
names = set()
# print(runs.json()['workflow_runs'])

if runs.status_code == 200:
logger.debug(f'Enumerating runs within {repo_name}')
print(f'Enumerating runs within {repo_name}')
for run in runs.json()['workflow_runs']:
# print(json.dumps(run, indent=5)) # Jimmy

# We are only interested in runs that actually executed.
if run['conclusion'] != 'success' and \
Expand All @@ -700,6 +740,7 @@ def retrieve_run_logs(self, repo_name: str, short_circuit: str = True):
run_log = self.call_get(
f'/repos/{repo_name}/actions/runs/{run["id"]}/'
f'attempts/{run["run_attempt"]}/logs')

if run_log.status_code == 200:
run_log = self.__process_run_log(run_log.content, run)
if run_log:
Expand Down
9 changes: 9 additions & 0 deletions gato/models/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ class Runner:
def __init__(
self,
runner_name,
runner_type,
token_permissions,
runner_group=None,
machine_name=None,
os=None,
status=None,
Expand All @@ -25,6 +28,9 @@ def __init__(
"""
self.runner_name = runner_name
self.machine_name = machine_name
self.runner_group = runner_group
self.runner_type = runner_type
self.token_permissions = token_permissions
self.os = os
self.status = status
self.labels = labels
Expand All @@ -37,6 +43,9 @@ def toJSON(self):
"name": self.runner_name,
"machine_name": self.machine_name if self.machine_name
else "Unknown",
"runner_type": self.runner_type if self.runner_type else "Unknown",
"runner_group_name": self.runner_group if self.runner_group else "Unknown",
"token_permissions": self.token_permissions,
"os": self.os if self.os else "Unknown",
"status": self.status if self.status else "Unknown",
"labels": [label for label in self.labels],
Expand Down

0 comments on commit 075d2a5

Please sign in to comment.