forked from cockpit-project/bots
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtests-data
executable file
·481 lines (421 loc) · 16.8 KB
/
tests-data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
#!/usr/bin/env python3
# This file is part of Cockpit.
#
# Copyright (C) 2017 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.
import gzip
import json
import os
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import urllib.parse
import urllib.request, urllib.error, urllib.parse
import zlib
import html.parser
sys.dont_write_bytecode = True
import task
from machine import testvm
# How many days of previously closed pull requests to learn from
SINCE_DAYS = 120

# Absolute path of the directory that contains this script
BOTS = os.path.abspath(os.path.dirname(__file__))

# Pull identifiers, log URLs and revisions already present in the seed data
SEEDED = set()

# Maps each sink base URL to the list of links found on its index page
SINKS = dict()
def run(filename, verbose=False, dry=False, **kwargs):
    """Collect test results for recent commits and pull requests.

    filename: name or path of the gzipped state file to seed from and then
        replace; when empty the records are written to stdout instead
    verbose: write progress information to stderr
    dry: skip the image-download and image-upload steps
    kwargs: ignored here; accepted so extra options can be passed through

    Each record is written as one line of JSON. When a file is used the
    records go to a temporary file next to it which replaces the original
    only after everything succeeded.
    """
    since = time.time() - 60 * 60 * 24 * SINCE_DAYS
    pulls = Pulls(since)

    outname = None
    if filename:
        if "/" not in filename and not os.path.exists(filename):
            if not dry:
                subprocess.check_call([ os.path.join(BOTS, "image-download"), "--state", filename ])
            filename = os.path.join(testvm.get_images_data_dir(), filename)
        (outfd, outname) = tempfile.mkstemp(prefix=os.path.basename(filename),
                                            dir=os.path.dirname(filename))
        os.close(outfd)
        output = gzip.open(outname, 'wb')
    else:
        output = sys.stdout.buffer

    def write(**fields):
        output.write(json.dumps(fields).encode('utf-8') + b"\n")

    finished = False
    try:
        # Seed with our input data
        if outname and os.path.exists(filename):
            with gzip.open(filename, 'rb') as fp:
                seed(since, fp, pulls, output)

        # Iterate through all revisions, pull requests on this branch
        for (commit, merged, created, pull) in commits("master", pulls, since, verbose):
            # NOTE(review): this flag is per revision, so a log without any
            # TAP results is only recorded when no earlier log of the same
            # revision produced a record -- confirm that this is intended
            logged = False
            if verbose:
                sys.stderr.write("- {0}\n".format(commit))
            for (context, created, url, log) in logs(commit):
                if verbose:
                    sys.stderr.write(" - {0} {1}\n".format(created, context))
                for (status, name, body, tracker) in tap(log):
                    write(pull=pull, revision=commit, status=status,
                          context=context, date=created, merged=merged,
                          test=name, url=url, tracker=tracker, log=body)
                    logged = True

                # Nothing found for this log
                if not logged:
                    write(pull=pull, revision=commit, status="unknown", date=created,
                          merged=merged, url=url, log=log)
                    logged = True

            # Nothing found for this revision
            if not logged:
                write(pull=pull, revision=commit, status="unknown", date=created, merged=merged)
                logged = True
        finished = True
    finally:
        # On failure don't leave a half written temporary file behind. This
        # is best effort: the original exception is the one worth reporting.
        if not finished and outname:
            try:
                output.close()
                os.unlink(outname)
            except OSError:
                pass

    sys.stdout.flush()
    if output:
        output.close()
    if outname:
        os.rename(outname, filename)

    if not dry and outname and filename:
        upload = [ os.path.join(BOTS, "image-upload"), "--state", filename ]
        subprocess.check_call(upload)
# An HTML parser that just pulls out all the <a href="...">
# link hrefs in a given page of content. We also qualify these
# hrefs with a base url, in case they're relative
class HrefParser(html.parser.HTMLParser):
    """Collect the href of every <a> tag fed to the parser.

    Each href is resolved against `base` and appended to the caller
    supplied `hrefs` list.
    """

    def __init__(self, base, hrefs):
        super().__init__()
        self.base = base
        self.hrefs = hrefs

    def handle_starttag(self, tag, attrs):
        # Only anchors are of interest
        if tag.lower() != "a":
            return
        self.hrefs.extend(
            urllib.parse.urljoin(self.base, value)
            for (key, value) in attrs
            if key.lower() == "href"
        )
# Check if a given pull request was included in its base
# branch via merging or otherwise
class Pulls():
    """Cache of pull requests that can tell whether a pull was merged.

    Iterating yields the pull request objects updated since `since`. They
    are fetched through task.api the first time and replayed from memory on
    later iterations.
    """

    def __init__(self, since):
        self.fetched = { }   # base branches already fetched from origin
        self.checked = { }   # pull number -> result of the git log check
        self.pulls = { }     # pull number and pull URL -> pull object
        self.listing = [ ]   # pulls in listing order, once fully fetched
        self.since = since

    # Register a pull under its number and its URL, so that lookups made
    # by normalize() and pulls found by iterating share one cache
    def _remember(self, pull, *aliases):
        for key in (pull.get("number"), pull.get("url")) + aliases:
            if key is not None:
                self.pulls[key] = pull

    # Get all the pull requests since a given time
    def __iter__(self):
        if self.listing:
            # Replay the completed listing. Iterating self.pulls here would
            # repeat pulls that are registered under more than one key.
            yield from list(self.listing)
            return

        listing = [ ]
        for pull in task.api.pulls(state="all", since=self.since):
            self._remember(pull)
            listing.append(pull)
            yield pull
        self.listing = listing

    # Turn a string/int pull number into a pull object
    def normalize(self, pull):
        if isinstance(pull, int):
            pull = str(pull)
        if isinstance(pull, str):
            if "/" not in pull:
                pull = qualify("pulls/{0}".format(pull))
            if pull in self.pulls:
                pull = self.pulls[pull]
            else:
                requested = pull
                pull = task.api.get(requested)
                # NOTE(review): presumably the API wrapper returns nothing
                # for a pull it cannot find -- confirm against task.api
                if pull:
                    self._remember(pull, requested)
        elif not isinstance(pull, dict):
            raise ValueError("Invalid pull request: {0}".format(repr(pull)))
        return pull

    def merged(self, pull):
        """Check whether a pull request ended up in its base branch.

        Returns True or False for a closed pull request, and None when the
        pull is not closed, could not be looked up, or its base branch
        could not be fetched.
        """
        pull = self.normalize(pull)
        if not pull:
            return None

        number = pull["number"]
        if number in self.checked:
            return self.checked[number]

        if pull.get("state") != "closed":
            return None

        # GitHub is telling us this was merged
        if pull.get("merged"):
            return True

        # Fetch git data about this branch. The path is made absolute
        # because dirname() is empty when the script is run by bare name.
        cwd = os.path.abspath(os.path.dirname(__file__))
        base = pull["base"]["ref"]
        if base not in self.fetched:
            try:
                subprocess.check_call([ "git", "fetch", "-q", "--", "origin", base ], cwd=cwd)
            except subprocess.CalledProcessError:
                return None # error already printed by process
            self.fetched[base] = base

        # Look for git commits up until a year before the pull request
        when = time.mktime(time.strptime(pull["created_at"], "%Y-%m-%dT%H:%M:%SZ"))
        when -= 60 * 60 * 24 * 365
        since = time.strftime("%Y-%m-%d", time.gmtime(when))

        # Check if it's referred to in this branch
        # NOTE(review): the number is matched as a substring, so pull 12 is
        # also satisfied by a commit saying "Closes #123"
        match = "(Closes|Fixes|closes|fixes).*{0}".format(number)
        cmd = [
            "git", "log", "--extended-regexp", "--grep", match,
            "--since=" + since, "origin/" + base
        ]
        output = subprocess.check_output(cmd, cwd=cwd)
        self.checked[number] = bool(output)
        return self.checked[number]
# Retrieves the content of the given URL
def retrieve(url):
    """Fetch `url` and return its body as text.

    The body is decoded as UTF-8; undecodable bytes are replaced rather
    than raising. Errors from urllib propagate to the caller.
    """
    # NOTE(security): certificate and hostname verification are switched
    # off, so the content of https URLs is not authenticated.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    # Close the response promptly instead of leaving it to the garbage
    # collector; many URLs are retrieved in a single run
    with urllib.request.urlopen(url, context=ctx) as response:
        return response.read().decode('utf-8', 'replace')
# Returns a list of all results at the given URL
def links(url):
    """Return the absolute URLs of all <a href> links on the page at `url`.

    A 404 gives an empty list. Connection level failures are reported on
    stderr and also give whatever was collected so far. Any other HTTP
    error is raised.
    """
    found = [ ]
    collector = HrefParser(url, found)
    try:
        page = retrieve(url)
        collector.feed(page)
    except urllib.error.HTTPError as ex:
        # A missing page simply has no links
        if ex.code != 404:
            raise
    except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
        sys.stderr.write("{0}: {1}\n".format(url, ex))
    return found
# Parses seed input data and passes it through to output,
# all the while recording the fact that certain URLs have
# already been seen
def seed(since, fp, pulls, output):
    """Copy still relevant records from the seed file `fp` to `output`.

    since: epoch seconds; records dated before this are not copied
    fp: binary file object yielding one JSON record per line
    pulls: object whose merged(pull) fills in a missing "merged" value
    output: binary file object the surviving records are written to

    As a side effect the pulls, log URLs and revisions found in the data
    are added to SEEDED so that they are not retrieved again.
    """
    import calendar

    seeded = None
    known = re.compile("# SKIP Known issue #([0-9]+)", re.IGNORECASE)

    while True:
        try:
            line = fp.readline()
        except (OSError, EOFError, zlib.error) as ex:
            # gzip raises EOFError when the compressed stream is truncated
            sys.stderr.write("tests-data: {0}\n".format(str(ex)))
            break
        if not line:
            break

        try:
            item = json.loads(line.decode('utf-8'))
        except ValueError as ex:
            sys.stderr.write("tests-data: {0}\n".format(str(ex)))
            continue
        if not isinstance(item, dict):
            sys.stderr.write("tests-data: {0}\n".format("seed record is not an object"))
            continue

        # Once we see a new pull treat the old one as complete and seeded
        # As a failsafe, just to make sure we didn't miss something
        # we don't treat the last pull request as completely seeded
        pull = item.get("pull")
        if pull and pull != seeded:
            if seeded is not None:
                SEEDED.add(seeded)
            seeded = None

        if pull and item.get("merged") not in [ True, False ]:
            item["merged"] = pulls.merged(pull)

        # Note that we've already retrieved this URL
        url = item.get("url")
        if url and item.get("log") is not None:
            SEEDED.add(url)
            SEEDED.add(urllib.parse.urljoin(url, "./"))

        # If the pull request had a known merged value it can be seeded
        # This forces us to retrieve data about open pull requests again
        if item.get("merged") in [ True, False ]:
            seeded = pull
            revision = item.get("revision")
            if revision:
                SEEDED.add(revision)

        date = item.get("date")
        if not date:
            continue
        try:
            # The dates are UTC, so they must not be read as local time
            when = calendar.timegm(time.strptime(date, "%Y-%m-%dT%H:%M:%SZ"))
        except ValueError as ex:
            sys.stderr.write("tests-data: {0}\n".format(str(ex)))
            continue
        if since > when:
            continue

        # COMPAT: Fix data that wasn't yet valid
        if item.get("status") == "skip":
            match = known.search(item.get("log") or "")
            if match:
                item["status"] = "failure"
                item["tracker"] = qualify("issues/{0}".format(match.group(1)))

        line = json.dumps(item).encode('utf-8') + b"\n"
        output.write(line)
# Generate a list of (revision, merged, created, url) for the given branch
# This includes pull requests targeting the branch in question
#
# revision: the SHA of a commit
# merged: True/False/None whether merged or not
# created: the commit's committer date, or the pull request's creation date
# url: The URL for the pull request or None
def commits(branch, pulls, since, verbose=False):
    """Generate (revision, merged, created, url) for the given branch.

    First come the commits on the branch itself, reported as merged and
    without a pull URL. Then come the revisions of every pull request in
    `pulls` that targets the branch. Revisions and pulls already in SEEDED
    are left out.
    """
    if verbose:
        sys.stderr.write("{0}\n".format(branch))

    # Iterate through commits on the branch
    for commit in task.api.commits(branch, since=since):
        revision = commit["sha"].lower()
        if revision not in SEEDED:
            yield revision, True, commit["commit"]["committer"]["date"], None

    # Iterate through pull requests
    for pull in pulls:
        # The records written by this script identify a pull by its URL,
        # so a check on the number alone never recognizes a seeded pull
        if pull["number"] in SEEDED or pull.get("url") in SEEDED:
            continue
        if pull["base"]["ref"] != branch:
            continue
        if verbose:
            sys.stderr.write("pull-{0}\n".format(pull["number"]))
        merged = pulls.merged(pull)
        for revision in revisions(pull):
            yield revision, merged, pull["created_at"], pull["url"]

            # The next revisions for the pull request are not the ones
            # that got merged. Only the first one produced by revisions
            if merged:
                merged = False
# Get all the revisions in a pull request. GitHub doesn't help
# us here so we have to use silly tricks
def revisions(pull):
    """Generate the revisions (lower case SHAs) a pull request has had.

    The current head comes first. GitHub does not list the earlier heads of
    a pull request, so the remaining ones are read from the "status" files
    that the log sinks keep for each test run of the pull request.

    Nothing is generated when the pull has no head SHA.
    """
    head = pull.get("head", { }).get("sha")
    if not head:
        return

    # First give back the main pull request
    head = head.lower()
    yield head

    # All the revisions we've seen
    seen = { head }

    # Seed the set of sinks. We use these sinks to figure out additional
    # revisions for the pull request. Unfortunately GitHub doesn't help us
    # with a list of revisions that this pull request used to reflect. So
    # we have to look to our sink for that info.
    # NOTE(review): "or { }" assumes the API wrapper may return nothing for
    # an unknown commit -- confirm against task.api
    data = task.api.get("commits/{0}/status?page=1&per_page=100".format(head)) or { }
    for status in data.get("statuses", [ ]):
        url = status.get("target_url")
        if url:
            SEEDED.add(urllib.parse.urljoin(url, "./"))
            sink = urllib.parse.urljoin(url, "../")
            if sink not in SINKS:
                SINKS[sink] = links(sink)

    # We only care about stuff at the sink where pull-XXXX is in the URL.
    # This is how we figure out whether things are related. The number
    # must not be followed by another digit, or pull-12 would also pick
    # up the logs of pull-123.
    related = re.compile("pull-{0}(?![0-9])".format(re.escape(str(pull["number"]))))

    # Now ask each sink for its set of urls
    for sink in SINKS:
        for link in SINKS[sink]:
            if not related.search(link):
                continue

            # Already retrieved this one
            if link in SEEDED:
                continue

            # Build a URL for the cockpituous sink /status file and read it
            target = urllib.parse.urljoin(link, "status")
            try:
                data = json.loads(retrieve(target))
            except (ValueError, ConnectionError) as ex:
                sys.stderr.write("{0}: {1}\n".format(target, ex))
            except urllib.error.HTTPError as ex:
                if ex.code != 404:
                    raise
            except urllib.error.URLError as ex:
                sys.stderr.write("{0}: {1}\n".format(target, ex))
            else:
                # The status file contains a "revision" field which is the git revision
                # of what was tested during that test run. This is what we're after
                if "revision" in data:
                    revision = data["revision"].lower()
                    if revision not in seen:
                        seen.add(revision)
                        yield revision
# Pull out all status (context, created, log) for a given revision. This includes multiple
# test runs for a given revision, and all the various status contexts
def logs(revision):
    """Generate (context, created, url, log) for the statuses of a revision.

    context: the status context
    created: the "created_at" value of the status
    url: the status target URL, without a trailing ".html"
    log: the text retrieved from that URL, or "" when it gave a 404

    Pending statuses, statuses without a target URL and URLs already in
    SEEDED are skipped. A log that could not be retrieved because of a
    connection problem is reported on stderr and skipped as well.
    """
    per_page = 100
    page = 1
    while True:
        # NOTE(review): "or { }" assumes the API wrapper may return nothing
        # for an unknown commit -- confirm against task.api
        data = task.api.get("commits/{0}/status?page={1}&per_page={2}".format(
            revision, page, per_page)) or { }
        statuses = data.get("statuses", [ ])

        for status in statuses:
            # Make sure to not consider "state": "success" as a success
            # here because individual tests may have failed, or been retried.
            #
            # Always only consider tests individually to have run or failed
            # not entire test suite statuses
            if status["state"] == "pending":
                continue

            target = status.get("target_url")
            if not target:
                continue
            if target.endswith(".html"):
                target = target[:-5]
            if target in SEEDED:
                continue

            log = None
            try:
                log = retrieve(target)
            except urllib.error.HTTPError as ex:
                if ex.code != 404:
                    raise
                log = ""
            except (ConnectionResetError, urllib.error.URLError, socket.gaierror) as ex:
                sys.stderr.write("{0}: {1}\n".format(target, ex))

            if log is not None:
                yield (status["context"], status["created_at"], target, log)

        # A short page is the last one. Otherwise move on to the next page;
        # asking for the same page again would never terminate.
        if len(statuses) < per_page:
            break
        page += 1
# Generate (status, name, body, tracker) for each Test Anything Protocol test
# in the content.
#
# status: possible values "success", "failure", "skip"
# name: the name of the test
# body: full log of the test
# tracker: url tracking the failure, or None
def tap(content):
    """Generate (status, name, body, tracker) for each TAP test in `content`.

    status: "success", "failure" or "skip"
    name: the name of the test
    body: the log lines belonging to the test, including its status line
    tracker: URL of the issue tracking a known failure, or None

    A test skipped as a known issue is reported as a "failure" with its
    tracker set.
    """
    # Pulls the issue number out of a "# SKIP Known issue #123" directive
    known = re.compile(r"# SKIP Known issue\s*#?\s*([0-9]+)", re.IGNORECASE)

    name = status = tracker = None
    prefix = None
    body = [ ]
    blocks = False

    for line in content.split('\n'):
        # The test intro, everything before here is fluff
        if not prefix and line.startswith("1.."):
            prefix = line
            body = [ ]
            name = status = tracker = None

        # A TAP test status line
        elif line.startswith(("ok ", "not ok ")):
            body.append(line)

            # Parse out the status
            if line.startswith("not ok "):
                status = "failure"
                line = line[7:]
            else:
                line = line[3:]
                directive = line.upper()
                if "# SKIP KNOWN ISSUE" in directive:
                    status = "failure"
                    match = known.search(line)
                    if match:
                        tracker = qualify("issues/{0}".format(match.group(1)))
                # This has to be an elif, or every known issue would be
                # turned into a plain skip again
                elif "# SKIP" in directive:
                    status = "skip"
                else:
                    status = "success"

            # Parse out the name. A status line may consist of nothing but
            # the test number, so stop once the line is used up.
            while line and (line[0].isspace() or line[0].isdigit()):
                line = line[1:]
            (name, delim, directive) = line.partition("#")
            (name, delim, directive) = name.partition("duration")
            name = name.strip()

            # Old Cockpit tests had strange blocks
            if not blocks:
                yield (status, name, "\n".join(body), tracker)
                status = name = tracker = None
                body = [ ]

        else:
            # Old Cockpit tests didn't properly delimit their output
            if line.startswith("# --------------------"):
                blocks = True
                if status:
                    yield (status, name, "\n".join(body), tracker)
                name = status = tracker = None
                body = [ ]
            body.append(line)

    # In block mode a test is only given back at the next separator, so the
    # last one is still waiting here
    if blocks and status:
        yield (status, name, "\n".join(body), tracker)
# Qualify a URL into the GitHub repository
def qualify(path):
    """Return the absolute GitHub API URL for a repository relative path."""
    qualified = task.api.qualify(path)
    return "https://api.github.com" + qualified
# Script entry point: hand over to the shared task runner
if __name__ == "__main__":
    task.main(
        function=run,
        title="Pull out test data for pull requests",
        verbose=True,
    )