forked from dmlc/mxnet-notebooks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_ipynb.py
166 lines (148 loc) · 5.82 KB
/
test_ipynb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""
This script runs notebooks in the selected directories and reports
errors for each notebook.
Traceback information can be found in the output notebooks
generated in the corresponding output directories.
Before running this script, make sure all the notebooks have
been run at least once and outputs are generated.
"""
import os
import errno
import json
import ConfigParser
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
def _notebook_run(path, timeout=900, kernel_name='python2'):
    """Execute a notebook via nbconvert and collect output.

    The executed notebook is written to
    ``<notebook dir>/test_output/<notebook name>_output.ipynb``. When the
    execution itself raised no error, the text outputs of that file are
    compared with the original notebook through ``_verify_output``.

    Parameters
    ----------
    path : str
        notebook file path.
    timeout : int, optional
        value forwarded to ``ExecutePreprocessor(timeout=...)``.
    kernel_name : str, optional
        value forwarded to ``ExecutePreprocessor(kernel_name=...)``.

    Returns
    -------
    error : str
        Empty string when the notebook ran and its outputs are unchanged.
        Otherwise the message of the exception raised while executing the
        notebook, or "Output in cell No.<n> has changed." when the outputs
        differ from the original notebook.

    Raises
    ------
    OSError
        if the output directory cannot be created for a reason other than
        it already existing.
    """
    kernel_died = 'Kernel died before replying to kernel_info'
    max_attempts = 5

    parent_dir, nb_name = os.path.split(path)
    with open(path) as nb_file:
        nb = nbformat.read(nb_file, as_version=4)
    ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name)

    error = ""
    # Use a loop to avoid the "Kernel died before replying to kernel_info"
    # error: the execution is retried up to max_attempts times.
    for _ in range(max_attempts):
        error = ""
        try:
            ep.preprocess(nb, {'metadata': {'path': parent_dir}})
        except Exception as e:
            # Deliberately broad: any execution failure is reported to the
            # caller as a message instead of aborting the whole test run.
            # KeyboardInterrupt/SystemExit are not caught and propagate.
            error = str(e)
        if error != kernel_died:
            break
    else:
        # Every attempt ended with the kernel dying; nothing is written.
        return error

    # os.path.join keeps the output next to the notebook even when the
    # notebook path has no directory component (parent_dir == "").
    output_dir = os.path.join(parent_dir, "test_output")
    output_nb = os.path.join(
        output_dir, os.path.splitext(nb_name)[0] + "_output.ipynb")
    # Trap an EEXIST to avoid race condition
    try:
        os.makedirs(output_dir)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise
    with open(output_nb, mode='w') as f:
        nbformat.write(nb, f)

    if not error:
        cell_num = _verify_output(path, output_nb)
        if cell_num > 0:
            error = "Output in cell No.%d has changed." % cell_num
    return error
def _verify_output(origin_nb, output_nb):
    """Compare the output cells of testing output notebook with original notebook.

    Cells are paired by position. A cell of the original notebook is skipped
    when its source is empty, when the first entry of its source is
    "# Output may vary\\n", or when it has no "outputs" key. For the remaining
    cells the text extracted by ``_extract_output`` must be equal in both
    notebooks.

    Parameters
    ----------
    origin_nb : str
        original notebook file path.
    output_nb : str
        output notebook file path.

    Returns
    -------
    cell_num : int
        0 when no difference was found. Otherwise the "execution_count" of
        the first original cell whose outputs differ, or the 1-based position
        of that cell when its execution count is missing, None or 0.
    """
    # Context managers close both files even if parsing raises.
    with open(origin_nb) as origin_nb_file:
        origin_cells = json.load(origin_nb_file)["cells"]
    with open(output_nb) as output_nb_file:
        output_cells = json.load(output_nb_file)["cells"]

    paired_cells = zip(origin_cells, output_cells)
    for position, (origin_cell, output_cell) in enumerate(paired_cells, 1):
        source = origin_cell["source"]
        if (not source or source[0] == "# Output may vary\n"
                or "outputs" not in origin_cell):
            continue
        origin_text = _extract_output(origin_cell["outputs"])
        output_text = _extract_output(output_cell["outputs"])
        if origin_text != output_text:
            # Callers test "cell_num > 0", so a falsy execution count would
            # hide the mismatch; fall back to the cell position instead.
            return origin_cell.get("execution_count") or position
    return 0
def _extract_output(outputs):
    """Extract text part of output of a notebook cell.

    For every output entry, the contents of each key starting with "text"
    are concatenated. For a "data" key, the contents of each of its sub-keys
    starting with "text" are concatenated as well, except for items that
    start with "<matplotlib" or "<graphviz".

    Parameters
    ----------
    outputs : list
        list of output dicts of one notebook cell.

    Returns
    -------
    ret : str
        Concatenation of all text output contents
    """
    # Items with these prefixes are left out of the comparison text;
    # presumably because such object reprs differ between runs - TODO confirm.
    skipped_prefixes = ('<matplotlib', '<graphviz')
    pieces = []
    for output in outputs:
        for key, val in output.items():
            if str(key).startswith('text'):
                pieces.extend(str(content) for content in val)
            elif key == 'data':
                for dt_key, dt_val in val.items():
                    if not str(dt_key).startswith('text'):
                        continue
                    pieces.extend(
                        str(dt_content) for dt_content in dt_val
                        if not str(dt_content).startswith(skipped_prefixes))
    return ''.join(pieces)
# Driver: read the folders to test from the 'path' option of the
# 'Folder Path' section of test_config.txt (entries separated by ", "),
# run every notebook found below them and print a summary.
configParser = ConfigParser.RawConfigParser()
configFilePath = 'test_config.txt'
configParser.read(configFilePath)
test_dirs = configParser.get('Folder Path', 'path').split(', ')
failed_notebooks = []
total_num = 0
fail_num = 0
succ_num = 0
for test_dir in test_dirs:
    for root, _subdirs, filenames in os.walk(test_dir):
        for filename in filenames:
            if not filename.endswith('.ipynb'):
                continue
            # Skip Jupyter checkpoint copies and notebooks generated by
            # earlier test runs.
            if filename.endswith(('-checkpoint.ipynb', '_output.ipynb')):
                continue
            notebook = os.path.join(root, filename)
            parent_dir = os.path.dirname(notebook)
            # NOTE(review): this only matches a directory path that is
            # exactly "output"; generated notebooks live in "test_output"
            # and are already excluded by suffix above - confirm intent.
            if parent_dir == "output":
                continue
            print("Start to test %s.\n" % notebook)
            error = _notebook_run(notebook)
            if not error:
                succ_num += 1
                print("Tests for %s all passed!\n" % filename)
            else:
                fail_num += 1
                failed_notebooks.append(notebook)
                print("Tests for %s failed:\n" % filename)
                print(error + '\n')
                # For these two errors the user is pointed to a manual run
                # rather than to the output notebook.
                if error in ('Cell execution timed out, see log for details.',
                             'Kernel died before replying to kernel_info'):
                    print("Please manually run this notebook to debug.\n")
                else:
                    print("See output notebook for the traceback.\n")
            total_num += 1
print("%d notebooks tested, %d succeeded, %d failed" % (total_num, succ_num, fail_num))
if failed_notebooks:
    print("Following are failed notebooks:")
    for nb in failed_notebooks:
        print(nb)