-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_statistics.py
executable file
·113 lines (89 loc) · 3.75 KB
/
plot_statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#! /bin/env python3
''' Plots statistics for a specific run '''
# pylint: disable=too-many-locals,too-many-statements,too-many-branches,fixme
import os
import sys
import argparse
from sys import stderr
import matplotlib.pyplot as plt
from seaborn import lineplot
from pandas import read_csv, concat
def _node_sort_key(node_id):
    ''' Sort key placing numeric node ids first (in numeric order), then the rest '''
    try:
        return (0, int(node_id), "")
    except ValueError:
        return (1, 0, node_id)


def _collect_node_data(data, nodes):
    ''' Stack the per-node columns ("nodes.<id>.<metric>") into one long table.

    The result has the shared "time" column, one column per node metric (with
    the "nodes.<id>." prefix stripped) and a "node" column holding "#<id>".
    Returns None when `nodes` is empty.
    '''
    frames = []

    for node_id in nodes:
        prefix = f"nodes.{node_id}."
        column_names = [col for col in data.columns
                        if col.startswith(prefix) or col == "time"]

        this_node = data[column_names].copy()
        this_node.columns = [col[len(prefix):] if col.startswith(prefix) else col
                             for col in this_node.columns]
        this_node["node"] = f"#{node_id}"
        frames.append(this_node)

    if not frames:
        return None

    # Concatenate once (instead of once per node) and drop the repeated
    # row labels, which are meaningless after stacking.
    return concat(frames, ignore_index=True)


def _main():
    ''' Read a statistics CSV and write a figure with one panel per metric.

    Columns named "network.<metric>" are drawn as a single line; columns named
    "nodes.<id>.<metric>" are drawn as one line per node. The figure is saved
    to the path given by --outfile. Exits with status 1 if the input has no
    "time" column or no metric columns.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str)
    # NOTE(review): --prefix and --filter are accepted but not applied anywhere
    # in this function; confirm the intended semantics before wiring them up.
    parser.add_argument('--prefix', type=str,
        help='Only consider metrics with specific prefixes, e.g. "nodes"')
    parser.add_argument('--filter', type=str, help="Select which metrics to use")
    parser.add_argument('--font-size', type=int, default=4)
    parser.add_argument('--linewidth', type=float, default=0.5)
    parser.add_argument('--outfile', type=str, default='statistics.pdf')
    args = parser.parse_args()

    plt.rcParams.update({'font.size': args.font_size})

    # Human-readable axis labels; metrics without an entry use their raw name
    metric_names = {
        "incoming_data": "Incoming Data",
        "network_traffic": "Network Traffic",
    }
    # Metrics shown in log scale (first instance spawn takes long)
    log_scale_metrics = ("job-runtime", "total-job-runtime", "num-objects")

    data = read_csv(args.path, header=0)

    if "time" not in data.columns:
        stderr.write('ERROR: input has no "time" column\n')
        sys.exit(1)

    # convert time to seconds (input is presumably in milliseconds)
    data["time"] = data["time"] / 1000.0

    columns = list(data.columns)

    # Sorted so that the panel order is the same on every run
    metrics = sorted({col.split('.')[-1] for col in columns} - {"time"})

    network_cols = [col for col in columns if col.startswith("network.")]
    network_metrics = {col.split('.')[-1] for col in network_cols}
    print(f"Found network metrics: {network_metrics}")

    node_cols = [col for col in columns if col.startswith("nodes.")]
    nodes = {col.split('.')[1] for col in node_cols}
    node_metrics = {col.split('.')[-1] for col in node_cols}
    print(f"Found node metrics: {node_metrics}")

    if not metrics:
        stderr.write("ERROR: need at least one metric\n")
        sys.exit(1)

    # squeeze=False always yields a 2D array, so a single panel needs no special case
    _fig, axes = plt.subplots(len(metrics), 1, squeeze=False)
    axes = axes[:, 0]

    # Sorted so that legend order and line colours are stable between runs
    node_data = _collect_node_data(data, sorted(nodes, key=_node_sort_key))

    last_idx = len(metrics) - 1

    for (metric_idx, metric_name) in enumerate(metrics):
        axis = axes[metric_idx]

        use_log = metric_name in log_scale_metrics
        axis.set_yscale("log" if use_log else "linear")

        # A metric present under both prefixes is plotted from the network column
        if metric_name in network_metrics:
            lineplot(data=data, x="time", y="network."+metric_name,
                     linewidth=args.linewidth, ax=axis)
        elif metric_name in node_metrics:
            assert node_data is not None
            lineplot(data=node_data, x="time", y=metric_name,
                     hue="node", linewidth=args.linewidth, ax=axis)
            axis.legend(title="Node")
        else:
            raise RuntimeError(f"Invalid state for metric {metric_name}")

        if metric_idx == last_idx:
            # only label x axis once
            axis.set_xlabel("Time (s)")
        else:
            axis.get_xaxis().set_visible(False)

        if not use_log:
            # A lower bound of zero is not valid on a log-scaled axis
            axis.set(ylim=(0, None))

        axis.set_ylabel(metric_names.get(metric_name, metric_name))

    plt.tight_layout()

    print(f'Writing output to {args.outfile}')
    plt.savefig(args.outfile)
# Script entry point: generate the plots only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    _main()