-
Notifications
You must be signed in to change notification settings - Fork 16
/
utils.py
116 lines (92 loc) · 3.25 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import json
import numpy as np
import pandas as pd
class Params:
    """Bag of hyper-parameters backed by a JSON file.

    Adapted from:
    https://github.com/cs230-stanford/cs230-code-examples/blob/master/pytorch/nlp/utils.py

    Every top-level key of the JSON object becomes an attribute of the
    instance, so ``params.learning_rate`` works after loading a file that
    contains ``{"learning_rate": ...}``.

    Parameters
    ----------
    json_path : string
        Path of the JSON file to read the initial parameters from.
    """

    def __init__(self, json_path):
        with open(json_path) as handle:
            loaded = json.load(handle)
        vars(self).update(loaded)

    def save(self, json_path):
        """Write all current parameters to ``json_path`` as indented JSON."""
        with open(json_path, 'w') as handle:
            json.dump(vars(self), handle, indent=4)

    def update(self, json_path):
        """Merge parameters from ``json_path``; existing keys are overwritten."""
        with open(json_path) as handle:
            loaded = json.load(handle)
        vars(self).update(loaded)

    @property
    def dict(self):
        """Dict-like access to the instance, e.g. ``params.dict['learning_rate']``.

        The returned mapping is the live attribute dictionary, not a copy.
        """
        return vars(self)
def one_hot_encode(y):
    """One hot encode y for binary features.

    Used to go from one-dimensional label vectors to a two-column,
    predict-proba style matrix. Based on this Stack Overflow post:
    https://stackoverflow.com/questions/29831489/convert-array-of-indices-to-1-hot-encoded-numpy-array

    Parameters
    ----------
    y : np.ndarray
        One-dimensional sequence of binary labels. Integer, boolean and
        integer-valued float labels (e.g. ``0.0`` / ``1.0``) are accepted.

    Returns
    ----------
    A np.ndarray of shape ``(len(y), 2)`` holding the one hot encoded data:
    row ``i`` has a 1 in column ``y[i]`` and 0 elsewhere.

    Raises
    ----------
    ValueError
        If ``y`` holds floating point values that are not whole numbers.
    IndexError
        If a label is outside the two available columns.
    """
    labels = np.asarray(y)

    if labels.dtype.kind == "b":
        # A boolean array used as an index is treated as a mask over the
        # column axis, not as the column numbers 0/1, so convert it first.
        labels = labels.astype(np.intp)
    elif labels.dtype.kind == "f":
        # Float arrays cannot be used as indices; accept them only when every
        # value is a whole number so nothing is silently truncated.
        as_int = labels.astype(np.intp)
        if not np.array_equal(as_int, labels):
            raise ValueError("one_hot_encode expects integer-valued labels")
        labels = as_int

    y_hat_one_hot = np.zeros((len(labels), 2))
    y_hat_one_hot[np.arange(len(labels)), labels] = 1
    return y_hat_one_hot
def rank_features(explanation):
    """Rank the feature names of an explanation by importance.

    Parameters
    ----------
    explanation : list
        Pairs of ``(name, value)``.

    Returns
    ----------
    List containing the feature names ordered by decreasing absolute value.
    Entries whose value is exactly 0 are replaced by the placeholder tuple
    ``("Nothing shown", 0)`` instead of their name.
    """
    by_importance = list(explanation)
    # Stable sort: pairs with equal magnitude keep their original order.
    by_importance.sort(key=lambda pair: abs(pair[1]), reverse=True)

    ranked = []
    for pair in by_importance:
        if pair[1] != 0:
            ranked.append(pair[0])
        else:
            ranked.append(("Nothing shown", 0))
    return ranked
def get_rank_map(ranks, to_consider):
    """Map each rank position to the features seen there and how often.

    Parameters
    ----------
    ranks : list
        One list of feature names per rank position; ``ranks[0]`` holds the
        names observed at position 1, and so on.
    to_consider : int
        Denominator used to turn occurrence counts into fractions.

    Returns
    ----------
    A dictionary keyed by 1-based rank position. Each value is a list of
    ``(name, count / to_consider)`` tuples, one per distinct name found at
    that position, in the order produced by ``np.unique``.
    """
    rank_map = {}
    for position, names in enumerate(ranks, start=1):
        observed = np.array(names)
        rank_map[position] = [
            (name, np.sum(observed == name) / to_consider)
            for name in np.unique(names)
        ]
    return rank_map
def experiment_summary(explanations, features):
    """Provide a high level display of the experiment results for the top three features.

    This should be read as the rank (e.g. 1 means most important) and the pct
    occurrences of the features of interest at that rank.

    Parameters
    ----------
    explanations : list
        One explanation per instance, each a list of ``(name, value)`` pairs
        as accepted by ``rank_features``.
    features : list
        Names of the features of interest. ``"Nothing shown"`` is always
        considered in addition to these.

    Returns
    ----------
    A summary of the experiment: the dictionary produced by ``get_rank_map``,
    keyed by rank position 1 to 3, with fractions taken over
    ``len(explanations)``.
    """
    n_top = 3
    candidates = list(features) + ["Nothing shown"]
    top_features = [[] for _ in range(n_top)]

    # sort ranks into top 3 features
    for exp in explanations:
        ranks = rank_features(exp)
        # Slicing rather than indexing 0..2 tolerates explanations that
        # contain fewer than three entries.
        for position, ranked in enumerate(ranks[:n_top]):
            for f in candidates:
                # For a string entry this is a substring test, so a feature
                # also matches a longer ranked name that contains it.
                # NOTE(review): presumably intended to match names such as
                # "feature=value" -- confirm against the callers.
                if f in ranked:
                    top_features[position].append(f)

    return get_rank_map(top_features, len(explanations))