-
Notifications
You must be signed in to change notification settings - Fork 16
/
create_pca.py
64 lines (41 loc) · 1.34 KB
/
create_pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from adversarial_models import *
from utils import *
from get_data import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import lime
import lime.lime_tabular
import shap
from copy import deepcopy
params = Params("model_configurations/experiment_params.json")
X, y, cols = get_and_preprocess_compas_data(params)
features = [c for c in X]
race_indc = features.index('race')
X = X.values
c_cols = [features.index('c_charge_degree_F'), features.index('c_charge_degree_M'), features.index('two_year_recid'), features.index('race'), features.index("sex_Male"), features.index("sex_Female")]
X = np.delete(X, c_cols, axis=1)
ss = StandardScaler().fit(X)
X = ss.transform(X)
r = []
for _ in range(1):
p = np.random.normal(0,1,size=X.shape)
# for row in p:
# for c in c_cols:
# row[c] = np.random.choice(X[:,c])
X_p = X + p
r.append(X_p)
r = np.vstack(r)
p = [1 for _ in range(len(r))]
iid = [0 for _ in range(len(X))]
all_x = np.vstack((r,X))
all_y = np.array(p + iid)
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
results = pca.fit_transform(all_x)
print (len(X))
plt.scatter(results[:500,0], results[:500,1], alpha=.1)
plt.scatter(results[-500:,0], results[-500:,1], alpha=.1)
plt.show()