-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhungrian.py
52 lines (39 loc) · 1.32 KB
/
hungrian.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
'''
If we use (external) classification evalutation measures like F1 or
accuracy for clustering evaluation, problems may arise.
One way to fix is to perform label matching.
Here we performs kmeans clustering on the Iris dataset and proceed to use
the Hungarian (Munkres) algorithm to correct the mismatched labeling.
https://gist.github.com/siolag161/dc6e42b64e1bde1f263b
'''
import sys
import numpy as np
from munkres import Munkres
def make_cost_matrix(c1, c2):
uc1 = np.unique(c1)
uc2 = np.unique(c2)
l1 = uc1.size
l2 = uc2.size
# print('uc1,uc2:',uc1,uc2)
# print('l1,l2:',l1,l2)
assert(l1 == l2 and np.all(uc1 == uc2))
m = np.ones([l1, l2])
for i in range(l1):
it_i = np.nonzero(c1 == uc1[i])[0]
for j in range(l2):
it_j = np.nonzero(c2 == uc2[j])[0]
m_ij = np.intersect1d(it_j, it_i)
m[i,j] = -m_ij.size
return m
def translate_clustering(clt, mapper):
return np.array([ mapper[i] for i in clt ])
def label_mapping(tru,pre):
classes = tru
labels = pre
num_labels = len(np.unique(classes))
cost_matrix = make_cost_matrix(labels, classes)
m = Munkres()
indexes = m.compute(cost_matrix)
mapper = { old: new for (old, new) in indexes }
new_labels = translate_clustering(labels, mapper)
return new_labels