-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathMinimumSpanningTree.py
234 lines (187 loc) · 9.2 KB
/
MinimumSpanningTree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import minimum_spanning_tree
from Symbol import *
from Edmonds import *
from RelationShip_Classifier import *
import pdb
class MinimumSpanningTree:
'''
This class is used for segmentation, it create a minimum spanning tree, which is then used by dynamic programining algorithm.
'''
def __init__main():
pass
def get_spanning_tree(self, trace_objs):
'''
The function creates minimum spanning tree using Euclidean distance
input :
trace_objs: These are strokes
return : an matrix which represents minimum spanning tree.
'''
N=len(trace_objs) # trace_objs is a dictionary
adj_matrix=np.zeros((N,N))
for i in xrange(N):
a_x,a_y=trace_objs[i].get_centroids()
for j in xrange(i+1,N):
b_x,b_y=trace_objs[j].get_centroids()
adj_matrix[i,j]=np.sum(np.square(a_x-b_x)+np.square(a_y-b_y))
tree=minimum_spanning_tree(adj_matrix)
return (tree.toarray().astype(int))
def get_spanning_tree_symbol_level(self, symbol_objs):
'''
The function creates minimum spanning tree using Euclidean distance
input :
trace_objs: These are strokes
return : an matrix which represents minimum spanning tree.
'''
N=len(trace_objs) # trace_objs is a dictionary
adj_matrix=np.zeros((N,N))
for i in xrange(N):
a_x,a_y=trace_objs[i].get_centroids()
for j in xrange(i+1,N):
b_x,b_y=trace_objs[j].get_centroids()
adj_matrix[i,j]=np.sum(np.square(a_x-b_x)+np.square(a_y-b_y))
tree=minimum_spanning_tree(adj_matrix)
return (tree.toarray().astype(int))
def memoized_initialization(self,n):
'''
The function is used for initialization of r,s arrays
input : size of the array
returns:
r: stores the distance for every possible split
s: stroes the index of what is the correct split, this is used for back tracking.
'''
r = np.zeros(n)
s=np.zeros(n)
for i in xrange(n):
s[i]=0
r[i]=-1000000
return r,s
# To check if there is an edge between 2 nodes.
def check_edge(self,list_strokes,minimum_span_tree):
'''
The function check whether there is an edge between the two vertices (strokes) of the graph.
input:
list_strokes : List of strokes which are vertices of a graph.
minimum_span_tree: The minimum spanning tree of the graph.
returns:
true is there is a direct edge between the two vertices.
'''
flag=True
for i in xrange(len(list_strokes)-1):
index_1=list_strokes[i]
index_2=list_strokes[i+1]
if (minimum_span_tree[index_1,index_2]==True) or (minimum_span_tree[index_2,index_1]==True):
continue
else:
flag=False
break
return flag
def optimal_segmentation(self,total_iter,start,r,s,classifier_obj,X_dict_test,min_span_tree,dict_mapping_strokes,symbol_obj):
'''
The function is used to create optimal segmentation of strokes suing minimum spanning tree and Dyanmic Programming.
input :
total_iter : Total number of iterations , i.e total number of strokes.
r: It is used lookup array, to get the values of already computed distance .
s: It is used get the index , which gives the best split of segemantation of strokes. i.e the iterartion, number which gives max distance for that set of strokes.
classifier_obj : Classifier's Object used for classification.
X_dict_test : Samples for segmenatation
min_span_tree : minimum spanning tree matrix
dict_mapping_strokes: This stores the list of strokes to be grouped together for the best split .
symbol_obj : Object of symbol class
returns:
r[j]: It is used lookup array, to get the values of already computed distance .This returns the max values for est possible combination of strokes.
s: It is used get the index , which gives the best split of segemantation of strokes. i.e the iterartion, number which gives max distance for that set of strokes.
dict_mapping_strokes: This stores the list of strokes to be grouped together for the best split .
'''
if r[total_iter-start]>=0:
return r[total_iter-start],s,dict_mapping_strokes
elif total_iter-start==0:
r[total_iter-start]=1
return r[total_iter-start],s,dict_mapping_strokes
else:
q=-10000 # initialized to some -ve value
stroke_set_list=[]
list_strokes=[]
counter =1
for i in xrange(start,total_iter):
list_strokes.append(i)
if self.check_edge(list_strokes,min_span_tree):
stroke_set=X_dict_test[i-1]
stroke_set_list.append(stroke_set) # appending the strokes/trace object , if the symbol contains multiple strokes.
symbol_obj.symbol_list=stroke_set_list
features_list=symbol_obj.get_features()
stroke_set_array=np.asarray(features_list)
prob_stroke_set=np.max(classifier_obj.predict_proba(stroke_set_array.reshape(1,-1)))
start_next_iter=i+1
val,s,dict_mapping_strokes=self.optimal_segmentation(total_iter,start_next_iter,r,s,classifier_obj,X_dict_test,min_span_tree,dict_mapping_strokes,symbol_obj)
val=(prob_stroke_set+val)
#val=val*prob_stroke_set
if q<val:
q=val
s[total_iter-start]=counter # This is back track for the best path or optimal partioning
dict_mapping_strokes[total_iter-start]=[k for k in list_strokes]
r[total_iter-start]=q
counter=counter+1
else:
continue
return r[total_iter-start],s,dict_mapping_strokes
def get_segmentation(self,trace_obj_dict,classifier_obj,symbol_obj,File_write_obj,fileName,lg_folder_name,rel_classifier_obj):
'''
The function is calls optimal segmenattation and then call method to write lg files
Input
trace_obj_dict: trace objects for that file
classifier_obj: classifier object
symbol_obj: Symbol class object
File_write_obj:File write object
lg_folder_name: Foldername to store lg files
'''
min_span_tree=self.get_spanning_tree(trace_obj_dict)
demo_min_span_tree=min_span_tree
temp=np.zeros(demo_min_span_tree.shape[0])
temp=temp.reshape(temp.shape[0],1)
demo_min_span_tree=np.hstack((temp,demo_min_span_tree))
temp=np.zeros(demo_min_span_tree.shape[1])
demo_min_span_tree=np.vstack((temp,demo_min_span_tree))
demo_min_span_tree_bool=demo_min_span_tree>0
# Now get best probability distribution for set of strokes
dict_trace_map={}
count=0
s=[]
# This will help in mapping which index belong to which trace
for i in xrange(1,len(trace_obj_dict)+1):
dict_trace_map[i]=trace_obj_dict[i-1]
n=len(trace_obj_dict)+1
r,s = self.memoized_initialization(n)
dict_mapping_strokes={}
r,s,dict_mapping_strokes=self.optimal_segmentation(n,1,r,s,classifier_obj,trace_obj_dict,demo_min_span_tree_bool,dict_mapping_strokes,symbol_obj)
index=n-1
list_predict_symbol=[]
symbol_list=[]
count_traces=0
#Back Track and get the list of strokes
while index>0:
x=s[index]
list_strokes=dict_mapping_strokes[index]
symbol_object=Symbol()
symbol_list.append(symbol_object) # List of symbol object , which will be used to extract features
strokes_symbol=[]
for k in list_strokes:
trace_obj=dict_trace_map[k]
strokes_symbol.append(trace_obj)
count_traces=count_traces+1
symbol_object.symbol_list=strokes_symbol
index=index-int(x)
stroke_to_pixel=[]
X_test=[]
for symbol in symbol_list:
features=symbol.get_features()
X_test.append(features)
X_test_final=np.asarray(X_test)
predict_labels=classifier_obj.predict(X_test_final)
e=Graph()
r=RelationShipClassifier()
adj_matrix,dict_mapping_Symbol_index,index_to_symbol=e.LineOfSight(symbol_list,rel_classifier_obj)
dict_map_rel_to_syms=r.get_parse_layout(adj_matrix,dict_mapping_Symbol_index,index_to_symbol,rel_classifier_obj)
dict_map_rel_to_syms=dict_map_rel_to_syms[0]
File_write_obj.write_to_lg(predict_labels,fileName,symbol_list,dict_map_rel_to_syms,count_traces,lg_folder_name)