-
Notifications
You must be signed in to change notification settings - Fork 53
/
Copy pathdataUtils.py
65 lines (51 loc) · 1.81 KB
/
dataUtils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
__author__ = 'Frederic Godin ([email protected] / www.fredericgodin.com)'
import numpy
def read_and_sort_matlab_data(x_file,y_file,padding_value=15448):
    """Load integer-encoded rows and their labels, ordered by row length.

    Each line of ``x_file`` is a comma-separated row of integers. The length
    of a row is the position of the first ``padding_value`` in it, or the
    full row width when the padding value does not occur. Each line of
    ``y_file`` is comma-separated as well; only its first field is read, and
    it is shifted down by one (presumably converting 1-based MATLAB labels
    to 0-based ones -- confirm against the data export).

    Rows are returned in ascending order of length, and rows of equal length
    keep their original file order. The order is produced by an explicit
    stable sort: grouping rows in a dict and iterating it does not yield the
    keys in sorted order.

    NOTE(review): all rows are assumed to have the same width (i.e. to be
    padded already); ragged rows cannot be packed into a 2-D int32 array.

    Args:
        x_file: Path of the text file holding the integer rows.
        y_file: Path of the text file holding one label per line, aligned
            line by line with ``x_file``.
        padding_value: Integer that marks padding inside a row.

    Returns:
        A tuple ``(x, y, lengths)``. ``x`` and ``y`` are ``numpy.int32``
        arrays with the reordered rows and labels; ``lengths`` is a plain
        list with the length of each returned row.

    Raises:
        ValueError: If a field cannot be parsed as an integer.
        IndexError: If ``y_file`` has fewer lines than ``x_file``.
    """
    x_data = []
    lengths = []
    # "with" guarantees the handle is closed even when parsing raises.
    with open(x_file, "r") as x_handle:
        for line in x_handle:
            row = [int(word) for word in line.split(",")]
            try:
                # Length = number of entries before the first padding marker.
                length = row.index(padding_value)
            except ValueError:
                # No padding marker at all: the whole row is content.
                length = len(row)
            x_data.append(row)
            lengths.append(length)

    with open(y_file, "r") as y_handle:
        # Only the first field of each label line is used.
        y_data = [int(line.split(",")[0]) - 1 for line in y_handle]

    # sorted() is stable, so rows sharing a length keep their file order.
    order = sorted(range(len(x_data)), key=lengths.__getitem__)

    sorted_rows = [x_data[index] for index in order]
    sorted_labels = [y_data[index] for index in order]
    sorted_lengths = [lengths[index] for index in order]

    return (numpy.asarray(sorted_rows, dtype=numpy.int32),
            numpy.asarray(sorted_labels, dtype=numpy.int32),
            sorted_lengths)
def pad_to_batch_size(array,batch_size):
    """Append all-zero rows so the row count becomes a multiple of batch_size.

    Padding is added along the first axis only; every trailing dimension of
    ``array`` is kept, so 1-D, 2-D and higher-rank arrays are all accepted.

    NOTE(review): when the row count is already a multiple of
    ``batch_size``, a complete extra batch of zero rows is still appended
    (``rows_extra`` equals ``batch_size`` in that case). ``extend_lenghts``
    pads the same way, so the behaviour is kept as is -- confirm whether
    callers rely on it before changing it.

    Args:
        array: numpy array with at least one dimension.
        batch_size: Non-zero batch size to pad towards.

    Returns:
        A new array made of ``array`` followed by the zero rows. The padding
        is created as ``numpy.int32``; the dtype of the result follows
        numpy's usual type promotion between ``array`` and the padding.

    Raises:
        ZeroDivisionError: If ``batch_size`` is 0.
    """
    rows_extra = batch_size - (array.shape[0] % batch_size)
    # Copy the trailing dimensions so the padding stacks onto any rank.
    padding_shape = (rows_extra,) + tuple(array.shape[1:])
    padding = numpy.zeros(padding_shape, dtype=numpy.int32)
    return numpy.concatenate((array, padding), axis=0)
def extend_lenghts(length_list,batch_size):
    """Pad ``length_list`` in place to a multiple of ``batch_size``.

    The list is extended by repeating its last element. An empty list has no
    last element to repeat, so it is filled with zeros instead; this matches
    the ``batch_size`` all-zero rows that ``pad_to_batch_size`` produces for
    an empty array.

    As in ``pad_to_batch_size``, a list whose size is already a multiple of
    ``batch_size`` still receives ``batch_size`` extra entries.

    The misspelled function name is kept for backward compatibility.

    Args:
        length_list: List of lengths; modified in place.
        batch_size: Non-zero batch size to pad towards.

    Returns:
        None. The list is mutated in place.

    Raises:
        ZeroDivisionError: If ``batch_size`` is 0.
    """
    elements_extra = batch_size - (len(length_list) % batch_size)
    # Fall back to 0 when there is no last length to repeat.
    fill_value = length_list[-1] if length_list else 0
    length_list.extend([fill_value] * elements_extra)