-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake-dataset.lua
99 lines (74 loc) · 2.17 KB
/
make-dataset.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
-- Tensors created below without an explicit type are torch.FloatTensor.
torch.setdefaulttensortype('torch.FloatTensor')

-- Command-line interface.
-- The numeric options are declared with number defaults so that
-- torch.CmdLine converts the supplied values to numbers and rejects
-- non-numeric input while parsing, instead of passing strings on to the
-- arithmetic further down the script.
local cmd = torch.CmdLine()
cmd:text()
cmd:text('Making train and test datasets')
cmd:text('Example:')
cmd:text('$> th make-dataset.lua -csvpath data6from45.csv')
cmd:text('Options:')
cmd:option('-csvpath', 'data6from45.csv', 'Path to csv file with data')
cmd:option('-startidx', 1, 'Start from index')
cmd:option('-endidx', 3000, 'End index')
cmd:option('-norm', 100, 'Normalizing value')
cmd:option('-save', 'lottery.t7', 'Path to save dataset')
-- `opt` is intentionally global: the rest of the script reads its fields
-- (csvpath, startidx, endidx, norm, save).
opt = cmd:parse(arg or {})
-- Load the selected CSV lines into X_tensor, one tensor row per CSV line.
--
-- Lines are counted from 1. The window starts at line -startidx and holds at
-- most -endidx lines, i.e. -endidx is used as a row count here. It only
-- coincides with the last line number when -startidx is 1 (the default).
-- NOTE(review): the option help calls -endidx an "End index"; confirm which
-- meaning is intended before changing the window arithmetic.
local delimiter = ","
local csv_path = opt.csvpath

-- Explicit conversion, so the arithmetic below does not depend on implicit
-- string-to-number coercion of the option values.
local start_idx = tonumber(opt.startidx)
local batch_size = tonumber(opt.endidx)
-- `mean` is intentionally global: the normalisation step further down
-- divides every value by it.
mean = tonumber(opt.norm)
assert(start_idx and batch_size and mean,
       'options -startidx, -endidx and -norm must be numbers')

-- Collect the raw text lines inside the window. The file is opened
-- explicitly so it can be closed as soon as the window has been passed,
-- rather than reading the rest of the file for nothing.
local X_table = {}
local stop_at = start_idx + batch_size
local line_no = 0
local csv_file = assert(io.open(csv_path, 'r'))
for line in csv_file:lines() do
   line_no = line_no + 1
   if line_no >= stop_at then
      break
   end
   if line_no >= start_idx then
      X_table[#X_table + 1] = line
   end
end
csv_file:close()

-- Fail with a readable message instead of a nil-index error when the window
-- selected nothing (empty file, or -startidx beyond the last line).
if #X_table == 0 then
   error('no rows read from ' .. tostring(csv_path)
         .. ' (startidx=' .. tostring(start_idx)
         .. ', endidx=' .. tostring(batch_size) .. ')')
end

-- The first selected line fixes the column count of the whole matrix.
local num_columns = #X_table[1]:split(delimiter)

-- `X_tensor` is intentionally global: the normalisation step reads it.
X_tensor = torch.zeros(#X_table, num_columns)
for i = 1, #X_table do
   X_tensor[i] = torch.Tensor(X_table[i]:split(delimiter))
end
-- Divide every value of X_tensor by `mean` and collect the scaled rows.
-- `input` (global, consumed by the pairing step below) becomes a list holding
-- one 1-D tensor per row of X_tensor.
-- An earlier variant, previously kept here as commented-out code, binarised
-- the values instead (1 when the value is > 0, otherwise 0).
input = {}
local num_rows, num_cols = X_tensor:size(1), X_tensor:size(2)
for i = 1, num_rows do
   local row = X_tensor[i]
   -- Fresh scratch table per row; the tensor is built once, after the whole
   -- row has been scaled, rather than once per column.
   local scaled = {}
   for b = 1, num_cols do
      scaled[b] = row[b] / mean
   end
   input[i] = torch.Tensor(scaled)
end
-- Pair consecutive rows into examples and pack them into two tensors.
-- For pair k, row 2k-1 of the normalised data goes to `output` and row 2k
-- goes to `input`. A trailing row without a partner is dropped, so both
-- tensors have the same number of rows and no row is left uninitialised
-- (torch.Tensor(n, m) does not zero its storage).
local rows = input                      -- list of 1-D tensors built above
local num_pairs = math.floor(#rows / 2)
-- Column count comes from the data itself instead of a fixed 45.
local width = (#rows > 0) and rows[1]:size(1) or 0

-- `input` and `output` are intentionally global: the save step reads them.
input = torch.Tensor(num_pairs, width)
output = torch.Tensor(num_pairs, width)
for k = 1, num_pairs do
   output[k] = rows[2 * k - 1]
   input[k] = rows[2 * k]
end
-- Bundle both tensors under the keys `input` and `output`, then write the
-- bundle to the path given by -save.
datasave = {}
datasave.input = input
datasave.output = output
print("Storing training data in file:", opt.save)
torch.save(opt.save, datasave)

-- Read the file back and print what was loaded.
dataset = torch.load(opt.save)
print(dataset)