This repository has been archived by the owner on Feb 7, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.cpp
323 lines (266 loc) · 7.97 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
#include <dirent.h>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>
#include <iomanip>
// ---- Compile-time constants ----
// Column of a labels row that holds the class id (read by get_score).
#define CLASS_NUMBER 0
// Starting value of every per-class vote counter in get_final_class.
#define INITIAL_VAL 0
// Number of digits after the decimal point when the accuracy is printed.
#define PRECISION 2
// Field delimiter inside one CSV line.
#define COMMA ','
// Pieces of the report line printed by predict_hard_voting.
#define COLON ':'
#define SPACE ' '
#define PERCENT '%'
// Record delimiter handed to getline when reading CSV files.
#define NEW_LINE '\n'
// Path separator appended to both directory arguments in main.
// (The identifier is spelled "SEPERATOR" wherever it is used; kept as-is.)
#define SEPERATOR "/"
// File extension: filters the weight files and completes the validation file names.
#define SUFFIX ".csv"
// Base names (without SUFFIX) of the validation feature file and label file.
#define DATASET "dataset"
#define LABELS "labels"
// Caption printed in front of the final accuracy value.
#define ACCURACY "Accuracy"
// The rest of the file uses standard-library names unqualified and relies on this directive.
using namespace std;
// Rows of floats parsed from a CSV file. Used both for the feature data and
// for the per-classifier weight ("betha") tables.
typedef vector<vector<float> > Dataset;
// Rows of ints parsed from the labels CSV; column CLASS_NUMBER is the class id.
typedef vector<vector<int> > Labels;
// One predicted class index per dataset row.
typedef vector<int> Prediction;
// One linear score per class for a single dataset row.
typedef vector<float> Scores;
// Column indices into one row of a weight table (get_scores reads one row per class).
// get_scores evaluates: row[BIAS] + row[BETHA_0] * LENGTH feature + row[BETHA_1] * WIDTH feature.
enum Classifier
{
BETHA_0, // column 0: weight multiplied with the LENGTH feature
BETHA_1, // column 1: weight multiplied with the WIDTH feature
BIAS // column 2: constant term added to the score
};
// Column indices into one row of the feature dataset, as read by get_scores.
enum Feature
{
LENGTH, // column 0
WIDTH // column 1
};
// Positions of the command-line arguments inside argv, as used by main.
enum Argument
{
VALIDATION_DIR = 1, // directory from which DATASET + SUFFIX and LABELS + SUFFIX are read
WEIGHT_VECTOR_DIR // directory scanned for weight files ending in SUFFIX
};
// Forward declarations for the functions defined later in this file.
// get_new_int_row has no declaration here; it is defined before its first use.

// --- CSV and directory loading ---
void load_dataset(string filename, string dataset_directory,
vector<Dataset>& datasets, vector<string>& files);
vector<float> get_new_float_row(string line);
void read_csv(Dataset& dataset, string filename);
void read_csv(Labels& labels, string filename);
string get_full_path(string directory_path, string filename);
bool ends_with(string first, string second);
vector<Dataset> get_datasets(vector<string>& files, string dataset_directory);
// --- Single linear classifier ---
Scores get_scores(Dataset dataset, Dataset betha,
int instance_index, int number_of_classes);
int get_max_index(vector<float> list);
Prediction predict(Dataset dataset, Labels labels,
Dataset betha, int number_of_classes);
float get_score(Dataset dataset, Labels labels,
Prediction label_prediction);
// --- Voting across classifiers ---
int get_final_class(vector<Prediction> label_predictions, int instance_index,
int number_of_classes, int number_of_classifier);
Prediction voter(Dataset dataset, vector<Prediction> label_predictions,
int number_of_classes, int number_of_classifier);
// --- Top-level pipeline ---
void predict_linearly(vector<Prediction>& label_predictions,
vector<Dataset> weight_vectors, Dataset dataset, Labels labels,
int number_of_classes, vector<string> files);
void predict_hard_voting(vector<Prediction> label_predictions, Dataset dataset,
Labels labels, int number_of_classes, int number_of_classifier);
void predict(string validation_dir, string weight_vector_dir);
// NOTE(review): no definition or call of this function is visible in this file —
// confirm whether the declaration is still needed.
void modify_command_argument(int counter, char const *arguments[]);
// Splits one CSV line on COMMA and converts every field to float with atof.
// A field atof cannot parse is stored as 0.
vector<float> get_new_float_row(string line)
{
    vector<float> values;
    istringstream fields(line);
    for (string field; getline(fields, field, COMMA); )
    {
        const char* text = field.c_str();
        values.push_back(atof(text));
    }
    return values;
}
// Splits one CSV line on COMMA and stores every field as an int.
// Each field is parsed with atof and the result is then truncated to int,
// so a field such as "2.0" is stored as 2.
vector<int> get_new_int_row(string line)
{
    vector<int> values;
    istringstream fields(line);
    string field;
    while (getline(fields, field, COMMA))
    {
        const double parsed = atof(field.c_str());
        values.push_back(static_cast<int>(parsed));
    }
    return values;
}
// Reads a CSV file of floats and appends one row per data line to `dataset`.
//
// The first line of the file is treated as a header and discarded.
// Blank lines (including a lone carriage return from CRLF files) are skipped,
// because they would otherwise become rows with no usable columns that the
// scoring code indexes into.
// If the file cannot be opened, a message is written to cerr and `dataset`
// is left unchanged.
void read_csv(Dataset& dataset, string filename)
{
    ifstream file(filename);
    if (!file.is_open())
    {
        cerr << "Could not open file: " << filename << endl;
        return;
    }
    string line;
    getline(file, line, NEW_LINE); // drop the header row
    while (getline(file, line, NEW_LINE))
    {
        // Strip the '\r' left behind by Windows line endings.
        if (!line.empty() && line.back() == '\r')
            line.pop_back();
        if (line.empty())
            continue;
        dataset.push_back(get_new_float_row(line));
    }
    // `file` is closed by its destructor.
}
// Reads a CSV file of integer labels and appends one row per data line to `labels`.
//
// The first line of the file is treated as a header and discarded.
// Blank lines (including a lone carriage return from CRLF files) are skipped,
// because they would otherwise become rows without a CLASS_NUMBER column.
// If the file cannot be opened, a message is written to cerr and `labels`
// is left unchanged.
void read_csv(Labels& labels, string filename)
{
    ifstream file(filename);
    if (!file.is_open())
    {
        cerr << "Could not open file: " << filename << endl;
        return;
    }
    string line;
    getline(file, line, NEW_LINE); // drop the header row
    while (getline(file, line, NEW_LINE))
    {
        // Strip the '\r' left behind by Windows line endings.
        if (!line.empty() && line.back() == '\r')
            line.pop_back();
        if (line.empty())
            continue;
        labels.push_back(get_new_int_row(line));
    }
    // `file` is closed by its destructor.
}
// Builds the path of `filename` inside `directory_path` by plain concatenation.
// No separator is inserted, so `directory_path` must already end with one.
string get_full_path(string directory_path, string filename)
{
    string full_path(directory_path);
    full_path += filename;
    return full_path;
}
// Returns true when `first` ends with `second`.
//
// An empty `second` matches every string. The comparison is done in place
// with string::compare(pos, len, str); the previous version passed an end
// index to substr, whose second argument is a character count, and was only
// correct because substr clamps an oversized count.
bool ends_with(string first, string second)
{
    if (second.size() > first.size())
        return false;
    const string::size_type offset = first.size() - second.size();
    return first.compare(offset, second.size(), second) == 0;
}
void load_dataset(string filename, string dataset_directory,
vector<Dataset>& datasets, vector<string>& files)
{
files.push_back(filename);
string full_path = get_full_path(dataset_directory, filename);
Dataset dataset;
read_csv(dataset, full_path);
datasets.push_back(dataset);
}
// Loads every file in `dataset_directory` whose name ends with SUFFIX.
//
// Returns one Dataset per matching file, in the order readdir reports the
// entries; the matching file names are appended to `files` in the same order.
// If the directory cannot be opened, an error is written to cerr and an
// empty vector is returned.
vector<Dataset> get_datasets(vector<string>& files, string dataset_directory)
{
    vector<Dataset> loaded;

    DIR* handle = opendir(dataset_directory.c_str());
    if (handle == nullptr)
    {
        cerr << "Could not open directory..." << endl;
        return loaded;
    }

    for (struct dirent* item = readdir(handle); item != nullptr;
         item = readdir(handle))
    {
        string name(item->d_name);
        if (!ends_with(name, SUFFIX))
            continue;
        load_dataset(name, dataset_directory, loaded, files);
    }

    closedir(handle);
    return loaded;
}
// Computes the linear score of one dataset row for every class.
//
// For class c the score is
//   betha[c][BIAS] + betha[c][BETHA_0] * row[LENGTH] + betha[c][BETHA_1] * row[WIDTH]
// where row = dataset[instance_index]. The result holds `number_of_classes`
// scores, indexed by class. No bounds checking is performed.
Scores get_scores(Dataset dataset, Dataset betha,
                  int instance_index, int number_of_classes)
{
    Scores per_class;
    int current = 0;
    while (current < number_of_classes)
    {
        const vector<float>& weights = betha[current];
        const vector<float>& features = dataset[instance_index];
        float linear = weights[BIAS] +
            weights[BETHA_0] * features[LENGTH] +
            weights[BETHA_1] * features[WIDTH];
        per_class.push_back(linear);
        ++current;
    }
    return per_class;
}
// Returns the index of the largest element of `list`.
//
// When several elements share the maximum, the lowest index wins (the
// comparison is strict). Returns -1 for an empty list; the previous version
// read list[0] unconditionally, which is undefined behaviour on empty input.
int get_max_index(vector<float> list)
{
    if (list.empty())
        return -1;

    // Use the container's own index type to avoid a signed/unsigned comparison.
    vector<float>::size_type best = 0;
    for (vector<float>::size_type i = 1; i < list.size(); ++i)
        if (list[i] > list[best])
            best = i;
    return static_cast<int>(best);
}
// Classifies every row of `dataset` with one weight table.
//
// For each row the per-class scores are computed with get_scores and the
// index of the highest score becomes the predicted class. The result has one
// entry per dataset row. `labels` is accepted but not used here.
Prediction predict(Dataset dataset, Labels labels,
                   Dataset betha, int number_of_classes)
{
    Prediction predicted;
    int row = 0;
    while (row < dataset.size())
    {
        predicted.push_back(
            get_max_index(get_scores(dataset, betha, row, number_of_classes)));
        ++row;
    }
    return predicted;
}
// Returns the fraction of predictions that match the expected labels.
//
// A prediction i counts as correct when labels[i][CLASS_NUMBER] equals
// label_prediction[i]. The denominator is the number of predictions, so a
// prediction without a usable label counts as wrong.
// Returns 0 when there are no predictions (the previous version divided 0 by
// 0 and returned NaN). Only indices present in both vectors are compared;
// the previous version indexed `label_prediction` with every index of
// `labels`, reading out of bounds when `labels` was longer.
// `dataset` is accepted but not used here.
float get_score(Dataset dataset, Labels labels,
                Prediction label_prediction)
{
    if (label_prediction.empty())
        return 0;

    Labels::size_type comparable = labels.size();
    if (label_prediction.size() < comparable)
        comparable = label_prediction.size();

    float satisfied = 0;
    for (Labels::size_type i = 0; i < comparable; ++i)
    {
        // A label row with too few columns cannot match anything.
        if (labels[i].size() > CLASS_NUMBER &&
            labels[i][CLASS_NUMBER] == label_prediction[i])
            satisfied++;
    }
    return satisfied / label_prediction.size();
}
// Majority vote for one dataset row.
//
// Counts, per class, how many of the first `number_of_classifier` prediction
// lists chose that class for `instance_index`, and returns the class index
// get_max_index selects from those counts. Counters start at INITIAL_VAL.
// No bounds checking is performed on the predicted class indices.
int get_final_class(vector<Prediction> label_predictions, int instance_index,
                    int number_of_classes, int number_of_classifier)
{
    vector<float> votes(number_of_classes, INITIAL_VAL);
    int classifier = 0;
    while (classifier < number_of_classifier)
    {
        const int voted_class = label_predictions[classifier][instance_index];
        votes[voted_class] += 1;
        ++classifier;
    }
    return get_max_index(votes);
}
// Combines the per-classifier predictions into one prediction per dataset row.
// Each row's final class is decided by get_final_class; `dataset` is only
// used for its row count.
Prediction voter(Dataset dataset, vector<Prediction> label_predictions,
                 int number_of_classes, int number_of_classifier)
{
    Prediction majority;
    for (int row = 0; row < dataset.size(); ++row)
    {
        const int winner = get_final_class(label_predictions, row,
                                           number_of_classes,
                                           number_of_classifier);
        majority.push_back(winner);
    }
    return majority;
}
void predict_linearly(vector<Prediction>& label_predictions,
vector<Dataset> weight_vectors, Dataset dataset,
Labels labels, int number_of_classes, vector<string> files)
{
for (int i = 0; i < weight_vectors.size(); ++i)
{
Dataset betha = weight_vectors[i];
Prediction label_prediction = predict(dataset, labels, betha,
number_of_classes);
float score = get_score(dataset, labels, label_prediction);
label_predictions.push_back(label_prediction);
}
}
void predict_hard_voting(vector<Prediction> label_predictions,
Dataset dataset, Labels labels, int number_of_classes,
int number_of_classifier)
{
Prediction final_prediction = voter(dataset, label_predictions,
number_of_classes, number_of_classifier);
float score = get_score(dataset, labels, final_prediction);
cout << ACCURACY << COLON << SPACE << setprecision(PRECISION) <<
fixed << (score * 100) << PERCENT << endl;
}
void predict(string validation_dir, string weight_vector_dir)
{
Dataset dataset;
Labels labels;
vector<string> files;
vector<Prediction> label_predictions;
vector<Dataset> weight_vectors = get_datasets(files, weight_vector_dir);
int number_of_classifier = weight_vectors.size();
int number_of_classes = weight_vectors[0].size();
read_csv(dataset, validation_dir + DATASET + SUFFIX);
read_csv(labels, validation_dir + LABELS + SUFFIX);
predict_linearly(label_predictions, weight_vectors, dataset, labels,
number_of_classes, files);
predict_hard_voting(label_predictions, dataset, labels, number_of_classes,
number_of_classifier);
}
// Entry point. Expects two arguments: the validation directory and the
// weight-vector directory (see enum Argument for their positions).
// SEPERATOR is appended to both before they are handed to predict.
//
// Returns EXIT_FAILURE with a usage message when an argument is missing; the
// previous version read argv[1] and argv[2] without checking argc, which
// constructs a string from a null pointer when they are absent.
int main(int argc, char const *argv[])
{
    if (argc <= WEIGHT_VECTOR_DIR)
    {
        cerr << "Usage: " << (argc > 0 ? argv[0] : "program")
             << " <validation_dir> <weight_vector_dir>" << endl;
        return EXIT_FAILURE;
    }

    string validation_dir = argv[VALIDATION_DIR];
    string weight_vector_dir = argv[WEIGHT_VECTOR_DIR];
    predict(validation_dir + SEPERATOR,
            weight_vector_dir + SEPERATOR);
    return 0;
}