-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathseq_wc.cpp
83 lines (71 loc) · 2.49 KB
/
seq_wc.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/*
* Copyright (c) 2019 alpha group, CS department, University of Torino.
*
* This file is part of pico
* (see https://github.com/alpha-unito/pico).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* This code implements a sequential word-count (i.e., the Big Data
* "hello world!") whose computation relies only on the C++ standard library.
*
* The only PiCo facility used here is pico::KeyValue, which formats each
* (word, count) pair written to the output file.
*/
#include <cassert>
#include <chrono>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include "pico/KeyValue.hpp"
/**
 * Sequential word-count driver.
 *
 * Usage: <program> <input file> <output file>
 *
 * Reads the input file line by line, splits each line on single space
 * characters, counts the occurrences of every token and writes one
 * pico::KeyValue-formatted entry per distinct token to the output file.
 * The elapsed time, truncated to whole seconds, is printed on standard output.
 *
 * @param argc number of command-line arguments (program name included)
 * @param argv argv[1] is the input file path, argv[2] the output file path
 * @return 0 on success; -1 on a usage error, if either file cannot be
 *         opened, or if writing the output fails
 */
int main(int argc, char** argv) {
  // parse command line: both the input and the output path are mandatory,
  // so argv[2] must exist before it is read
  if (argc < 3) {
    std::cerr << "Usage: " << argv[0] << " <input file> <output file>\n";
    return -1;
  }
  const std::string filename = argv[1];
  const std::string outputfilename = argv[2];

  /* prepare the output word-count map */
  std::unordered_map<std::string, unsigned> word_cnt;

  /* start measurement */
  const auto t0 = std::chrono::high_resolution_clock::now();

  /* read the input file line by line */
  std::ifstream infile(filename);
  // checked at run time (not with assert) so the check survives NDEBUG builds
  if (!infile.is_open()) {
    std::cerr << "Unable to open input file: " << filename << "\n";
    return -1;
  }

  std::string line;
  while (std::getline(infile, line)) {
    std::istringstream tokens(line);
    std::string word;
    /*
     * tokenize the line and increment each word counter; splitting on a
     * single ' ' means consecutive spaces produce empty tokens, which are
     * counted like any other key
     */
    while (std::getline(tokens, word, ' ')) {
      ++word_cnt[word];
    }
  }

  /* write output */
  std::ofstream outfile(outputfilename);
  if (!outfile.is_open()) {
    std::cerr << "Unable to open output file: " << outputfilename << "\n";
    return -1;
  }

  for (const auto& entry : word_cnt) {
    // every key was inserted through an increment, so its count is >= 1
    assert(entry.second != 0);
    pico::KeyValue<std::string, unsigned> kv(entry.first, entry.second);
    // '\n' instead of std::endl: avoid flushing the stream once per entry
    outfile << kv.to_string() << '\n';
  }

  // flush once, before stopping the clock, so the measured interval still
  // includes handing the data to the underlying file
  outfile.flush();
  if (!outfile) {
    std::cerr << "Error while writing output file: " << outputfilename << "\n";
    return -1;
  }

  /* stop measurement */
  const auto t1 = std::chrono::high_resolution_clock::now();
  const auto d = std::chrono::duration_cast<std::chrono::seconds>(t1 - t0);

  /* print the execution time */
  std::cout << "done in " << d.count() << " s\n";

  return 0;
}