From 0e2547be863f4c1d86d017337270c20e6a6995d3 Mon Sep 17 00:00:00 2001
From: Joshua Moerman
Date: Tue, 17 Nov 2015 17:27:04 +0100
Subject: [PATCH] Adds program to count size of test suite

src/measure.cpp (new): reads words from stdin, one word per line with
the symbols separated by whitespace. Every distinct symbol is mapped to
a numeric id in order of first appearance, each word is inserted into a
trie, and finally the two numbers returned by total_size() for that trie
are printed together with their sum, separated by tabs.

src/methods.cpp: --print-suite now writes a space after every symbol of
a word instead of printing the symbols back to back.
---
 src/measure.cpp | 41 +++++++++++++++++++++++++++++++++++++++++
 src/methods.cpp |  2 +-
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 src/measure.cpp

diff --git a/src/measure.cpp b/src/measure.cpp
new file mode 100644
index 0000000..6bd6048
--- /dev/null
+++ b/src/measure.cpp
@@ -0,0 +1,41 @@
+#include <trie.hpp>
+
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+using namespace std;
+
+template <typename T>
+int func(std::istream & in, std::ostream & out) {
+	unordered_map<string, T> translation;
+	trie<T> unique_traces;
+
+	string line;
+	vector<T> current_word;
+	while (getline(in, line)) {
+		current_word.clear();
+		// TODO: this can be done more efficiently, I guess
+		stringstream ss(line);
+		string symbol;
+		while (ss >> symbol) {
+			if (symbol.empty()) continue;
+			const auto id = translation.insert(make_pair(symbol, translation.size())).first->second;
+			current_word.push_back(id);
+		}
+		unique_traces.insert(current_word);
+	}
+
+	const auto p = total_size(unique_traces);
+	out << p.first << '\t' << p.second << '\t' << p.first + p.second << endl;
+
+	return 0;
+}
+
+int main(int argc, char * argv[]) {
+	// default is an alphabet is maximal 2^32 = 4'294'967'296 symbols
+	// this bound does not really matter for speed or space
+	return func<uint32_t>(cin, cout);
+}
diff --git a/src/methods.cpp b/src/methods.cpp
index c957993..2c015a3 100644
--- a/src/methods.cpp
+++ b/src/methods.cpp
@@ -125,7 +125,7 @@ int main(int argc, char * argv[]) {
 	if(args.at("--print-suite").asBool()){
 		test_suite.for_each([](const auto & w){
 			for(const auto & x : w) {
-				cout << x;
+				cout << x << ' ';
 			}
 			cout << endl;
 		});