From 1856602e3cd16f1f8e3707c803f2794197de0ee9 Mon Sep 17 00:00:00 2001 From: Joshua Moerman Date: Thu, 21 May 2015 15:56:16 +0200 Subject: [PATCH] Reduces memory usage a bit by using smaller types. --- lib/mealy.hpp | 8 ++++---- lib/reachability.cpp | 2 +- lib/separating_family.cpp | 2 +- lib/splitting_tree.cpp | 4 ++-- lib/trie.cpp | 17 ----------------- lib/trie.hpp | 38 +++++++++++++++++++++++++------------- lib/types.hpp | 6 +++--- src/distance.cpp | 2 +- src/methods.cpp | 2 +- src/trie_test.cpp | 4 ++-- 10 files changed, 40 insertions(+), 45 deletions(-) delete mode 100644 lib/trie.cpp diff --git a/lib/mealy.hpp b/lib/mealy.hpp index 5f61617..8ce3f3c 100644 --- a/lib/mealy.hpp +++ b/lib/mealy.hpp @@ -18,9 +18,9 @@ struct mealy { struct edge { edge() = default; - edge(state t, output o) : to(t), output(o) {} + edge(state t, output o) : to(t), out(o) {} state to = state(-1); - output output = size_t(-1); + output out = output(-1); }; // state -> input -> (output, state) @@ -34,7 +34,7 @@ struct mealy { inline bool is_complete(const mealy & m){ for(state n = 0; n < m.graph_size; ++n){ if(m.graph[n].size() != m.input_size) return false; - for(auto && e : m.graph[n]) if(e.to == state(-1) || e.output == output(-1)) return false; + for(auto && e : m.graph[n]) if(e.to == state(-1) || e.out == output(-1)) return false; } return true; } @@ -42,7 +42,7 @@ inline bool is_complete(const mealy & m){ inline bool defined(mealy const & m, state s, input i) { if (s >= m.graph.size()) return false; if (i >= m.graph[s].size()) return false; - if (m.graph[s][i].to == state(-1) || m.graph[s][i].output == output(-1)) return false; + if (m.graph[s][i].to == state(-1) || m.graph[s][i].out == output(-1)) return false; return true; } diff --git a/lib/reachability.cpp b/lib/reachability.cpp index 06b0dda..93cdecc 100644 --- a/lib/reachability.cpp +++ b/lib/reachability.cpp @@ -29,7 +29,7 @@ mealy reachable_submachine(const mealy& in, state start) { for (input i = 0; i < in.input_size; ++i) { const auto ret = apply(in, s, i); - const output o = ret.output; + const output o = ret.out; const state t = ret.to; if (!new_state.count(t)) new_state[t] = max_state++; diff --git a/lib/separating_family.cpp b/lib/separating_family.cpp index b7dd468..fd69dbf 100644 --- a/lib/separating_family.cpp +++ b/lib/separating_family.cpp @@ -16,7 +16,7 @@ separating_family create_separating_family(const adaptive_distinguishing_sequenc const splitting_tree & separating_sequences) { const auto N = sequence.CI.size(); - vector suffixes(N); + vector> suffixes(N); separating_family ret(N); // First we accumulate the kind-of-UIOs and the separating words we need. We will do this with a diff --git a/lib/splitting_tree.cpp b/lib/splitting_tree.cpp index 025f54f..c3e48d8 100644 --- a/lib/splitting_tree.cpp +++ b/lib/splitting_tree.cpp @@ -102,7 +102,7 @@ result create_splitting_tree(const mealy & g, options opt) { end(boom.states), [symbol, depth, &g, &update_succession](state state) { const auto r = apply(g, state, symbol); update_succession(state, r.to, depth); - return r.output; + return r.out; }, Q); // no split -> continue with other input symbols @@ -144,7 +144,7 @@ result create_splitting_tree(const mealy & g, options opt) { end(boom.states), [word, depth, &g, &update_succession](state state) { const mealy::edge r = apply(g, state, word.begin(), word.end()); update_succession(state, r.to, depth); - return r.output; + return r.out; }, Q); // not a valid split -> continue diff --git a/lib/trie.cpp b/lib/trie.cpp deleted file mode 100644 index a1642d5..0000000 --- a/lib/trie.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "trie.hpp" - -std::vector> flatten(const trie & t) { - std::vector> ret; - t.for_each([&ret](auto && w) { ret.push_back(w); }); - return ret; -} - -std::pair total_size(const trie & t) { - size_t count = 0; - size_t total_count = 0; - t.for_each([&count, &total_count](auto && w) { - ++count; - total_count += w.size(); - }); - return {count, total_count}; -} diff --git a/lib/trie.hpp b/lib/trie.hpp index aaba44f..6f30c11 100644 --- a/lib/trie.hpp +++ b/lib/trie.hpp @@ -10,10 +10,10 @@ /// /// \brief A Trie datastructure used to remove prefixes in a set of words /// -/// The datastructure only works for words over size_t. In principle the symbols -/// can be unbounded, however having very large symbols degrades the performance -/// a lot. Some random testing shows that for symbols <= 50 the performance is -/// similar to std::set (which is solving a different problem). +/// The datastructure only works for words over integral unsigned types. In principle the symbols +/// can be unbounded, however having very large symbols degrades the performance a lot. Some random +/// testing shows that for symbols <= 50 the performance is similar to std::set (which is solving a +/// different problem). /// /// Tests : 1M words, avg words length 4 (geometric dist.), alphabet 50 symbols /// trie reduction 58% in 1.15s @@ -23,7 +23,9 @@ /// There are, however, "internal iterators" exposed as a for_each() member /// function (if only we had coroutines already...) /// -struct trie { +template struct trie { + static_assert(std::is_integral::value && std::is_unsigned::value, ""); + /// \brief Inserts a word (given by iterators \p begin and \p end) /// \returns true if the element was inserted, false if already there template bool insert(Iterator && begin, Iterator && end) { @@ -46,19 +48,17 @@ struct trie { /// \brief Applies \p function to all word (not to the prefixes) template void for_each(Fun && function) const { - std::vector word; + std::vector word; return for_each_impl(std::forward(function), word); } /// \brief Empties the complete set - void clear() { - branches.clear(); - } + void clear() { branches.clear(); } private: - template void for_each_impl(Fun && function, std::vector & word) const { + template void for_each_impl(Fun && function, std::vector & word) const { size_t count = 0; - for (size_t i = 0; i < branches.size(); ++i) { + for (T i = 0; i < branches.size(); ++i) { auto const & b = branches[i]; if (b) { ++count; @@ -80,7 +80,19 @@ struct trie { /// \brief Flattens a trie \p t /// \returns an array of words (without the prefixes) -std::vector> flatten(trie const & t); +template std::vector> flatten(trie const & t) { + std::vector> ret; + t.for_each([&ret](auto && w) { ret.push_back(w); }); + return ret; +} /// \brief Returns size and total sum of symbols -std::pair total_size(trie const & t); +template std::pair total_size(trie const & t) { + size_t count = 0; + size_t total_count = 0; + t.for_each([&count, &total_count](auto && w) { + ++count; + total_count += w.size(); + }); + return {count, total_count}; +} diff --git a/lib/types.hpp b/lib/types.hpp index 0667b06..8aa4969 100644 --- a/lib/types.hpp +++ b/lib/types.hpp @@ -4,9 +4,9 @@ #include // We use size_ts for fast indexing. Note that there is little type safety here -using state = size_t; -using input = size_t; -using output = size_t; +using state = uint16_t; +using input = uint16_t; +using output = uint16_t; using word = std::vector; diff --git a/src/distance.cpp b/src/distance.cpp index ea34de2..1faccf3 100644 --- a/src/distance.cpp +++ b/src/distance.cpp @@ -95,7 +95,7 @@ int main(int argc, char * argv[]) { auto q1 = apply(m1, s1, i); auto q2 = apply(m2, s2, i); - if (q1.output != q2.output) { + if (q1.out != q2.out) { current_counterexamples++; } diff --git a/src/methods.cpp b/src/methods.cpp index ce76046..5533304 100644 --- a/src/methods.cpp +++ b/src/methods.cpp @@ -79,7 +79,7 @@ int main(int argc, char * argv[]) { const auto prefixes = prefixes_fut.get(); const auto middles = middles_fut.get(); const auto suffixes = suffixes_fut.get(); - trie test_suite; + trie test_suite; clog << "start testing" << endl; const state start = 0; diff --git a/src/trie_test.cpp b/src/trie_test.cpp index 9819602..fd18ea5 100644 --- a/src/trie_test.cpp +++ b/src/trie_test.cpp @@ -24,7 +24,7 @@ static void test() { word w5 = {5, 5, 3}; word w6 = {5, 5, 3, 1}; - trie t; + trie t; check(t.insert(w1)); check(!t.insert(w1)); check(t.insert(w2)); @@ -77,7 +77,7 @@ static void performance() { using seconds = std::chrono::duration; auto t_start = clock::now(); - trie t; + trie t; for (auto&& w : corpus) t.insert(w); auto t_end = clock::now();