1
Fork 0
mirror of https://github.com/Jaxan/hybrid-ads.git synced 2025-04-27 23:17:44 +02:00

Reduces memory usage a bit by using smaller types.

This commit is contained in:
Joshua Moerman 2015-05-21 15:56:16 +02:00
parent f5108ceb43
commit 1856602e3c
10 changed files with 40 additions and 45 deletions

View file

@ -18,9 +18,9 @@
struct mealy { struct mealy {
struct edge { struct edge {
edge() = default; edge() = default;
edge(state t, output o) : to(t), output(o) {} edge(state t, output o) : to(t), out(o) {}
state to = state(-1); state to = state(-1);
output output = size_t(-1); output out = output(-1);
}; };
// state -> input -> (output, state) // state -> input -> (output, state)
@ -34,7 +34,7 @@ struct mealy {
inline bool is_complete(const mealy & m){ inline bool is_complete(const mealy & m){
for(state n = 0; n < m.graph_size; ++n){ for(state n = 0; n < m.graph_size; ++n){
if(m.graph[n].size() != m.input_size) return false; if(m.graph[n].size() != m.input_size) return false;
for(auto && e : m.graph[n]) if(e.to == state(-1) || e.output == output(-1)) return false; for(auto && e : m.graph[n]) if(e.to == state(-1) || e.out == output(-1)) return false;
} }
return true; return true;
} }
@ -42,7 +42,7 @@ inline bool is_complete(const mealy & m){
inline bool defined(mealy const & m, state s, input i) { inline bool defined(mealy const & m, state s, input i) {
if (s >= m.graph.size()) return false; if (s >= m.graph.size()) return false;
if (i >= m.graph[s].size()) return false; if (i >= m.graph[s].size()) return false;
if (m.graph[s][i].to == state(-1) || m.graph[s][i].output == output(-1)) return false; if (m.graph[s][i].to == state(-1) || m.graph[s][i].out == output(-1)) return false;
return true; return true;
} }

View file

@ -29,7 +29,7 @@ mealy reachable_submachine(const mealy& in, state start) {
for (input i = 0; i < in.input_size; ++i) { for (input i = 0; i < in.input_size; ++i) {
const auto ret = apply(in, s, i); const auto ret = apply(in, s, i);
const output o = ret.output; const output o = ret.out;
const state t = ret.to; const state t = ret.to;
if (!new_state.count(t)) new_state[t] = max_state++; if (!new_state.count(t)) new_state[t] = max_state++;

View file

@ -16,7 +16,7 @@ separating_family create_separating_family(const adaptive_distinguishing_sequenc
const splitting_tree & separating_sequences) { const splitting_tree & separating_sequences) {
const auto N = sequence.CI.size(); const auto N = sequence.CI.size();
vector<trie> suffixes(N); vector<trie<input>> suffixes(N);
separating_family ret(N); separating_family ret(N);
// First we accumulate the kind-of-UIOs and the separating words we need. We will do this with a // First we accumulate the kind-of-UIOs and the separating words we need. We will do this with a

View file

@ -102,7 +102,7 @@ result create_splitting_tree(const mealy & g, options opt) {
end(boom.states), [symbol, depth, &g, &update_succession](state state) { end(boom.states), [symbol, depth, &g, &update_succession](state state) {
const auto r = apply(g, state, symbol); const auto r = apply(g, state, symbol);
update_succession(state, r.to, depth); update_succession(state, r.to, depth);
return r.output; return r.out;
}, Q); }, Q);
// no split -> continue with other input symbols // no split -> continue with other input symbols
@ -144,7 +144,7 @@ result create_splitting_tree(const mealy & g, options opt) {
end(boom.states), [word, depth, &g, &update_succession](state state) { end(boom.states), [word, depth, &g, &update_succession](state state) {
const mealy::edge r = apply(g, state, word.begin(), word.end()); const mealy::edge r = apply(g, state, word.begin(), word.end());
update_succession(state, r.to, depth); update_succession(state, r.to, depth);
return r.output; return r.out;
}, Q); }, Q);
// not a valid split -> continue // not a valid split -> continue

View file

@ -1,17 +0,0 @@
#include "trie.hpp"
/// \brief Copies every word stored in the trie \p t into a flat list.
/// \returns one vector per word handed out by trie::for_each (which skips
///          the prefixes), in the order for_each visits them
std::vector<std::vector<size_t>> flatten(const trie & t) {
    std::vector<std::vector<size_t>> words;
    // for_each is an internal iterator: it invokes the callback once per word.
    auto collect = [&words](auto && current) { words.push_back(current); };
    t.for_each(collect);
    return words;
}
/// \brief Measures the contents of the trie \p t.
/// \returns a pair: first is the number of words visited by trie::for_each,
///          second is the sum of the lengths of those words
std::pair<size_t, size_t> total_size(const trie & t) {
    std::pair<size_t, size_t> totals{0, 0};
    // Accumulate directly into the result while for_each walks the words.
    t.for_each([&totals](auto && current) {
        totals.first += 1;
        totals.second += current.size();
    });
    return totals;
}

View file

@ -10,10 +10,10 @@
/// ///
/// \brief A Trie datastructure used to remove prefixes in a set of words /// \brief A Trie datastructure used to remove prefixes in a set of words
/// ///
/// The datastructure only works for words over size_t. In principle the symbols /// The datastructure only works for words over integral unsigned types. In principle the symbols
/// can be unbounded, however having very large symbols degrades the performance /// can be unbounded, however having very large symbols degrades the performance a lot. Some random
/// a lot. Some random testing shows that for symbols <= 50 the performance is /// testing shows that for symbols <= 50 the performance is similar to std::set (which is solving a
/// similar to std::set (which is solving a different problem). /// different problem).
/// ///
/// Tests : 1M words, avg words length 4 (geometric dist.), alphabet 50 symbols /// Tests : 1M words, avg words length 4 (geometric dist.), alphabet 50 symbols
/// trie reduction 58% in 1.15s /// trie reduction 58% in 1.15s
@ -23,7 +23,9 @@
/// There are, however, "internal iterators" exposed as a for_each() member /// There are, however, "internal iterators" exposed as a for_each() member
/// function (if only we had coroutines already...) /// function (if only we had coroutines already...)
/// ///
struct trie { template <typename T> struct trie {
static_assert(std::is_integral<T>::value && std::is_unsigned<T>::value, "");
/// \brief Inserts a word (given by iterators \p begin and \p end) /// \brief Inserts a word (given by iterators \p begin and \p end)
/// \returns true if the element was inserted, false if already there /// \returns true if the element was inserted, false if already there
template <typename Iterator> bool insert(Iterator && begin, Iterator && end) { template <typename Iterator> bool insert(Iterator && begin, Iterator && end) {
@ -46,19 +48,17 @@ struct trie {
/// \brief Applies \p function to all words (not to the prefixes) /// \brief Applies \p function to all words (not to the prefixes)
template <typename Fun> void for_each(Fun && function) const { template <typename Fun> void for_each(Fun && function) const {
std::vector<size_t> word; std::vector<T> word;
return for_each_impl(std::forward<Fun>(function), word); return for_each_impl(std::forward<Fun>(function), word);
} }
/// \brief Empties the complete set /// \brief Empties the complete set
void clear() { void clear() { branches.clear(); }
branches.clear();
}
private: private:
template <typename Fun> void for_each_impl(Fun && function, std::vector<size_t> & word) const { template <typename Fun> void for_each_impl(Fun && function, std::vector<T> & word) const {
size_t count = 0; size_t count = 0;
for (size_t i = 0; i < branches.size(); ++i) { for (T i = 0; i < branches.size(); ++i) {
auto const & b = branches[i]; auto const & b = branches[i];
if (b) { if (b) {
++count; ++count;
@ -80,7 +80,19 @@ struct trie {
/// \brief Flattens a trie \p t /// \brief Flattens a trie \p t
/// \returns an array of words (without the prefixes) /// \returns an array of words (without the prefixes)
std::vector<std::vector<size_t>> flatten(trie const & t); template <typename T> std::vector<std::vector<T>> flatten(trie<T> const & t) {
std::vector<std::vector<T>> ret;
t.for_each([&ret](auto && w) { ret.push_back(w); });
return ret;
}
/// \brief Returns size and total sum of symbols /// \brief Returns size and total sum of symbols
std::pair<size_t, size_t> total_size(trie const & t); template <typename T> std::pair<size_t, size_t> total_size(trie<T> const & t) {
size_t count = 0;
size_t total_count = 0;
t.for_each([&count, &total_count](auto && w) {
++count;
total_count += w.size();
});
return {count, total_count};
}

View file

@ -4,9 +4,9 @@
#include <vector> #include <vector>
// We use size_ts for fast indexing. Note that there is little type safety here // We use size_ts for fast indexing. Note that there is little type safety here
using state = size_t; using state = uint16_t;
using input = size_t; using input = uint16_t;
using output = size_t; using output = uint16_t;
using word = std::vector<input>; using word = std::vector<input>;

View file

@ -95,7 +95,7 @@ int main(int argc, char * argv[]) {
auto q1 = apply(m1, s1, i); auto q1 = apply(m1, s1, i);
auto q2 = apply(m2, s2, i); auto q2 = apply(m2, s2, i);
if (q1.output != q2.output) { if (q1.out != q2.out) {
current_counterexamples++; current_counterexamples++;
} }

View file

@ -79,7 +79,7 @@ int main(int argc, char * argv[]) {
const auto prefixes = prefixes_fut.get(); const auto prefixes = prefixes_fut.get();
const auto middles = middles_fut.get(); const auto middles = middles_fut.get();
const auto suffixes = suffixes_fut.get(); const auto suffixes = suffixes_fut.get();
trie test_suite; trie<input> test_suite;
clog << "start testing" << endl; clog << "start testing" << endl;
const state start = 0; const state start = 0;

View file

@ -24,7 +24,7 @@ static void test() {
word w5 = {5, 5, 3}; word w5 = {5, 5, 3};
word w6 = {5, 5, 3, 1}; word w6 = {5, 5, 3, 1};
trie t; trie<unsigned> t;
check(t.insert(w1)); check(t.insert(w1));
check(!t.insert(w1)); check(!t.insert(w1));
check(t.insert(w2)); check(t.insert(w2));
@ -77,7 +77,7 @@ static void performance() {
using seconds = std::chrono::duration<double>; using seconds = std::chrono::duration<double>;
auto t_start = clock::now(); auto t_start = clock::now();
trie t; trie<unsigned> t;
for (auto&& w : corpus) t.insert(w); for (auto&& w : corpus) t.insert(w);
auto t_end = clock::now(); auto t_end = clock::now();