From e4a7cf9933ba9e194ed51be1524cbe22178a3ac1 Mon Sep 17 00:00:00 2001 From: Joshua Moerman Date: Fri, 13 Mar 2015 18:03:27 +0100 Subject: [PATCH] Removes some duplicate words. Adds conformance to java interface. Adds randomization --- lib/logging.hpp | 6 ++ lib/mealy.hpp | 12 ++- lib/read_mealy_from_dot.cpp | 7 ++ lib/seperating_family.cpp | 8 ++ lib/splitting_tree.cpp | 8 -- lib/splitting_tree.hpp | 2 +- lib/types.hpp | 24 +++++ src/conf-hyp.cpp | 9 -- src/conf.cpp | 9 -- src/main.cpp | 203 ++++++++++++++++++++++++++++-------- src/metrics.cpp | 56 ++++++++++ 11 files changed, 270 insertions(+), 74 deletions(-) create mode 100644 src/metrics.cpp diff --git a/lib/logging.hpp b/lib/logging.hpp index cbcd1d5..a471ade 100644 --- a/lib/logging.hpp +++ b/lib/logging.hpp @@ -34,3 +34,9 @@ struct timer{ return s.count(); } }; + +// has same signature, but does not log :) +struct silent_timer { + silent_timer(std::string){} + void stop(); +}; diff --git a/lib/mealy.hpp b/lib/mealy.hpp index 80cd396..5cee652 100644 --- a/lib/mealy.hpp +++ b/lib/mealy.hpp @@ -46,10 +46,20 @@ inline auto apply(mealy const & m, state state, input input){ template auto apply(mealy const & m, state state, Iterator b, Iterator e){ - mealy::edge ret; + mealy::edge ret{state, -1}; while(b != e){ ret = apply(m, state, *b++); state = ret.to; } return ret; } + +// Used to invert the input_indices and output_indices maps +template +std::vector create_reverse_map(std::map const & indices){ + std::vector ret(indices.size()); + for(auto&& p : indices){ + ret[p.second.base()] = p.first; + } + return ret; +} diff --git a/lib/read_mealy_from_dot.cpp b/lib/read_mealy_from_dot.cpp index f8f99a2..3342567 100644 --- a/lib/read_mealy_from_dot.cpp +++ b/lib/read_mealy_from_dot.cpp @@ -1,10 +1,13 @@ #include "read_mealy_from_dot.hpp" #include "mealy.hpp" +#include #include #include #include +#include + using namespace std; template @@ -56,6 +59,10 @@ mealy read_mealy_from_dot(istream& in){ v[m.input_indices[input].base()] = {m.nodes_indices[rh], m.output_indices[output]}; } + assert(m.graph_size > 0); + assert(m.input_size > 0); + assert(m.output_size > 0); + assert(is_complete(m)); return m; } diff --git a/lib/seperating_family.cpp b/lib/seperating_family.cpp index fbad30e..ecbdfce 100644 --- a/lib/seperating_family.cpp +++ b/lib/seperating_family.cpp @@ -1,5 +1,8 @@ #include "seperating_family.hpp" +#include +#include + #include #include #include @@ -44,5 +47,10 @@ seperating_family create_seperating_family(const adaptive_distinguishing_sequenc work.push({word, c}); } + // Remove duplicates + for(auto & vec : seperating_family){ + boost::erase(vec, boost::unique(boost::sort(vec))); + } + return seperating_family; } diff --git a/lib/splitting_tree.cpp b/lib/splitting_tree.cpp index 2b7ebb1..93fa93f 100644 --- a/lib/splitting_tree.cpp +++ b/lib/splitting_tree.cpp @@ -24,14 +24,6 @@ splitting_tree &lca_impl2(splitting_tree & node){ return node; // this is a leaf } -template -std::vector concat(std::vector const & l, std::vector const & r){ - std::vector ret(l.size() + r.size()); - auto it = copy(begin(l), end(l), begin(ret)); - copy(begin(r), end(r), it); - return ret; -} - result create_splitting_tree(const mealy& g, options opt){ const auto N = g.graph.size(); const auto P = g.input_indices.size(); diff --git a/lib/splitting_tree.hpp b/lib/splitting_tree.hpp index 9dc6aef..05206e2 100644 --- a/lib/splitting_tree.hpp +++ b/lib/splitting_tree.hpp @@ -13,7 +13,7 @@ struct splitting_tree { std::vector states; std::vector children; - std::vector seperator; + word seperator; size_t depth = 0; mutable int mark = 0; // used for some algorithms... }; diff --git a/lib/types.hpp b/lib/types.hpp index a097dcf..202043d 100644 --- a/lib/types.hpp +++ b/lib/types.hpp @@ -12,3 +12,27 @@ using input = phantom; using output = phantom; using word = std::vector; + +// concattenation of words +template +std::vector concat(std::vector const & l, std::vector const & r){ + std::vector ret(l.size() + r.size()); + auto it = copy(begin(l), end(l), begin(ret)); + copy(begin(r), end(r), it); + return ret; +} + +// extends all words in seqs by all input symbols. Used to generate *all* strings +inline std::vector all_seqs(input min, input max, std::vector const & seqs){ + std::vector ret((max.base() - min.base()) * seqs.size()); + auto it = begin(ret); + for(auto const & x : seqs){ + for(input i = min; i < max; ++i){ + it->resize(x.size() + 1); + auto e = copy(x.begin(), x.end(), it->begin()); + *e++ = i; + it++; + } + } + return ret; +} diff --git a/src/conf-hyp.cpp b/src/conf-hyp.cpp index 4d398ea..e799537 100644 --- a/src/conf-hyp.cpp +++ b/src/conf-hyp.cpp @@ -9,15 +9,6 @@ using namespace std; -template -vector create_reverse_map(map const & indices){ - vector ret(indices.size()); - for(auto&& p : indices){ - ret[p.second.base()] = p.first; - } - return ret; -} - template vector resize_new(vector const & in, size_t N){ vector ret(N); diff --git a/src/conf.cpp b/src/conf.cpp index 39fde5d..1d644d8 100644 --- a/src/conf.cpp +++ b/src/conf.cpp @@ -9,15 +9,6 @@ using namespace std; -template -vector create_reverse_map(map const & indices){ - vector ret(indices.size()); - for(auto&& p : indices){ - ret[p.second.base()] = p.first; - } - return ret; -} - template vector resize_new(vector const & in, size_t N){ vector ret(N); diff --git a/src/main.cpp b/src/main.cpp index 96894fb..1b4e9f9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,51 +6,35 @@ #include #include #include +#include #include #include +#include +#include +#include using namespace std; -template -vector create_reverse_map(map const & indices){ - vector ret(indices.size()); - for(auto&& p : indices){ - ret[p.second.base()] = p.first; - } - return ret; -} - -template -std::vector concat(std::vector const & l, std::vector const & r){ - std::vector ret(l.size() + r.size()); - auto it = copy(begin(l), end(l), begin(ret)); - copy(begin(r), end(r), it); - return ret; -} - -template -std::vector> all_seqs(T min, T max, std::vector> const & seqs){ - std::vector> ret((max - min) * seqs.size()); - auto it = begin(ret); - for(auto && x : seqs){ - for(T i = min; i < max; ++i){ - it->assign(x.size()+1); - auto e = copy(x.begin(), x.end(), it->begin()); - *e++ = i; - } - } - return ret; -} +using time_logger = silent_timer; int main(int argc, char *argv[]){ - if(argc != 2) return 1; + if(argc != 4) return 1; const string filename = argv[1]; const bool use_stdio = filename == "--"; + // 0 => only states checks. 1 => transition checks. 2 or more => deep checks + const auto k_max = stoul(argv[2]); + + const string mode = argv[3]; + const bool streaming = mode == "stream"; + const bool random_part = streaming; + const bool statistics = mode == "stats"; + const bool compress_suite = mode == "compr"; + const auto machine = [&]{ - timer t("reading file " + filename); + time_logger t("reading file " + filename); if(use_stdio){ return read_mealy_from_dot(cin); } else { @@ -60,12 +44,12 @@ int main(int argc, char *argv[]){ auto all_pair_seperating_sequences_fut = async([&]{ const auto splitting_tree_hopcroft = [&]{ - timer t("creating hopcroft splitting tree"); + time_logger t("creating hopcroft splitting tree"); return create_splitting_tree(machine, hopcroft_style); }(); const auto all_pair_seperating_sequences = [&]{ - timer t("gathering all seperating sequences"); + time_logger t("gathering all seperating sequences"); return create_all_pair_seperating_sequences(splitting_tree_hopcroft.root); }(); @@ -74,12 +58,12 @@ int main(int argc, char *argv[]){ auto sequence_fut = async([&]{ const auto splitting_tree = [&]{ - timer t("Lee & Yannakakis I"); + time_logger t("Lee & Yannakakis I"); return create_splitting_tree(machine, lee_yannakakis_style); }(); const auto sequence = [&]{ - timer t("Lee & Yannakakis II"); + time_logger t("Lee & Yannakakis II"); return create_adaptive_distinguishing_sequence(splitting_tree); }(); @@ -87,23 +71,154 @@ int main(int argc, char *argv[]){ }); auto transfer_sequences_fut = std::async([&]{ - timer t("determining transfer sequences"); + time_logger t("determining transfer sequences"); return create_transfer_sequences(machine, 0); }); + auto inputs_fut = std::async([&]{ + return create_reverse_map(machine.input_indices); + }); + + auto relevant_inputs_fut = std::async([&]{ + time_logger t("determining relevance of inputs"); + vector> distributions(machine.graph_size); + + for(state s = 0; s < machine.graph_size; ++s){ + vector r_cache(machine.input_size, 0); + for(input i = 0; i < machine.input_size; ++i){ + const auto test1 = apply(machine, s, i).output != machine.output_indices.at("quiescence"); + const auto test2 = apply(machine, s, i).to != s; + r_cache[i.base()] = test1 + test2; + } + + distributions[s.base()] = discrete_distribution(begin(r_cache), end(r_cache)); + } + return distributions; + }); + const auto all_pair_seperating_sequences = all_pair_seperating_sequences_fut.get(); const auto sequence = sequence_fut.get(); const auto seperating_family = [&]{ - timer t("making seperating family"); + time_logger t("making seperating family"); return create_seperating_family(sequence, all_pair_seperating_sequences); }(); const auto transfer_sequences = transfer_sequences_fut.get(); - const auto inputs = create_reverse_map(machine.input_indices); + const auto inputs = inputs_fut.get(); - { - timer t("making test suite"); + const auto print_word = [&](auto w){ + for(auto && x : w) cout << inputs[x.base()] << ' '; + }; + + if(statistics){ + const auto adder = [](auto const & x){ + return [&x](auto const & l, auto const & r) { return l + x(r); }; + }; + + const auto size = adder([](auto const & r) { return r.size(); }); + + const auto p_size = transfer_sequences.size(); + const auto p_total = accumulate(begin(transfer_sequences), end(transfer_sequences), 0, size); + const auto p_avg = p_total / double(p_size); + + cout << "Prefixes:\n"; + cout << "\tsize\t" << p_size << '\n'; + cout << "\ttotal\t" << p_total << '\n'; + cout << "\tavg\t" << p_avg << '\n'; + + const auto w_fam_size = seperating_family.size(); + const auto w_fam_total = accumulate(begin(seperating_family), end(seperating_family), 0, size); + const auto w_fam_avg = w_fam_total / double(w_fam_size); + + const auto w_total = accumulate(begin(seperating_family), end(seperating_family), 0, adder([&size](auto const & r){ + return accumulate(begin(r), end(r), 0, size); + })); + const auto w_avg = w_total / double(w_fam_total); + + cout << "Suffixes:\n"; + cout << "\tsize\t" << w_fam_total << '\n'; + cout << "\tavg\t" << w_fam_avg << '\n'; + cout << "\ttotal\t" << w_total << '\n'; + cout << "\tavg\t" << w_avg << '\n'; + + cout << "Total tests (approximately):\n"; + double total = machine.graph_size * 1 * w_fam_avg; + double length = p_avg + 0 + w_avg; + for(size_t k = 0; k <= k_max; ++k){ + cout << "\tk = " << k << "\t" + << setw(16) << size_t(total) << " * " + << setw(3) << size_t(length) << " = " + << setw(20) << size_t(total * length) << endl; + total *= machine.input_size; + length += 1; + } + } + + if(streaming){ + time_logger t("outputting all preset tests"); + + vector all_sequences(1); + for(int k = 0; k <= k_max; ++k){ + cerr << "*** K = " << k << endl; + for(state s = 0; s < machine.graph_size; ++s){ + const auto prefix = transfer_sequences[s.base()]; + + for(auto && suffix : seperating_family[s.base()]){ + for(auto && r : all_sequences){ + print_word(prefix); + print_word(r); + print_word(suffix); + cout << endl; + } + } + } + + all_sequences = all_seqs(0, machine.input_size, all_sequences); + } + } + + if(random_part){ + time_logger t("outputting all random tests"); + + std::random_device rd; + std::mt19937 generator(rd()); + + uniform_int_distribution prefix_selection(0, transfer_sequences.size()); + uniform_int_distribution<> fair_coin(0, 1); + uniform_int_distribution suffix_selection; + auto relevant_inputs = relevant_inputs_fut.get(); + + using params = uniform_int_distribution::param_type; + + while(true){ + state current_state = 0; + + const auto & p = transfer_sequences[prefix_selection(generator)]; + current_state = apply(machine, current_state, begin(p), end(p)).to; + + vector m; + m.reserve(k_max + 2); + size_t minimal_size = k_max + 1; + while(minimal_size || fair_coin(generator)){ + input i = relevant_inputs[current_state.base()](generator); + m.push_back(i); + current_state = apply(machine, current_state, i).to; + if(minimal_size) minimal_size--; + } + + const auto & suffixes = seperating_family[current_state.base()]; + const auto & s = suffixes[suffix_selection(generator, params{0, suffixes.size()-1})]; + + print_word(p); + print_word(m); + print_word(s); + cout << endl; + } + } + + if(compress_suite){ + time_logger t("making test suite"); vector suite; for(state s = 0; s < machine.graph_size; ++s){ @@ -125,11 +240,7 @@ int main(int argc, char *argv[]){ boost::iostreams::filtering_ostream compressed_stream; compressed_stream.push(boost::iostreams::gzip_compressor()); - if(use_stdio){ - compressed_stream.push(cout); - } else { - compressed_stream.push(boost::iostreams::file_descriptor_sink(filename + "test_suite")); - } + compressed_stream.push(boost::iostreams::file_descriptor_sink(filename + "test_suite")); boost::archive::text_oarchive archive(compressed_stream); archive << real_suite; diff --git a/src/metrics.cpp b/src/metrics.cpp new file mode 100644 index 0000000..47ef325 --- /dev/null +++ b/src/metrics.cpp @@ -0,0 +1,56 @@ +#include +#include + +#include +#include +#include + +using namespace std; + +auto create_transfer_sequences(const mealy& machine, const state s, const input ignore){ + vector visited(machine.graph_size, false); + + queue work; + work.push(s); + while(!work.empty()){ + const auto u = work.front(); + work.pop(); + + if(visited[u.base()]) continue; + visited[u.base()] = true; + + for(input i = 0; i < machine.input_size; ++i){ + if(i == ignore) continue; + const auto v = apply(machine, u, i).to; + if(visited[v.base()]) continue; + work.push(v); + } + } + + return visited; +} + +int main(int argc, char *argv[]){ + if(argc != 2) return 1; + const string filename = argv[1]; + + const auto machine = read_mealy_from_dot(filename); + +// vector> table(machine.input_size); +// for(input i = 0; i < machine.input_size; ++i){ +// table[i.base()] = create_transfer_sequences(machine, 0, i); +// } + + // note the wrong iteration ;D + for(state s = 0; s < machine.graph_size; ++s){ + size_t scores[3] = {0, 0, 0}; + for(input i = 0; i < machine.input_size; ++i){ + const auto test1 = apply(machine, s, i).output != machine.output_indices.at("quiescence"); + const auto test2 = apply(machine, s, i).to != s; + + scores[test1 + test2]++; + } + cout << scores[2] << " " << scores[1] << " " << scores[0] << endl; + } +} +