Removes some duplicate words. Adds conformance to java interface. Adds randomization

2025-07-05 12:57:45 +02:00 · 2015-03-13 18:03:27 +01:00 · 2015-03-13 18:03:27 +01:00 · e4a7cf9933
commit e4a7cf9933
parent c4625bf775
11 changed files with 270 additions and 74 deletions
--- a/lib/logging.hpp
+++ b/lib/logging.hpp
@ -34,3 +34,9 @@ struct timer{
 		return s.count();
 	}
 };
+
+// has same constructor signature as timer, but does not log :)
+struct silent_timer {
+	silent_timer(std::string){} // the label is accepted and ignored
+	void stop(){} // no-op; defined inline (was declaration-only) so calls to stop() link
+};
--- a/lib/mealy.hpp
+++ b/lib/mealy.hpp
@ -46,10 +46,20 @@ inline auto apply(mealy const & m, state state, input input){

 template <typename Iterator>
 auto apply(mealy const & m, state state, Iterator b, Iterator e){
-	mealy::edge ret;
+	mealy::edge ret{state, -1};
 	while(b != e){
 		ret = apply(m, state, *b++);
 		state = ret.to;
 	}
 	return ret;
 }
+
+// Used to invert the input_indices and output_indices maps: each (name -> index) entry becomes ret[index] = name. Assumes the indices are exactly 0 .. indices.size()-1; any larger index would write outside ret.
+template <typename T>
+std::vector<std::string> create_reverse_map(std::map<std::string, T> const & indices){
+	std::vector<std::string> ret(indices.size()); // one (initially empty) slot per map entry
+	for(auto&& p : indices){
+		ret[p.second.base()] = p.first; // T must offer base(), used here as the position in ret
+	}
+	return ret;
+}
--- a/lib/read_mealy_from_dot.cpp
+++ b/lib/read_mealy_from_dot.cpp
@ -1,10 +1,13 @@
 #include "read_mealy_from_dot.hpp"
 #include "mealy.hpp"

+#include <cassert>
 #include <fstream>
 #include <sstream>
 #include <string>

+#include <iostream>
+
 using namespace std;

 template <typename T>
@ -56,6 +59,10 @@ mealy read_mealy_from_dot(istream& in){
 		v[m.input_indices[input].base()] = {m.nodes_indices[rh], m.output_indices[output]};
 	}

+	assert(m.graph_size > 0);
+	assert(m.input_size > 0);
+	assert(m.output_size > 0);
+	assert(is_complete(m));
 	return m;
 }

--- a/lib/seperating_family.cpp
+++ b/lib/seperating_family.cpp
@ -1,5 +1,8 @@
 #include "seperating_family.hpp"

+#include <boost/range/algorithm.hpp>
+#include <boost/range/algorithm_ext/erase.hpp>
+
 #include <functional>
 #include <stack>
 #include <utility>
@ -44,5 +47,10 @@ seperating_family create_seperating_family(const adaptive_distinguishing_sequenc
 			work.push({word, c});
 	}

+	// Remove duplicates
+	for(auto & vec : seperating_family){
+		boost::erase(vec, boost::unique<boost::return_found_end>(boost::sort(vec)));
+	}
+
 	return seperating_family;
 }
--- a/lib/splitting_tree.cpp
+++ b/lib/splitting_tree.cpp
@ -24,14 +24,6 @@ splitting_tree &lca_impl2(splitting_tree & node){
 	return node; // this is a leaf
 }

-template <typename T>
-std::vector<T> concat(std::vector<T> const & l, std::vector<T> const & r){
-	std::vector<T> ret(l.size() + r.size());
-	auto it = copy(begin(l), end(l), begin(ret));
-	copy(begin(r), end(r), it);
-	return ret;
-}
-
 result create_splitting_tree(const mealy& g, options opt){
 	const auto N = g.graph.size();
 	const auto P = g.input_indices.size();
--- a/lib/splitting_tree.hpp
+++ b/lib/splitting_tree.hpp
@ -13,7 +13,7 @@ struct splitting_tree {

 	std::vector<state> states;
 	std::vector<splitting_tree> children;
-	std::vector<input> seperator;
+	word seperator;
 	size_t depth = 0;
 	mutable int mark = 0; // used for some algorithms...
 };
--- a/lib/types.hpp
+++ b/lib/types.hpp
@ -12,3 +12,27 @@ using input = phantom<size_t, struct input_tag>;
 using output = phantom<size_t, struct output_tag>;

 using word = std::vector<input>;
+
+// concatenation of words: returns a new vector holding the elements of l followed by those of r
+template <typename T>
+std::vector<T> concat(std::vector<T> const & l, std::vector<T> const & r){
+	std::vector<T> ret(l.size() + r.size()); // sized up front, every element is overwritten below
+	auto it = copy(begin(l), end(l), begin(ret)); // it ends up just past the copy of l
+	copy(begin(r), end(r), it);
+	return ret;
+}
+
+// extends all words in seqs by all input symbols in [min, max) (max itself excluded). Used to generate *all* strings
+inline std::vector<word> all_seqs(input min, input max, std::vector<word> const & seqs){
+	std::vector<word> ret((max.base() - min.base()) * seqs.size()); // one result word per (word, symbol) pair
+	auto it = begin(ret); // next result slot to fill
+	for(auto const & x : seqs){
+		for(input i = min; i < max; ++i){
+			it->resize(x.size() + 1); // room for x plus one extra symbol
+			auto e = copy(x.begin(), x.end(), it->begin());
+			*e++ = i; // the appended symbol goes right after the copy of x
+			it++;
+		}
+	}
+	return ret;
+}
--- a/src/conf-hyp.cpp
+++ b/src/conf-hyp.cpp
@ -9,15 +9,6 @@

 using namespace std;

-template <typename T>
-vector<string> create_reverse_map(map<string, T> const & indices){
-	vector<string> ret(indices.size());
-	for(auto&& p : indices){
-		ret[p.second.base()] = p.first;
-	}
-	return ret;
-}
-
 template <typename T>
 vector<T> resize_new(vector<T> const & in, size_t N){
 	vector<T> ret(N);
--- a/src/conf.cpp
+++ b/src/conf.cpp
@ -9,15 +9,6 @@

 using namespace std;

-template <typename T>
-vector<string> create_reverse_map(map<string, T> const & indices){
-	vector<string> ret(indices.size());
-	for(auto&& p : indices){
-		ret[p.second.base()] = p.first;
-	}
-	return ret;
-}
-
 template <typename T>
 vector<T> resize_new(vector<T> const & in, size_t N){
 	vector<T> ret(N);
--- a/src/main.cpp
+++ b/src/main.cpp
@ -6,51 +6,35 @@
 #include <seperating_matrix.hpp>
 #include <splitting_tree.hpp>
 #include <transfer_sequences.hpp>
+#include <partition.hpp>

 #include <io.hpp>

 #include <future>
+#include <numeric>
+#include <iomanip>
+#include <random>

 using namespace std;

-template <typename T>
-vector<string> create_reverse_map(map<string, T> const & indices){
-	vector<string> ret(indices.size());
-	for(auto&& p : indices){
-		ret[p.second.base()] = p.first;
-	}
-	return ret;
-}
-
-template <typename T>
-std::vector<T> concat(std::vector<T> const & l, std::vector<T> const & r){
-	std::vector<T> ret(l.size() + r.size());
-	auto it = copy(begin(l), end(l), begin(ret));
-	copy(begin(r), end(r), it);
-	return ret;
-}
-
-template <typename T>
-std::vector<std::vector<T>> all_seqs(T min, T max, std::vector<std::vector<T>> const & seqs){
-	std::vector<std::vector<T>> ret((max - min) * seqs.size());
-	auto it = begin(ret);
-	for(auto && x : seqs){
-		for(T i = min; i < max; ++i){
-			it->assign(x.size()+1);
-			auto e = copy(x.begin(), x.end(), it->begin());
-			*e++ = i;
-		}
-	}
-	return ret;
-}
+using time_logger = silent_timer;

 int main(int argc, char *argv[]){
-	if(argc != 2) return 1;
+	if(argc != 4) return 1;
 	const string filename = argv[1];
 	const bool use_stdio = filename == "--";

+	// 0 => only states checks. 1 => transition checks. 2 or more => deep checks
+	const auto k_max = stoul(argv[2]);
+
+	const string mode = argv[3];
+	const bool streaming = mode == "stream";
+	const bool random_part = streaming;
+	const bool statistics = mode == "stats";
+	const bool compress_suite = mode == "compr";
+
 	const auto machine = [&]{
-		timer t("reading file " + filename);
+		time_logger t("reading file " + filename);
 		if(use_stdio){
 			return read_mealy_from_dot(cin);
 		} else {
@ -60,12 +44,12 @@ int main(int argc, char *argv[]){

 	auto all_pair_seperating_sequences_fut = async([&]{
 		const auto splitting_tree_hopcroft = [&]{
-			timer t("creating hopcroft splitting tree");
+			time_logger t("creating hopcroft splitting tree");
 			return create_splitting_tree(machine, hopcroft_style);
 		}();

 		const auto all_pair_seperating_sequences = [&]{
-			timer t("gathering all seperating sequences");
+			time_logger t("gathering all seperating sequences");
 			return create_all_pair_seperating_sequences(splitting_tree_hopcroft.root);
 		}();

@ -74,12 +58,12 @@ int main(int argc, char *argv[]){

 	auto sequence_fut = async([&]{
 		const auto splitting_tree = [&]{
-			timer t("Lee & Yannakakis I");
+			time_logger t("Lee & Yannakakis I");
 			return create_splitting_tree(machine, lee_yannakakis_style);
 		}();

 		const auto sequence = [&]{
-			timer t("Lee & Yannakakis II");
+			time_logger t("Lee & Yannakakis II");
 			return create_adaptive_distinguishing_sequence(splitting_tree);
 		}();

@ -87,23 +71,154 @@ int main(int argc, char *argv[]){
 	});

 	auto transfer_sequences_fut = std::async([&]{
-		timer t("determining transfer sequences");
+		time_logger t("determining transfer sequences");
 		return create_transfer_sequences(machine, 0);
 	});

+	auto inputs_fut = std::async([&]{
+		return create_reverse_map(machine.input_indices);
+	});
+
+	auto relevant_inputs_fut = std::async([&]{ // builds, per state, a weighted distribution over inputs
+		time_logger t("determining relevance of inputs");
+		vector<discrete_distribution<input>> distributions(machine.graph_size); // NOTE(review): std::discrete_distribution is specified for built-in integer result types; confirm `input` is acceptable here
+
+		for(state s = 0; s < machine.graph_size; ++s){
+			vector<double> r_cache(machine.input_size, 0); // weight of each input in state s
+			for(input i = 0; i < machine.input_size; ++i){
+				const auto test1 = apply(machine, s, i).output != machine.output_indices.at("quiescence"); // output differs from the one named "quiescence"
+				const auto test2 = apply(machine, s, i).to != s; // the transition leaves state s
+				r_cache[i.base()] = test1 + test2; // weight is 0, 1 or 2
+			}
+
+			distributions[s.base()] = discrete_distribution<input>(begin(r_cache), end(r_cache)); // NOTE(review): all weights are 0 if no input of s passes either test; the standard requires a positive weight sum
+		}
+		return distributions;
+	});
+
 	const auto all_pair_seperating_sequences = all_pair_seperating_sequences_fut.get();
 	const auto sequence = sequence_fut.get();

 	const auto seperating_family = [&]{
-		timer t("making seperating family");
+		time_logger t("making seperating family");
 		return create_seperating_family(sequence, all_pair_seperating_sequences);
 	}();

 	const auto transfer_sequences = transfer_sequences_fut.get();
-	const auto inputs = create_reverse_map(machine.input_indices);
+	const auto inputs = inputs_fut.get();

-	{
-		timer t("making test suite");
+	const auto print_word = [&](auto w){
+		for(auto && x : w) cout << inputs[x.base()] << ' ';
+	};
+
+	if(statistics){ // prints size statistics of the prefixes and suffixes, then an estimate of the suite size per k
+		const auto adder = [](auto const & x){ // builds a fold step: (total, element) -> total + x(element)
+			return [x](auto const & l, auto const & r) { return l + x(r); }; // x is captured by value: the returned closure outlives adder's argument
+		};
+
+		const auto size = adder([](auto const & r) { return r.size(); });
+
+		const auto p_size = transfer_sequences.size();
+		const auto p_total = accumulate(begin(transfer_sequences), end(transfer_sequences), size_t(0), size); // size_t seed: sizes are summed without narrowing to int
+		const auto p_avg = p_total / double(p_size);
+
+		cout << "Prefixes:\n";
+		cout << "\tsize\t" << p_size << '\n';
+		cout << "\ttotal\t" << p_total << '\n';
+		cout << "\tavg\t" << p_avg << '\n';
+
+		const auto w_fam_size = seperating_family.size();
+		const auto w_fam_total = accumulate(begin(seperating_family), end(seperating_family), size_t(0), size); // number of suffixes over all sets
+		const auto w_fam_avg = w_fam_total / double(w_fam_size);
+
+		const auto w_total = accumulate(begin(seperating_family), end(seperating_family), size_t(0), adder([&size](auto const & r){
+			return accumulate(begin(r), end(r), size_t(0), size); // summed length of the words in one set
+		}));
+		const auto w_avg = w_total / double(w_fam_total);
+
+		cout << "Suffixes:\n";
+		cout << "\tsize\t" << w_fam_total << '\n'; // NOTE(review): the "size" label shows w_fam_total, not w_fam_size; confirm this is intended
+		cout << "\tavg\t" << w_fam_avg << '\n';
+		cout << "\ttotal\t" << w_total << '\n';
+		cout << "\tavg\t" << w_avg << '\n';
+
+		cout << "Total tests (approximately):\n";
+		double total = machine.graph_size * 1 * w_fam_avg;
+		double length = p_avg + 0 + w_avg;
+		for(size_t k = 0; k <= k_max; ++k){
+			cout << "\tk = " << k << "\t"
+				 << setw(16) << size_t(total)  << " * "
+				 << setw(3)  << size_t(length) << " = "
+				 << setw(20) << size_t(total * length) << endl;
+			total *= machine.input_size; // each extra step multiplies the number of tests by the number of inputs
+			length += 1;
+		}
+	}
+
+	if(streaming){ // prints every test prefix + middle + suffix, for middle parts of length 0 .. k_max
+		time_logger t("outputting all preset tests");
+
+		vector<word> all_sequences(1); // starts as the single empty word, i.e. the middle parts for k = 0
+		for(size_t k = 0; k <= k_max; ++k){ // unsigned counter, same signedness as k_max
+			cerr << "*** K = " << k << endl;
+			for(state s = 0; s < machine.graph_size; ++s){
+				const auto & prefix = transfer_sequences[s.base()]; // bound by reference, no copy per state
+
+				for(auto && suffix : seperating_family[s.base()]){
+					for(auto && r : all_sequences){
+						print_word(prefix);
+						print_word(r);
+						print_word(suffix);
+						cout << endl;
+					}
+				}
+			}
+
+			if(k < k_max) all_sequences = all_seqs(0, machine.input_size, all_sequences); // the extension after the last round was never used, and is the largest one
+		}
+	}
+
+	if(random_part){ // endlessly prints random tests: random prefix + random middle part + random suffix
+		time_logger t("outputting all random tests");
+
+		std::random_device rd;
+		std::mt19937 generator(rd());
+
+		uniform_int_distribution<size_t> prefix_selection(0, transfer_sequences.size() - 1); // both bounds are inclusive, so the last valid index is size()-1
+		uniform_int_distribution<> fair_coin(0, 1);
+		uniform_int_distribution<size_t> suffix_selection; // its range is supplied per draw, see params below
+		auto relevant_inputs = relevant_inputs_fut.get();
+
+		using params = uniform_int_distribution<size_t>::param_type;
+
+		while(true){ // never terminates on its own
+			state current_state = 0;
+
+			const auto & p = transfer_sequences[prefix_selection(generator)];
+			current_state = apply(machine, current_state, begin(p), end(p)).to;
+
+			vector<input> m;
+			m.reserve(k_max + 2);
+			size_t minimal_size = k_max + 1; // number of inputs that are always taken
+			while(minimal_size || fair_coin(generator)){ // afterwards continue with probability 1/2 per step
+				input i = relevant_inputs[current_state.base()](generator);
+				m.push_back(i);
+				current_state = apply(machine, current_state, i).to;
+				if(minimal_size) minimal_size--;
+			}
+
+			const auto & suffixes = seperating_family[current_state.base()];
+			const auto & s = suffixes[suffix_selection(generator, params{0, suffixes.size()-1})];
+
+			print_word(p);
+			print_word(m);
+			print_word(s);
+			cout << endl;
+		}
+	}
+
+	if(compress_suite){
+		time_logger t("making test suite");
 		vector<word> suite;

 		for(state s = 0; s < machine.graph_size; ++s){
@ -125,11 +240,7 @@ int main(int argc, char *argv[]){

 		boost::iostreams::filtering_ostream compressed_stream;
 		compressed_stream.push(boost::iostreams::gzip_compressor());
-		if(use_stdio){
-			compressed_stream.push(cout);
-		} else {
-			compressed_stream.push(boost::iostreams::file_descriptor_sink(filename + "test_suite"));
-		}
+		compressed_stream.push(boost::iostreams::file_descriptor_sink(filename + "test_suite"));

 		boost::archive::text_oarchive archive(compressed_stream);
 		archive << real_suite;
--- a/src/metrics.cpp
+++ b/src/metrics.cpp
@ -0,0 +1,56 @@
+#include <mealy.hpp>
+#include <read_mealy_from_dot.hpp>
+
+#include <iostream>
+#include <vector>
+#include <queue>
+
+using namespace std;
+
+auto create_transfer_sequences(const mealy& machine, const state s, const input ignore){ // returns, per state, whether it is reachable from s without using input `ignore`
+	vector<bool> visited(machine.graph_size, false);
+
+	queue<state> work; // breadth-first work list
+	work.push(s);
+	while(!work.empty()){
+		const auto u = work.front();
+		work.pop();
+
+		if(visited[u.base()]) continue; // a state can be queued more than once
+		visited[u.base()] = true;
+
+		for(input i = 0; i < machine.input_size; ++i){
+			if(i == ignore) continue; // transitions on the ignored input are not followed
+			const auto v = apply(machine, u, i).to;
+			if(visited[v.base()]) continue;
+			work.push(v);
+		}
+	}
+
+	return visited; // despite the function name, this is a reachability table, not sequences
+}
+
+int main(int argc, char *argv[]){ // usage: <program> <filename>; prints one line of three counts per state
+	if(argc != 2) return 1; // exactly one argument is required
+	const string filename = argv[1];
+
+	const auto machine = read_mealy_from_dot(filename);
+
+//	vector<vector<bool>> table(machine.input_size);
+//	for(input i = 0; i < machine.input_size; ++i){
+//		table[i.base()] = create_transfer_sequences(machine, 0, i);
+//	}
+
+	// note the wrong iteration ;D
+	for(state s = 0; s < machine.graph_size; ++s){
+		size_t scores[3] = {0, 0, 0}; // scores[n] = number of inputs passing exactly n of the two tests
+		for(input i = 0; i < machine.input_size; ++i){
+			const auto test1 = apply(machine, s, i).output != machine.output_indices.at("quiescence"); // output differs from the one named "quiescence"
+			const auto test2 = apply(machine, s, i).to != s; // the transition leaves state s
+
+			scores[test1 + test2]++;
+		}
+		cout << scores[2] << " " << scores[1] << " " << scores[0] << endl; // order: both tests, one test, none
+	}
+}
+