From 22208275fd101e3801e01539de7458be293cc11f Mon Sep 17 00:00:00 2001
From: Joshua Moerman <lakseru@gmail.com>
Date: Mon, 20 Apr 2015 14:14:10 +0200
Subject: [PATCH] Alters the splitting tree to be minimal (for the W-method)

---
 lib/splitting_tree.cpp | 169 +++++++++++++++++++++++------------------
 lib/splitting_tree.hpp |  67 ++++++++--------
 src/methods.cpp        |  11 ++-
 3 files changed, 131 insertions(+), 116 deletions(-)
diff --git a/lib/splitting_tree.cpp b/lib/splitting_tree.cpp
index 4c111b8..2e4cf17 100644
--- a/lib/splitting_tree.cpp
+++ b/lib/splitting_tree.cpp
@@ -10,22 +10,19 @@
 
 using namespace std;
 
-splitting_tree::splitting_tree(size_t N, size_t d)
-: states(N)
-, depth(d)
-{
+splitting_tree::splitting_tree(size_t N, size_t d) : states(N), depth(d) {
 	iota(begin(states), end(states), 0);
 }
 
-splitting_tree &lca_impl2(splitting_tree & node){
-	if(node.mark > 1) return node;
-	for(auto && c : node.children){
-		if(c.mark > 0) return lca_impl2(c);
+splitting_tree & lca_impl2(splitting_tree & node) {
+	if (node.mark > 1) return node;
+	for (auto && c : node.children) {
+		if (c.mark > 0) return lca_impl2(c);
 	}
 	return node; // this is a leaf
 }
 
-result create_splitting_tree(const mealy& g, options opt){
+result create_splitting_tree(const mealy & g, options opt) {
 	const auto N = g.graph_size;
 	const auto P = g.input_size;
 	const auto Q = g.output_size;
@@ -34,130 +31,150 @@ result create_splitting_tree(const mealy& g, options opt){
 	auto & root = ret.root;
 	auto & succession = ret.successor_cache;
 
-	/* We'll use a queue to keep track of leaves we have to investigate;
-	 * In some cases we cannot split, and have to wait for other parts of the
-	 * tree. We keep track of how many times we did no work. If this is too
-	 * much, there is no complete splitting tree.
-	 */
+	// We'll use a queue to keep track of leaves we have to investigate;
+	// In some cases we cannot split, and have to wait for other parts of the
+	// tree. We keep track of how many times we did no work. If this is too
+	// much, there is no complete splitting tree.
 	queue<reference_wrapper<splitting_tree>> work;
 	size_t days_without_progress = 0;
 
-	/* List of inputs, will be shuffled in case of randomizations */
+	// List of inputs, will be shuffled in case of randomizations
 	vector<input> all_inputs(P);
 	iota(begin(all_inputs), end(all_inputs), 0);
 	random_device rd;
 	mt19937 generator(rd());
 
+	size_t current_order = 0;
+	bool split_in_current_order = false;
+
 	// Some lambda functions capturing some state, makes the code a bit easier :)
 	const auto add_push_new_block = [&work](list<list<state>> const & new_blocks, splitting_tree& boom) {
 		boom.children.assign(new_blocks.size(), splitting_tree(0, boom.depth + 1));
 
 		size_t i = 0;
-		for(auto && b : new_blocks){
+		for (auto && b : new_blocks) {
 			boom.children[i++].states.assign(begin(b), end(b));
 		}
 
-		for(auto && c : boom.children){
+		for (auto && c : boom.children) {
 			work.push(c);
 		}
 
-		assert(boom.states.size() == accumulate(begin(boom.children), end(boom.children), 0ul, [](size_t l, const splitting_tree & r) { return l + r.states.size(); }));
+		assert(boom.states.size() == accumulate(begin(boom.children), end(boom.children), 0ul,
+		                                        [](size_t l, const splitting_tree & r) {
+		                                        	return l + r.states.size();
+		                                        }));
 	};
-	const auto is_valid = [N, opt, &g](list<list<state>> const & blocks, input symbol){
-		if(!opt.check_validity) return true;
-
-		for(auto && block : blocks) {
-			const auto new_blocks = partition_(begin(block), end(block), [symbol, &g](state state){
+	const auto is_valid = [N, opt, &g](list<list<state>> const & blocks, input symbol) {
+		for (auto && block : blocks) {
+			const auto new_blocks = partition_(begin(block), end(block), [symbol, &g](state state) {
 				return apply(g, state, symbol).to;
 			}, N);
-			for(auto && new_block : new_blocks){
-				if(new_block.size() != 1) return false;
+			for (auto && new_block : new_blocks) {
+				if (new_block.size() != 1) return false;
 			}
 		}
 		return true;
 	};
-	const auto update_succession = [N, &succession](state s, state t, size_t depth){
-		if(succession.size() < depth+1) succession.resize(depth+1, vector<state>(N, state(-1)));
+	const auto update_succession = [N, &succession](state s, state t, size_t depth) {
+		if (succession.size() < depth + 1)
+			succession.resize(depth + 1, vector<state>(N, state(-1)));
 		succession[depth][s] = t;
 	};
 
 	// We'll start with the root, obviously
 	work.push(root);
-	while(!work.empty()){
+	while (!work.empty()) {
 		splitting_tree & boom = work.front();
 		work.pop();
 		const size_t depth = boom.depth;
 
-		if(boom.states.size() == 1) continue;
+		if (boom.states.size() == 1) continue;
 
-		if(opt.randomized){
-			shuffle(begin(all_inputs), end(all_inputs), generator);
-		}
+		if (opt.randomized) shuffle(begin(all_inputs), end(all_inputs), generator);
 
-		// First try to split on output
-		for(input symbol : all_inputs){
-			const auto new_blocks = partition_(begin(boom.states), end(boom.states), [symbol, depth, &g, &update_succession](state state){
-				const auto r = apply(g, state, symbol);
-				update_succession(state, r.to, depth);
-				return r.output;
-			}, Q);
+		if (!opt.assert_minimal_order || current_order == 0) {
+			// First try to split on output
+			for (input symbol : all_inputs) {
+				const auto new_blocks = partition_(
+				    begin(boom.states),
+				    end(boom.states), [symbol, depth, &g, &update_succession](state state) {
+				    	const auto r = apply(g, state, symbol);
+				    	update_succession(state, r.to, depth);
+				    	return r.output;
+				    }, Q);
 
-			// no split -> continue with other input symbols
-			if(new_blocks.size() == 1) continue;
+				// no split -> continue with other input symbols
+				if (new_blocks.size() == 1) continue;
 
-			// not a valid split -> continue
-			if(!is_valid(new_blocks, symbol)) continue;
+				// not a valid split -> continue
+				if (opt.check_validity && !is_valid(new_blocks, symbol)) continue;
 
-			// a succesful split, update partition and add the children
-			boom.seperator = {symbol};
-			add_push_new_block(new_blocks, boom);
+				// a successful split, update partition and add the children
+				boom.seperator = {symbol};
+				add_push_new_block(new_blocks, boom);
 
-			goto has_split;
-		}
-
-		// Then try to split on state
-		for(input symbol : all_inputs){
-			vector<bool> successor_states(N, false);
-			for(auto && state : boom.states){
-				successor_states[apply(g, state, symbol).to] = true;
+				goto has_split;
 			}
+		}
 
-			const auto & oboom = lca(root, [&successor_states](state state) -> bool{
-				return successor_states[state];
-			});
+		if (!opt.assert_minimal_order || current_order > 0) {
+			// Then try to split on state
+			for (input symbol : all_inputs) {
+				vector<bool> successor_states(N, false);
+				for (auto && state : boom.states) {
+					successor_states[apply(g, state, symbol).to] = true;
+				}
 
-			// a leaf, hence not a split -> try other symbols
-			if(oboom.children.empty()) continue;
+				const auto & oboom = lca(root, [&successor_states](state state) -> bool {
+					return successor_states[state];
+				});
 
-			// possibly a succesful split, construct the children
-			const vector<input> word = concat(vector<input>(1, symbol), oboom.seperator);
-			const auto new_blocks = partition_(begin(boom.states), end(boom.states), [word, depth, &g, &update_succession](state state){
-				const mealy::edge r = apply(g, state, word.begin(), word.end());
-				update_succession(state, r.to, depth);
-				return r.output;
-			}, Q);
+				// a leaf, hence not a split -> try other symbols
+				if (oboom.children.empty()) continue;
 
-			// not a valid split -> continue
-			if(!is_valid(new_blocks, symbol)) continue;
+				// Minimal order: only use separators of exactly the current order
+				if (opt.assert_minimal_order && oboom.seperator.size() != current_order) continue;
 
-			assert(new_blocks.size() > 1);
+				// possibly a successful split, construct the children
+				const vector<input> word = concat(vector<input>(1, symbol), oboom.seperator);
+				const auto new_blocks = partition_(
+				    begin(boom.states),
+				    end(boom.states), [word, depth, &g, &update_succession](state state) {
+				    	const mealy::edge r = apply(g, state, word.begin(), word.end());
+				    	update_succession(state, r.to, depth);
+				    	return r.output;
+				    }, Q);
 
-			// update partition and add the children
-			boom.seperator = word;
-			add_push_new_block(new_blocks, boom);
+				// not a valid split -> continue
+				if (opt.check_validity && !is_valid(new_blocks, symbol)) continue;
 
-			goto has_split;
+				assert(new_blocks.size() > 1);
+
+				// update partition and add the children
+				boom.seperator = word;
+				add_push_new_block(new_blocks, boom);
+
+				goto has_split;
+			}
 		}
 
 		// We tried all we could, but did not succeed => declare incompleteness.
-		if(days_without_progress++ >= work.size()) {
-			ret.is_complete = false;
-			return ret;
+		if (days_without_progress++ >= work.size()) {
+			if (!split_in_current_order || !opt.assert_minimal_order) { // nothing left to try
+				ret.is_complete = false;
+				return ret;
+			}
+			days_without_progress = 0; // new order: let every queued leaf retry before giving up
+			current_order++; // from now on require separators that are one symbol longer
+			split_in_current_order = false;
 		}
+
 		work.push(boom);
 		continue;
 
 		has_split:
+		split_in_current_order = true;
 		days_without_progress = 0;
 	}
 
diff --git a/lib/splitting_tree.hpp b/lib/splitting_tree.hpp
index e0851a2..8f83954 100644
--- a/lib/splitting_tree.hpp
+++ b/lib/splitting_tree.hpp
@@ -2,12 +2,9 @@
 
 #include "mealy.hpp"
 
-/*
- * A splitting tree as defined in Lee & Yannakakis. The structure is also
- * called a derivation tree in Knuutila. Both the classical Hopcroft algorithm
- * and the Lee & Yannakakis algorithm produce splitting trees.
- */
-
+/// \brief A splitting tree as defined in Lee & Yannakakis.
+/// This is also known as a derivation tree (Knuutila). Both the Gill/Moore/Hopcroft-style and the
+/// Lee&Yannakakis-style trees are splitting trees.
 struct splitting_tree {
 	splitting_tree(size_t N, size_t depth);
 
@@ -18,65 +15,59 @@ struct splitting_tree {
 	mutable int mark = 0; // used for some algorithms...
 };
 
-template <typename Fun>
-void lca_impl1(splitting_tree const & node, Fun && f){
+template <typename Fun> void lca_impl1(splitting_tree const & node, Fun && f) {
 	node.mark = 0;
-	if(!node.children.empty()){
-		for(auto && c : node.children){
+	if (!node.children.empty()) {
+		for (auto && c : node.children) {
 			lca_impl1(c, f);
-			if(c.mark) node.mark++;
+			if (c.mark) node.mark++;
 		}
 	} else {
-		for(auto && s : node.states){
-			if(f(s)) node.mark++;
+		for (auto && s : node.states) {
+			if (f(s)) node.mark++;
 		}
 	}
 }
 
 splitting_tree & lca_impl2(splitting_tree & node);
 
-template <typename Fun>
-splitting_tree & lca(splitting_tree & root, Fun && f){
+/// \brief Find the lowest common ancestor of elements on which \p f returns true.
+template <typename Fun> splitting_tree & lca(splitting_tree & root, Fun && f) {
 	static_assert(std::is_same<decltype(f(0)), bool>::value, "f should return a bool");
 	lca_impl1(root, f);
 	return lca_impl2(root);
 }
 
-template <typename Fun>
-const splitting_tree & lca(const splitting_tree & root, Fun && f){
+template <typename Fun> const splitting_tree & lca(const splitting_tree & root, Fun && f) {
 	static_assert(std::is_same<decltype(f(0)), bool>::value, "f should return a bool");
 	lca_impl1(root, f);
-	return lca_impl2(const_cast<splitting_tree&>(root));
+	return lca_impl2(const_cast<splitting_tree &>(root));
 }
 
 
-/*
- * The algorithm to create a splitting tree can be altered in some ways. This
- * struct provides options to the algorithm. There are two common setups.
- */
-
+/// \brief Structure contains options to alter the splitting tree creation.
+/// \p check_validity checks whether the transition/output map is injective on the current set of
+/// nodes which is being split. Setting this false degenerates to generating pairwise separating
+/// sequences. \p assert_minimal_order is used to produce minimal (pairwise) separating sequences.
+/// \p cache_succesors is needed by the second step in the LY algorithm and \p randomized randomizes
+/// the loops over the alphabet.
 struct options {
 	bool check_validity;
+	bool assert_minimal_order;
 	bool cache_succesors;
 	bool randomized;
 };
 
-const options lee_yannakakis_style = {true, true, false};
-const options hopcroft_style = {false, false, false};
-const options randomized_lee_yannakakis_style = {true, true, true};
-const options randomized_hopcroft_style = {false, false, true};
-
-/*
- * The algorithm to create a splitting tree also produces some other useful
- * data. This struct captures exactly that.
- */
+const options lee_yannakakis_style = {true, false, true, false};
+const options hopcroft_style = {false, false, false, false};
+const options min_hopcroft_style = {false, true, false, false};
+const options randomized_lee_yannakakis_style = {true, false, true, true};
+const options randomized_hopcroft_style = {false, false, false, true};
+const options randomized_min_hopcroft_style = {false, true, false, true};
 
+/// \brief The algorithm produces more than just a splitting tree, all results are put here.
 struct result {
-	result(size_t N)
-	: root(N, 0)
-	, successor_cache()
-	, is_complete(true)
-	{}
+	result(size_t N) : root(N, 0), successor_cache(), is_complete(N <= 1) {}
 
 	// The splitting tree as described in Lee & Yannakakis
 	splitting_tree root;
@@ -88,4 +79,6 @@ struct result {
 	bool is_complete;
 };
 
+/// \brief Creates a splitting tree by partition refinement.
+/// \returns a splitting tree and other calculated structures.
 result create_splitting_tree(mealy const & m, options opt);
diff --git a/src/methods.cpp b/src/methods.cpp
index 0f8a5ad..688f2e1 100644
--- a/src/methods.cpp
+++ b/src/methods.cpp
@@ -13,20 +13,25 @@
 using namespace std;
 
 int main(int argc, char * argv[]) {
-	if (argc != 2) return 1;
+	if (argc != 4) return 1;
 
 	const string filename = argv[1];
-	const size_t k_max = 1;
+	const string mode = argv[2];
+	const bool use_no_LY = mode == "--W-method";
+	const size_t k_max = std::stoul(argv[3]);
 
 	const auto machine = read_mealy_from_dot(filename).first;
 
 	auto sequence_fut = async([&] {
+		if (use_no_LY) {
+			return create_adaptive_distinguishing_sequence(result(machine.graph_size));
+		}
 		const auto tree = create_splitting_tree(machine, randomized_lee_yannakakis_style);
 		return create_adaptive_distinguishing_sequence(tree);
 	});
 
 	auto pairs_fut = async([&] {
-		const auto tree = create_splitting_tree(machine, randomized_hopcroft_style);
+		const auto tree = create_splitting_tree(machine, randomized_min_hopcroft_style);
 		return create_all_pair_seperating_sequences(tree.root);
 	});