diff --git a/docs/test_selection.tex b/docs/test_selection.tex new file mode 100644 index 0000000..f97ed63 --- /dev/null +++ b/docs/test_selection.tex @@ -0,0 +1,72 @@ +# Test selection strategies + + + + +## Augmented DS-method + +In order to perform less tests Chow and Vasilevski [...] pioneered the so called +W-method. In their framework a test query consists of a prefix $p$ bringing the +SUL to a specific state, a (random) middle part $m$ and a suffix $s$ asserting +that the SUL is in the appropriate state. This results in a test suite of the +form $P I^{\leq k} W$, where $P$ is a set of (shortest) access sequences, +$I^{\leq k}$ the set of all sequences of length at most $k$ and $W$ is the +characterization set. Classically, this characterization set is constructed by +taking the set of all (pairwise) separating sequences. For $k=1$ this test suite +is complete in the sense that if the SUL passes all tests, then either the SUL +is equivalent to the specification or the SUL has strictly more states than the +specification. By increasing $k$ we can check additional states. + +The generated test suite, however, was still too big in our learning context. +Fujiwara observed that it is possible to let the set $W$ depend on the state the +SUL is supposed to be [...]. This allows us to only take a subset of $W$ which +is relevant for a specific state. This slightly reduces the test suite which is +as powerful as the full test suite. This methods is known as the Wp-method. More +importantly, this observation allows for generalizations where we can carefully +pick the suffixes. + +In the presence of an (adaptive) distinguishing sequence we can take $W$ to be a +single suffix, greatly reducing the test suite. Lee and Yannakakis describe an +algorithm in [...] (which we will refer to as the LY algorithm) to efficiently +construct this sequence, if it exists. In our case, most hypotheses did not +enjoy an adaptive distinguishing sequence. In these cases the incomplete result +from the LY algorithm still contained a lot of information which we augmented by +pairwise separating sequences. + +As an example we show an incomplete adaptive distinguishing sequence for one of +the hypothesis in Figure ??. When we apply the input sequence and observe the right output, we know for sure that the SUL was in +state . Unfortunately not all path lead to a singleton set. When we for +instance apply the sequence and observe the right output, +we know for sure that the SUL was in one of the states . In +these cases we can resort the the pairwise separating sequences. + +We note that this augmented DS-method is in the worst case not any better than +the classical Wp-method. In our case, however, it greatly reduced the test +suites. + +Once we have our set of suffixes, which we call $Z$ now, our test algorithm +works as follows. The algorithm first exhaust the set $P I^{\leq k} Z$. If this +does not provide a counterexample, we will randomly pick test queries from $P +I^2 I^\ast Z$, where the algorithm samples uniformly from $P$, $I^2$ and $Z$ (if +$Z$ contains more that $1$ sequence for the supposed state) and with a geometric +distribution on $I^\ast$. + + +## Subalphabet selection + +During our attempts to learn the ESM we observed that the above method failed to +produce a certain counterexample. With domain knowledge we were able to provide +a handcrafted counterexample which allowed the algorithm to completely learn the +controller. In order to do this automatically we defined a subalphabet which is +important at the initialization phase of the controller. This subalphabet will +be used a bit more often that the full alphabet. We do this as follows. + +We start testing with the alphabet which provided a counterexample for the +previous hypothesis (for the first hypothesis we take the subalphabet). If no +counterexample can be found within a specified query bound, then we repeat with +the next alphabet. If both alphabets do not produce a counterexample within the +bound, the bound is increased by some factor and we repeat all. + +This method only marginally increases the number of tests. But it did find the +right counterexample we first had to construct by hand. diff --git a/lib/write_tree_to_dot.cpp b/lib/write_tree_to_dot.cpp index 8c95463..717bf51 100644 --- a/lib/write_tree_to_dot.cpp +++ b/lib/write_tree_to_dot.cpp @@ -1,49 +1,64 @@ -#include "write_tree_to_dot.hpp" #include "adaptive_distinguishing_sequence.hpp" +#include "read_mealy_from_dot.hpp" #include "splitting_tree.hpp" +#include "write_tree_to_dot.hpp" #include +#include using namespace std; -template -ostream & operator<<(ostream& out, vector const & x){ - if(x.empty()) return out; +template +void print_vec(ostream & out, const vector & x, const string & d, Fun && f) { + if (x.empty()) return; auto it = begin(x); - out << *it++; - while(it != end(x)) out << " " << *it++; - return out; + out << f(*it++); + while (it != end(x)) out << d << f(*it++); } +static const auto id = [](auto x) { return x; }; -void write_splitting_tree_to_dot(const splitting_tree& root, ostream& out_){ - write_tree_to_dot(root, [](const splitting_tree & node, ostream& out){ - out << node.states; - if(!node.seperator.empty()){ - out << "\\n" << node.seperator; + +void write_splitting_tree_to_dot(const splitting_tree & root, ostream & out_) { + write_tree_to_dot(root, [](const splitting_tree & node, ostream & out) { + print_vec(out, node.states, " ", id); + if (!node.seperator.empty()) { + out << "\\n"; + print_vec(out, node.seperator, " ", id); } }, out_); } -void write_splitting_tree_to_dot(const splitting_tree& root, const string& filename){ +void write_splitting_tree_to_dot(const splitting_tree & root, const string & filename) { ofstream file(filename); write_splitting_tree_to_dot(root, file); } -void write_adaptive_distinguishing_sequence_to_dot(const adaptive_distinguishing_sequence & root, ostream & out_){ - write_tree_to_dot(root, [](const adaptive_distinguishing_sequence & node, ostream& out){ - if(!node.word.empty()){ - out << node.word; + +void write_adaptive_distinguishing_sequence_to_dot(const adaptive_distinguishing_sequence & root, const translation & t, ostream & out_) { + const auto symbols = create_reverse_map(t.input_indices); + size_t overflows = 0; + write_tree_to_dot(root, [&symbols, &overflows](const adaptive_distinguishing_sequence & node, ostream & out) { + if (!node.word.empty()) { + print_vec(out, node.word, " ", [&symbols](auto x){ return "I" + symbols[x]; }); } else { vector I(node.CI.size()); - transform(begin(node.CI), end(node.CI), begin(I), [](const pair p){ return p.second; }); - out << "I = " << I; + transform(begin(node.CI), end(node.CI), begin(I), [](auto p){ return p.second; }); + if (I.size() < 7) { + out << '{'; + print_vec(out, I, ", ", id); + out << '}'; + } else { + overflows++; + out << I.size() << " states\\n{" << I[0] << ", ...}"; + } } }, out_); + clog << overflows << " overflows" << endl; } -void write_adaptive_distinguishing_sequence_to_dot(const adaptive_distinguishing_sequence & root, string const & filename){ +void write_adaptive_distinguishing_sequence_to_dot(const adaptive_distinguishing_sequence & root, const translation & t, const string & filename) { ofstream file(filename); - write_adaptive_distinguishing_sequence_to_dot(root, file); + write_adaptive_distinguishing_sequence_to_dot(root, t, file); } diff --git a/lib/write_tree_to_dot.hpp b/lib/write_tree_to_dot.hpp index a0906fa..7ad0032 100644 --- a/lib/write_tree_to_dot.hpp +++ b/lib/write_tree_to_dot.hpp @@ -7,15 +7,15 @@ // Generic printer for tree template -void write_tree_to_dot(const T & tree, NodeString && node_string, std::ostream & out){ +void write_tree_to_dot(const T & tree, NodeString && node_string, std::ostream & out) { using namespace std; - out << "digraph g {\n"; + out << "digraph {\n"; // breadth first int global_id = 0; queue>> work; work.push({global_id++, tree}); - while(!work.empty()){ + while (!work.empty()) { const auto id = work.front().first; const T & node = work.front().second; work.pop(); @@ -24,9 +24,10 @@ void write_tree_to_dot(const T & tree, NodeString && node_string, std::ostream & node_string(node, out); out << "\"];\n"; - for(auto && c : node.children){ + for (auto && c : node.children) { int new_id = global_id++; - out << "\ts" << id << " -> " << "s" << new_id << ";\n"; + out << "\ts" << id << " -> " + << "s" << new_id << ";\n"; work.push({new_id, c}); } } @@ -38,8 +39,9 @@ void write_tree_to_dot(const T & tree, NodeString && node_string, std::ostream & // Specialized printing for splitting trees and dist seqs struct splitting_tree; void write_splitting_tree_to_dot(const splitting_tree & root, std::ostream & out); -void write_splitting_tree_to_dot(const splitting_tree & root, std::string const & filename); +void write_splitting_tree_to_dot(const splitting_tree & root, const std::string & filename); struct adaptive_distinguishing_sequence; -void write_adaptive_distinguishing_sequence_to_dot(const adaptive_distinguishing_sequence & root, std::ostream & out); -void write_adaptive_distinguishing_sequence_to_dot(const adaptive_distinguishing_sequence & root, std::string const & filename); +struct translation; +void write_adaptive_distinguishing_sequence_to_dot(const adaptive_distinguishing_sequence & root, const translation & t, std::ostream & out); +void write_adaptive_distinguishing_sequence_to_dot(const adaptive_distinguishing_sequence & root, const translation & t, const std::string & filename); diff --git a/src/trees.cpp b/src/trees.cpp new file mode 100644 index 0000000..f89e643 --- /dev/null +++ b/src/trees.cpp @@ -0,0 +1,28 @@ +#include +#include +#include +#include + +#include +#include + +using namespace std; + +int main(int argc, char * argv[]) { + if (argc != 3) return 1; + const string filename = argv[1]; + + const string randomized_str = argv[2]; + const bool randomized = randomized_str == "randomized"; + + const auto machine_and_translation = read_mealy_from_dot(filename); + const auto & machine = machine_and_translation.first; + const auto & translation = machine_and_translation.second; + + const auto options = randomized ? randomized_lee_yannakakis_style : lee_yannakakis_style; + const auto tree = create_splitting_tree(machine, options); + const auto sequence = create_adaptive_distinguishing_sequence(tree); + + write_splitting_tree_to_dot(tree.root, filename + ".tree"); + write_adaptive_distinguishing_sequence_to_dot(sequence, translation, filename + ".seq"); +}