diff --git a/content.tex b/content.tex new file mode 100644 index 0000000..b73e24c --- /dev/null +++ b/content.tex @@ -0,0 +1,28 @@ +\project thesis +\startproduct content + +\starttext + +\completecontent + +\completelistoffigures + +\completelistoftables + +\completelistofalgorithms + +\showbodyfont + + +\chapter{Intro} +\section{Learning and Testing} +\section{Nominal Techniques} +\component content/test-methods +\chapter{Learning Embedded Stuff} +\chapter{Separating Sequences} +\chapter{Learning Nominal Automata} +\chapter{Ordered Nominal Sets} +\chapter{Succinct Nominal Automata?} + +\stoptext +\stopproduct diff --git a/content/test-methods.tex b/content/test-methods.tex new file mode 100644 index 0000000..97e4e17 --- /dev/null +++ b/content/test-methods.tex @@ -0,0 +1,785 @@ +\project thesis +\startcomponent test-methods + +%\usepackage[vlined]{algorithm2e} +% +%\newcommand{\todo}[1]{{\bf * }\marginpar{#1}} +% +%\newcommand{\Def}[1]{\emph{#1}} +%\newcommand{\bigO}{\mathcal{O}} +%\newcommand{\N}{\mathbb{N}} +%\newcommand{\tot}[1]{\xrightarrow{\,\,{#1}\,\,}} + +\startchapter[title={A Framework for FSM-based Test Methods}] + +\startsection[title={Introduction}] +\stopsection + +\startsection[title={Preliminaries}] + +\startsubsection[title={Mealy machines}] + +We will focus on Mealy machines, as those capture many protocol specifications and reactive systems. +Note that we restrict ourselves to deterministic and complete machines. + +We fix finite alphabets $I$ and $O$ of inputs respectively outputs. +We use the usual notation for operations on words: +$uv$ for the concatenation of two words $u, v \in I^{\ast}$ and $|u|$ for the length of $u$. + +\startdefinition +A \emph{Mealy machine} $M$ consists of a finite set of \emph{states} $S$, an \emph{initial state} $s_0 \in S$ and two functions: +\startitemize +\item a \emph{transition function} $\delta \colon S \times I \to S$, and +\item an \emph{output function} $\lambda \colon S \times I \to O$. +\stopitemize + +Both functions are extended to words as $\delta \colon S \times I^{\ast} \to S$ and $\lambda \colon S \times I^{\ast} \to O^{\ast}$. +By abuse of notation we will often write $s \in M$ instead of $s \in S$ and leave out $S$ altogether. +For a second Mealy machine $M'$ its members are denoted $S', s'_0, \delta'$ and $\lambda'$ by convention. + +The \emph{behaviour} of a state $s$ is given by the output function $\lambda(s, -) \colon I^{\ast} \to O^{\ast}$. +Two states $s$ and $t$ are \emph{equivalent} if they have equal behaviours, written $s \sim t$, and two Mealy machines are equivalent if their initial states are equivalent. +\stopdefinition + +An example Mealy machine is given in \in{Figure}[fig:running-example]. + +\startplacefigure[title={An example specification with input $I=\{a,b,c\}$ and output $O=\{0,1\}$.},reference=fig:running-example,list={An example specification.}] +\hbox{ +\starttikzpicture[shorten >=2pt,node distance=0.9cm and 3cm,bend angle=20] + \tikzstyle{every state}=[draw=black,text=black,inner + sep=2pt,minimum size=12pt,initial text=] + \node[state] (0) {$s_0$}; + \node[state] (1) [above left = of 0] {$s_1$}; + \node[state] (2) [below left = of 0] {$s_2$}; + \node[state] (3) [above right = of 0] {$s_3$}; + \node[state] (4) [below right = of 0] {$s_4$}; + \node (5) [above = of 0] {}; + \path[->] + (5) edge (0) + (0) edge [bend left=] node [below] {${a}/1$} (1) + (0) edge [bend right] node [below] {${b}/0$} (4) + (0) edge [loop below] node [below] {${c}/0$} (0) + (1) edge node [left ] {${a}/0$} (2) + (1) edge [bend left] node [above] {${b}/0$, ${c}/0$} (0) + (2) edge [bend right] node [below] {${b}/0$, ${c}/0$} (0) + (2) edge [loop left] node [left ] {${a}/1$} (1) + (3) edge [bend left=30] node [right] {${a}/1$} (4) + (3) edge [bend right] node [above] {${b}/0$} (0) + (3) edge [loop right] node [right] {${c}/1$} (3) + (4) edge [bend left] node [right] {${a}/1$} (3) + (4) edge [loop right] node [right] {${b}/1$} (4) + (4) edge [bend right] node [above] {${c}/0$} (0); +\stoptikzpicture +} +\stopplacefigure + +\stopsubsection + + +\startsubsection[title={Testing}] +In conformance testing we have a specification modelled as a Mealy machine and an implementation (the system under test, or SUT) which we assume to behave as a Mealy machine \cite{LeeYannakakis}. +Tests are generated from the specification and applied to the implementation. We assume that we can reset the implementation before every test. +If the output is different than the specified output, then we know the implementation is flawed. +The goals is to test as little as possible, while covering as much as possible. + +\startdefinition[reference=test-suite] +A \emph{test suite} is a finite subset $T \subseteq I^{\ast}$. +A test $t \in T$ is called \emph{maximal} if it is not a proper prefix of another $s \in T$. +We denote the set of maximal tests of $T$ with $max(T)$. +\todo{In de voorbeelden geef ik altijd $max(X)$, + maar in de bewijzen is juist $Pref(X)$ handig. + Moet dit consistent houden...} +\stopdefinition + +The maximal tests are the only tests in $T$ we actually have to apply to our SUT as we can record the intermediate outputs. +Also note that we do not have to consider outputs in the test suite, as those follow from the deterministic specification. +We define the size of a test suite as usual \cite{Dorofeeva,PLGHO14}. +The size of a test suite is measured as the sum of the lengths of all its maximal tests plus one reset per test. + +\startdefinition[reference=test-suite-size] +The \emph{size} of a test suite $T$ is defined to be +$||T|| = \sum\limits_{t \in max(T)} (|t| + 1)$. +\stopdefinition + +\stopsubsection + +\startsubsection[title={Completeness of test suites}] +\startexample[reference=incompleteness] +{\bf No test suite is complete.} +Consider the specification in \in{Figure}{a}[fig:incompleteness-example]. +This machine will always produce zeroes. +For any test suite we can make a faulty implementation which passes the test suite. +Such an implementation might look like \in{Figure}{b}[fig:incompleteness-example] with $n$ big enough. +This justifies the following definition. + +\startplacefigure[title={A basic example showing that finite test suites are incomplete. The implementation on the right will pass any test suite if we choose $n$ big enough.},reference=fig:incompleteness-example] +\startcombination[2*1] +{\hbox{ + \starttikzpicture[shorten >=2pt,node distance=2cm] + \tikzstyle{every state}=[draw=black,text=black,inner + sep=2pt,minimum size=12pt,initial text=] + \node[state] (0) {$s_0$}; + \path[->] + (0) edge [loop] node [below] {${a}/0$} (0); + \stoptikzpicture +}} {a} +{\hbox{ + \starttikzpicture[shorten >=2pt,node distance=2cm] + \tikzstyle{every state}=[draw=black,text=black,inner + sep=2pt,minimum size=12pt,initial text=] + \node[state] (0) {$s'_0$}; + \node[state] (1) [right of=0] {$s'_1$}; + \node (2) [right of=1] {$\cdots$}; + \node[state] (3) [right of=2] {$s'_n$}; + \path[->] + (0) edge [bend left=20 ] node [below] {${a}/0$} (1) + (1) edge [bend left=20 ] node [below] {${a}/0$} (2) + (2) edge [bend left=20 ] node [below] {${a}/0$} (3) + (3) edge [loop ] node [below] {${a}/1$} (3); + \stoptikzpicture +}} {b} +\stopcombination +\stopplacefigure +\stopexample + + +\startdefinition[reference=completeness] +Let $M$ be a Mealy machine and $T$ be a test suite. +We say that $T$ is \emph{$m$-complete (for $M$)} if for all inequivalent machines $M'$ with at most $m$ states there exists a $t \in T$ such that $\lambda(s_0, t) \neq \lambda'(s'_0, t)$. +\stopdefinition + +We are often interested in the case of $m$-completeness, where $m = n + k$ for some $k \in \naturalnumbers$ and $n$ is the number of states in the specification. +Here $k$ will stand for the number of \emph{extra states} we can test. +The issue of an unknown bound is addressed later in the paper. + +\stopsubsection +\startsubsection[title={State identifiers, access sequences and sets of words}] +\define[1]\Fam{\mathcal{#1}} + +In order to define a test suite modularly, we introduce notation for combining sets of words. +We require all sets to be \emph{prefix-closed}, this is very convenient in later proofs. +For sets of words $X$ and $Y$, we define: + +\startitemize +\item their concatenation $X \cdot Y = \{ xy \mid x \in X, y \in Y \}$, +\item iterated concatenation $X^0 = \{ \epsilon \}$ and $X^{n+1} = X \cdot X^{n}$, +\item bounded concatenation $X^{\leq n} = \bigcup_{i \leq n} X^i$, and +\item prefix closure $Pref(X) = \{ y \mid y \text{ is a prefix of } x, x \in X \}$. +\stopitemize + +For a set $S$, a family of sets $\Fam{X}$ is a set of sets indexed by $S$: +$\Fam{X} = \{ X_s \}_{s \in S}$. +We will use families to define words relevant for a state $s$. +We define: +\startitemize +\item flattening: $\bigcup \Fam{X} = \{ x \mid x \in X_s, s \in S \}$, +\item union: $\Fam{X} \cup \Fam{Y}$ is defined point-wise: +$(\Fam{X} \cup \Fam{Y})_s = X_s \cup Y_s$, and +\item $\Fam{X}$-equivalence: +$x \sim_\Fam{X} y$ if $\lambda(x,w) = \lambda(y,w)$ for all $w \in X_x \cap X_y$. +\todo{Hangt af van $M$, maar is nog niet gefixed} +\stopitemize + +\startdefinition +An \emph{access sequence for state $s$} is a word $w$ such that $\delta(q_0, w) = s$. +A set $P$ consisting of an access sequence for each state is called a \emph{state cover}. +If $P$ is a state cover, $P \cdot I$ is called a \emph{transition cover}. +\stopdefinition + +\startdefinition +A family of sets $\Fam{X}$ is a \emph{separating family} (or set of \emph{harmonized state identifiers} \cite{YP90}) if $x \sim_\Fam{X} y$ implies $x \sim y$. +\stopdefinition + +In other words, a family of words is a separating family if for each pair of inequivalent states it contains a word separating those states. +Following the definitions in \cite{LeeYannakakis}, a separating family where each set is a singleton is an \emph{adaptive distinguishing sequence} (ads). +An ads is of special interest since they can identify a state using a single word. +Separating families always exist for Mealy machines, but an ads might not exist. + +Given a specification $M$ (with states $S$) we define two operations: +\startitemize +\item concatenation: +$X \odot \Fam{Y} = \{ xy \mid x \in X, y \in Y_{\delta(s_0, x)} \}$ and +\item refinement: $\Fam{X} ; \Fam{Y}$ defined by +$(\Fam{X} ; \Fam{Y})_s = X_s \cup \bigcup_{y \sim_\Fam{X} x} D(\Fam{Y}, x, y)$, where +$D(\Fam{Y}, x, y) = Pref\{w \mid \lambda(x,w) \neq \lambda(y,w), w \in Y_x \cap Y_y\}$. +\stopitemize + +Note that these operations depend on the specification $M$. +We omit $M$ from the notation as the specification is always clear from the context. +We will use the following facts in defining our new test method. + +\startlemma[reference=lemma:refinement-props] +For all families $\Fam{X}$ and $\Fam{Y}$: +\startitemize +\item $\Fam{X} ; \Fam{X} = \Fam{X}$, +\item $\Fam{X} ; \Fam{Y} = \Fam{X}$, whenever $\Fam{X}$ is a separating family, and +\item $\Fam{X} ; \Fam{Y}$ is a separating family whenever $\Fam{Y}$ is a separating family. +\stopitemize +\stoplemma + +\stopsubsection +\stopsection +\startsection[title={Hybrid adaptive distinguishing sequences}] + +In this section we describe a new test generation method for Mealy machines. +Its completeness will be proven in a later section, together with completeness for related, known, methods. + +From a high level perspective, the method uses the algorithm by Lee and Yannakakis \cite{LeeYannakakis} to obtain an ads. +If no ads exists, their algorithm still provides some sequences which separates some inequivalent states. +Our extension is to refine the set of sequences by using pairwise separating sequences. +The reason we do this is the fact that the ADS-method generally constructs small test suites \cite{Dorofeeva}. +The test suites are small since an ads can identify a state with a single word, instead of a set of words which is generally needed. +Even if the ads does not exist, using the partial result of Lee and Yannakakis' algorithm can reduce the size of test suites. + +We will describe how to obtain a separating families and adaptive distinguishing sequences for Mealy machines. +For the former, one typically uses Moore's or Hopcroft's minimization algorithm \cite{Smetsers}. +For the latter, an efficient algorithm is described in \cite{LeeYannakakis}. +Both algorithms use a splitting tree as data structure. + +\startdefinition[reference=splitting-tree] +A \defn{splitting tree (for $M$)} is a rooted tree where each node $u$ has +\startitemize + \item a set of states $l(u) \subseteq M$, and + \item if $u$ is not a leaf, a sequence $\sigma(u) \in I^{\ast}$. +\stopitemize +We require that if a node $u$ has children $C(u)$ then +\startitemize + \item the sets of states of the children of $u$ partition $l(u)$, i.e., the set + $P(u) = \{l(v) \,|\, v \in C(u)\}$ is a partition of $l(u)$, and + \item the sequence $\sigma(u)$ witnesses the partition $P(u)$, meaning that + $\lambda(s, \sigma(u)) = \lambda(t, \sigma(u))$ iff $p = q$ for all $s \in p, t \in q$ for all $p, q \in P(u)$. +\stopitemize +A splitting tree is called \defn{complete} if all inequivalent states belong to different leaves. +\stopdefinition + +The splitting tree records the execution of a partition refinement algorithm (such as Moore's or Hopcroft's algorithm). +Each non-leaf node encode a \defn{split} together with a witness. +Since a complete splitting tree contains witnesses for all inequivalences, we can extract a separating family from it. +Briefly, for each state we define the set of words as follows: locate the leaf containing the state and collect all the sequences you read when traversing to the root of the tree. +We refer to \cite{Smetsers} for more details on these algorithms. + +For adaptive distinguishing sequences an additional requirement is put on the splitting tree: +for each non-leaf node $u$, the sequence $\sigma(u)$ defines an injective map $x \mapsto (\delta(x, \sigma(u)), \lambda(x, \sigma(u)))$ on the set $l(u)$. +In \cite{LeeYannakakis} such splits are called \defn{valid}. +\in{Figure}[fig:example-splitting-tree] shows both a valid and invalid split. +Validity precisely ensures that after performing a split, the states are still distinguishable. + +\startplacefigure[title={A complete splitting tree with shortest witnesses for the specification of \in{Figure}[fig:running-example]. Only the splits $a$ and $aa$ are valid.}, +reference=fig:example-splitting-tree] +\hbox{ +\starttikzpicture[node distance=1.4cm] +\node (0) [text width=2cm, align=center, ] {$s_0, s_1, s_2, s_3, s_4$ $a$}; +\node (1) [text width=1cm, align=center, below left of=0 ] {$s_1$}; +\node (2) [text width=2cm, align=center, below right of=0] {$s_0, s_2, s_3, s_4$ $b$}; +\node (3) [text width=1cm, align=center, below left of=2 ] {$s_0, s_2, s_3$ $c$}; +\node (4) [text width=1cm, align=center, below right of=2] {$s_4$}; +\node (5) [text width=1cm, align=center, below left of=3 ] {$s_0, s_2$ $aa$}; +\node (6) [text width=1cm, align=center, below right of=3] {$s_3$}; +\node (7) [text width=1cm, align=center, below left of=5 ] {$s_0$}; +\node (8) [text width=1cm, align=center, below right of=5] {$s_2$}; +\path[->] +(0) edge (1) +(0) edge (2) +(2) edge (3) +(2) edge (4) +(3) edge (5) +(3) edge (6) +(5) edge (7) +(5) edge (8); +\stoptikzpicture +} +\stopplacefigure + +\startfact +A complete splitting tree with valid splits exists if and only if there exists an adaptive distinguishing sequence \cite{LeeYannakakis}. +\stopfact + +Our method uses the exact same algorithm as the one by Lee and Yannakakis. +However, we also apply it in the case when the splitting tree with valid splits is not complete (and hence no adaptive distinguishing sequence exists). +Their algorithm still produces a family of sets, but is not necessarily a separating family. + +In order to recover separability, we refine that family of sets. +Let $\Fam{Z'}$ be the result of Lee and Yannakakis' algorithm (to distinguish from their notation, we add a prime) and let $\Fam{H}$ be a separating family extracted from an ordinary splitting tree. +The hybrid ADS family is defined as $\Fam{Z'} ; \Fam{H}$, and can be computed as sketched in \in{Algorithm}[alg:hybrid]. +By \in{Lemma}[lemma:refinement-props] we note the following: in the best case this family is an adaptive distinguishing sequence; in the worst case it is equal to $\Fam{H}$; and in general it is a combination of the two families. +For this reason we call the method a hybrid method. + +\startplacealgorithm[title={Obtaining the hybrid separating family $\Fam{Z'} ; \Fam{H}$},reference=alg:hybrid] +\startalgorithmic + \REQUIRE{A Mealy machine $M$} + \ENSURE{A separating family $Z'$} + \STATE{$T_1 \leftarrow$ splitting tree for Moore's minimization algorithm} + \STATE{$\Fam{H} \leftarrow$ separating family extracted from $T_1$} + \STATE{$T_2 \leftarrow$ splitting tree with valid splits (see \cite{LeeYannakakis})} + \STATE{$\Fam{Z'} \leftarrow$ (incomplete) family as constructed from $T_2$} + \FORALL{inequivalent states $s, t$ in the same leaf of $T_2$}{ + \STATE{$Z'_s \leftarrow Z_s \cup H_s$} + \STATE{$Z'_t \leftarrow Z_t \cup H_t$} + } + \ENDFOR + \RETURN{$Z'$} +\stopalgorithmic +\stopplacealgorithm + +With the hybrid family we can define the test suite as follows. +In words, the suite consists of tests of the form $p w s$, where $p$ brings the SUT to a certain state, $w$ is to detect $k$ extra states and $s$ is to identify the state. +Its $m$-completeness is proven in Section~\ref{sec:completeness}. + +\startdefinition +Let $P$ be a state cover, +$\Fam{Z'}$ be a family of sets constructed with the Lee and Yannakakis algorithm and +let $\Fam{H}$ be a separating family. +The \defn{hybrid ADS} test suite is +\startformula +T = P \cdot I^{\leq k+1} \odot (\Fam{Z'} ; \Fam{H}). +\stopformula +\stopdefinition + +\startsubsection[title={Example}] + +\todo{Beter opschrijven, en uiteindelijke test suite geven. + En vergelijken met andere methode? Misschien later pas.} +In the figure we see the (unique) result of Lee and Yannakakis' algorithm. +We note that the states $s_2, s_3, s_4$ are not split, so we need to refine the family for those states. + +\todo{Hier was ik gebleven} +\stopsubsection +\stopsection +\stopchapter +\stopcomponent + +\begin{figure} +\centering +\begin{subfigure}{0.45\textwidth} + \centering + \begin{tikzpicture}[node distance=1.4cm] + \node (0) [text width=2cm, align=center, ] {$s_0, s_1, s_2, s_3, s_4$\\$a$}; + \node (1) [text width=1cm, align=center, below left of=0 ] {$s_1$}; + \node (2) [text width=2cm, align=center, below right of=0] {$s_0, s_2, s_3, s_4$\\$aa$}; + \node (3) [text width=1cm, align=center, below left of=2 ] {$s_0$}; + \node (4) [text width=1cm, align=center, below right of=2] {$s_2, s_3, s_4$}; + \path[->] + (0) edge (1) + (0) edge (2) + (2) edge (3) + (2) edge (4); + \end{tikzpicture} + \caption{Largest splitting tree with only valid splits for Figure~\ref{fig:running-example}.} + \label{fig:example-splitting-tree-valid} +\end{subfigure}~ +\begin{subfigure}{0.45\textwidth} + \centering + \begin{tikzpicture}[node distance=1.4cm] + \node (0) [text width=2cm, align=center, ] {\tiny $s_0, s_1, s_2, s_3, s_4$\\$s_0, s_1, s_2, s_3, s_4$\\$a$}; + \node (1) [text width=1cm, align=center, below left of=0 ] {\tiny $s_1$\\$s_2$}; + \node (2) [text width=2cm, align=center, below right of=0] {\tiny $s_0, s_2, s_3, s_4$\\$s_1, s_2, s_4, s_3$\\$a$}; + \node (3) [text width=1cm, align=center, below left of=2 ] {\tiny $s_0$\\$s_2$}; + \node (4) [text width=1cm, align=center, below right of=2] {\tiny $s_2, s_3, s_4$\\$s_2, s_3, s_4$}; + \path[->] + (0) edge (1) + (0) edge (2) + (2) edge (3) + (2) edge (4); + \end{tikzpicture} + \caption{The adaptive distinguishing tree in notation of \cite{LeeYannakakis}.} + \label{fig:example-splitting-tree-ads} +\end{subfigure} +\end{figure} + +From the splitting tree in Figure~\ref{fig:example-splitting-tree}, we obtain the following separating family $\Fam{H}$. +From the figure above we obtain the family $\Fam{Z}$. +These families and the refinement $\Fam{Z'};\Fam{H}$ are given below: + +\begin{center} +\begin{tabular}{l @{\hspace{1cm}} l @{\hspace{1cm}} l} + $H_0 = \{aa,b,c\}$ & $Z'_0 = \{aa\}$ & $(Z';H)_0 = \{aa\}$ \\ + $H_1 = \{a\}$ & $Z'_1 = \{a\}$ & $(Z';H)_1 = \{a\}$ \\ + $H_2 = \{aa,b,c\}$ & $Z'_2 = \{aa\}$ & $(Z';H)_2 = \{aa,b,c\}$ \\ + $H_3 = \{a,b,c\}$ & $Z'_3 = \{aa\}$ & $(Z';H)_3 = \{a,b,c\}$ \\ + $H_4 = \{a,b\}$ & $Z'_4 = \{aa\}$ & $(Z';H)_4 = \{aa,b\}$ +\end{tabular} +\end{center} + +\subsection{When $k$ is not known} + +In many of the applications described in Section~\ref{sec:applications} no bound on the number of states of the SUT was known. +In such cases it is possible to randomly select test cases from an infinite test suite. +Unfortunately, we lose the theoretical guarantees of completeness with random generation. +Still, for the applications in Section~\ref{sec:applications} it has worked well in finding flaws. + +We can randomly test cases as follows. +In the above definition for the hybrid ADS test suite we replace $I^{\leq k}$ by $I^{\ast}$ to obtain an infinite test suite. +Then we sample tests as follows: +1) sample an element $p$ from $P$ uniformly, +2) sample a word $w$ from $I^{\ast}$ with a geometric distribution then +3) sample uniformly from $(\Fam{Z'} ; \Fam{H})_s$ for the state $s = \delta(s_0, pw)$. + + +\section{Related test methods} +\label{sec:methods} + +In this section, we briefly review the classical conformance testing methods: +the W, Wp, UIO, UIOv, HSI, ADS methods. +Our method described is very similar to some of these methods, so we will +relate them by describing them uniformly. + +There are many more test generation methods. Literature shows, however, that not +all of them are complete. For example, the methods in \cite{Bernhard} are +falsified by \cite{Petrenko} and the UIO-method \cite{Sabnani} is shown to be +incomplete in \cite{Vuong}. For that reason, completeness of the correct methods +is shown (again) in the next section. +We fix a state cover $P$ throughout this section and take the transition cover $Q = P \cdot I$. + +\subsection{W-method \cite{Chow,Vasilevskii}} +Possibly one of the earliest $m$-complete test methods. + +\begin{mdefinition}\label{w-method} + A set of words $W$ is a \Def{characterization set} if for each pair of inequivalent states $s$ and $t$ there exists a word $w \in W$ with $\lambda(s,w) \neq \lambda(t,w)$. + \newline + Let $W$ be a characterization set, the \Def{W test suite} is + defined as $(P \cup Q) \cdot I^{\leq k} \cdot W$. +\end{mdefinition} + +If we have a separating family $\Fam{W}$, we can obtain a characterization +set by flattening: take $W = \bigcup \Fam{W}$. + + +\subsection{The Wp-method \cite{Fujiwara}}\label{sec:wp} +The W-method was refined by Fujiwara to use smaller sets when identifying states. +In order to do that he defined state-local sets of words. + +\begin{mdefinition} +\label{state-identifier} +\label{wp-method} +A \Def{state identifier for $s$} is a set $W_s$ such that for every inequivalent +state $t$ there is a $w \in W_s$ such that $\lambda(s, w) \neq \lambda(t, w)$. +\newline +Let $\Fam{W}$ be a family of state identifiers, the \Def{Wp test suite} is +defined as $P \cdot I^{\leq k} \cdot \bigcup \Fam{W} \,\cup\, Q \cdot I^{\leq k} +\odot \Fam{W}$. +\end{mdefinition} + +Note that $\bigcup \Fam{W}$ is a characterization set as defined for the W-method. +It is needed for completeness to test states with the whole set $\bigcup \Fam{W}$. +Once states are tested as such, we can use the smaller sets $W_s$ for testing transitions. + + +\subsection{The HSI-method \cite{Luo,YP90}}\label{sec:hsi} +The Wp-method in turn was refined by Yevtushenko and Petrenko. +They make use of so called \emph{harmonized} state identifiers (which are called +separating families in \cite{LeeYannakakis} and in present paper). +By having this global property of the family, less tests need to +be executing when testing a state. + +\begin{mdefinition}\label{hsi-method} +Let $\Fam{H}$ be a separating family, the \Def{HSI test suite} is defined as +$(P \cup Q) \cdot I^{\leq k} \odot \Fam{H}$. +\end{mdefinition} + +Our newly described test method is an instance of the HSI-method. +However, in \cite{Luo,YP90} they describe the HSI-method together with a +specific way of generating the separating families. Namely, the set obtained +by a splitting tree with shortest witnesses. In present paper that +is generalized, allowing for our extension to be an instance. + + +\subsection{The ADS-method \cite{LeeYannakakis}}\label{sec:ads} +As discussed before, when a Mealy machine admits a adaptive distinguishing sequence, +only one test has to be performed for identifying a state. +This is exploited in the ADS-method. + +\begin{mdefinition}\label{ds-method} +Let $\Fam{Z}$ be a separating family where every set is a singleton, the +\Def{ADS test suite} is defined as $(P \cup Q) \cdot I^{\leq k} \odot \Fam{Z}$. +\end{mdefinition} + + +\subsection{The UIOv-method \cite{Vuong}}\label{sec:uiov} +Some Mealy machines which do not admit an adaptive distinguishing sequence, +may still admit state identifiers which are singletons. +These are exactly UIO sequences and gives rise to the UIOv-method. +In a way this is a generalization of the ADS-method, since the requirement +that state identifiers are harmonized is dropped. + +\begin{mdefinition}\label{uio}\label{uiov-method} + Given $s \in M$, we say that a word $w \in I^{\ast}$ is an \Def{UIO sequence for + $s$} if for all inequivalent $t \in M$ we have $\lambda(s, w) \neq \lambda(t, + w)$. + \newline + Let $\Fam{U} = \{ \text{a single UIO for } s \}_{s \in S}$ be a family of UIO + sequences, the \Def{UIOv test suite} is defined as $P \cdot I^{\leq k} \cdot + \bigcup \Fam{U} \,\cup\, Q \cdot I^{\leq k} \odot \Fam{U}$. +\end{mdefinition} + + +\subsection{Overview} +\label{sec:overview} + +We give an overview of the aforementioned test methods. +We classify them in two directions, +1) whether they use harmonized state identifiers or not and +2) whether it used singleton state identifiers or not. + +\begin{theorem}\label{thm:completeness} + The following test suites are all $n+k$-complete: +\begin{center} +\begin{tabular}{l|p{4.5cm}|p{4.5cm}|} +% empty cell + & Arbitrary + & Harmonized + \\ \hline +Many / pairwise + & Wp \newline $ P \cdot I^{\leq k} \cdot \bigcup \Fam{W} \,\cup\, Q \cdot I^{\leq k} \odot \Fam{W} $ + & HSI \newline $ (P \cup Q) \cdot I^{\leq k} \odot \Fam{H} $ + \\ \hline +Hybrid + & + & Hybrid ADS \newline + $ (P \cup Q) \cdot I^{\leq k} \odot (\Fam{Z'} ; \Fam{H}) $ + \\ \hline +Single / global + & UIOv \newline $ P \cdot I^{\leq k} \cdot \bigcup \Fam{U} \,\cup\, Q \cdot I^{\leq k} \odot \Fam{U} $ + & ADS \newline $ (P \cup Q) \cdot I^{\leq k} \odot \Fam{Z} $ + \\ \hline +\end{tabular} +\end{center} +\end{theorem} +\todo{Iets zeggen over de hybrid UIO method} +\todo{Geef groottes van suites voor het voorbeeld. Merk op dat ADS and UIOv niet van toepassing zijn (state 3 heeft geen UIO)} + +The incomplete \Def{UIO test suite} is defined as $(P \cup Q) \cdot +I^{\leq k} \odot \Fam{U}$, incompleteness is shown in \cite{Vuong}. + +It should be noted that the ADS-method is a specific instance of the HSI-method +and similarly the UIOv-method is an instance of the Wp-method. What is +generally meant by the Wp-method and HSI-method is the above formula together +with a particular way to obtain the (harmonised) state identifiers. + +We are often interested in the size of the test suite. In the worst case, all +methods generate a test suite with a size in $\bigO(pn^3)$ and this bound is +tight \cite{Vasilevskii}. Nevertheless we expect intuitively that the right column +performs better, as we are using a more structured set (given a separating +family for the HSI-method, we can always forget about the common prefixes and +apply the Wp-method, which will never be smaller if constructed in this way). +Also we expect the bottom row to perform better as there is a single test for +each state. Small experimental results confirm this intuition +\cite{Dorofeeva}. + +\section{Proof of completeness} +\label{sec:completeness} + +We fix a specification $M$ which has a minimal representative with $n$ states and an implementation $M'$ with at +most $n+k$ states. We assume that all states are reachable from the initial +state in both machines (i.e. both are \Def{connected}). We define the following +notation: + +\begin{mdefinition} + Let $W \subseteq I^{\ast}$ be a set of words. Two states $x, y$ (of possibly + different machines) are $W$-equivalent, written $x \sim_W y$, if $\lambda(x, + w) = \lambda(y, w)$ for all $w \in W$. +\end{mdefinition} + +We note that for a fixed set $W$ the relation $\sim_W$ is a equivalence relation +and that $W \subseteq V$ gives $x \sim_V y \Rightarrow x \sim_W y$. The next lemma +gives sufficient conditions for a test suite of the given anatomy to be +complete. We then prove that these conditions hold for the test suites in this +paper. + +\begin{lemma} + Let $\Fam{W}$ and $\Fam{W'}$ be two families of words and $P$ a state cover + for $M$. Let $T = P \cdot I^{\leq k} \odot \Fam{W} \,\cup\, P \cdot I^{\leq + k+1} \odot \Fam{W'}$ be a test suite. + + If + \vspace{-3mm} + \begin{itemize} + \item[1] for all $x, y \in M:$ $x \sim_{W_x \cap W_y} y$ implies $x \sim y$, + \item[2] for all $x, y \in M$ and $z \in M'$: $x \sim_{W_x} z$ and $z + \sim_{W'_y} y$ implies $x \sim y$ and + \item[3] the machines $M$ and $M'$ agree on $T$, + \end{itemize}\vspace{-3mm} + then $M$ and $M'$ are equivalent. +\end{lemma} +\begin{proof} + First, we prove that $P \cdot I^{\leq k}$ reaches all states in $M'$. + For $p, q \in P$ and $x = \delta(s_0, p), y = \delta(s_0, q)$ such that $x + \not\sim_{W_x \cap W_y} y$, we also have $\delta'(m'_0, p) \not\sim_{W_x + \cap W_y} \delta'(m'_0, q)$ in the implementation $M'$. + By (1) this means that there + are at least $n$ different behaviours in $M'$, hence at least $n$ states. + + Now $n$ states are reached by the previous argument (using the set $P$). + By assumption $M'$ has at most $k$ extra states and so we can reach all those extra states by using $I^{\leq k}$ after $P$. + + Second, we show that the reached states are bisimilar. + Define the relation $R = \{(\delta(q_0, p), \delta'(q_0', p)) \mid p \in P \cdot I^{\leq k}\}$. + Note that for each $(s, i) \in R$ we have $s \sim_{W_s} i$. + For each state $i \in M'$ there is a state $s \in M$ such that $(s, i) \in R$, since we reach all states in both machines by $P \cdot I^{\leq k}$. + We will prove that this relation is in fact a bisimulation up-to equivalence. + + For output, we note that $(s, i) \in R$ implies $\lambda(s, a) = \lambda'(i, a)$ for all $a$, + since the machines agree on $P \cdot I^{\leq k+1}$. + For the successors, let $(s, i) \in R$ and $a \in I$ and consider the successors $s_2 = \delta(s, a)$ and $i_2 = \delta'(i, a)$. + We know that there is some + $t \in M$ with $(t, i_2) \in R$. We also know that we + tested $i_2$ with the set $W'_t$. So we have: + $$ s_2 \sim_{W'_{s_2}} i_2 \sim_{W_t} t. $$ By the + second assumption, we conclude that $s_2 \sim t$. + So $s_2 \sim t$ and $(t, i) \in R$, which means that $R$ is a bisimulation up-to equivalence. + Using the theory of up-to techniques \cite{RotUpTo} we know that there exists a bisimulation relation containing $R$. + In particular $q_0$ and $q'_0$ are bisimilar. + And so the machines $M$ and $M'$ are equivalent. + \qed +\end{proof} + +Before we show that the conditions hold for the test methods described in this paper, we reflect on the above proof first. +This proof is very similar to the completeness proof in \cite{Chow}. +(In fact, it is also similar to Lemma 4 in \cite{Angluin} which proves termination in the L* learning algorithm. This correspondence was noted in \cite{BergCorrespondence}.) +In the first part we argue that all states are visited by using some sort of counting and reachability argument. +Then in the second part we show the actual equivalence. +To the best of the authors knowledge, this is first $m$-completeness proof which explicitly uses the concept of a bisimulation. +Using a bisimulation allows us to slightly generalize and use bisimulation up-to equivalence, dropping the the often-assumed requirement that $M$ is minimal. + +\begin{lemma} + Let $\Fam{W'}$ be a family of state identifiers for $M$. Define the family + $\Fam{W}$ by $W_s = \bigcup \Fam{W'}$. Then the conditions (1) and (2) in the + previous lemma are satisfied. +\end{lemma} +\begin{proof} + The first condition we note that $W_x \cap W_y = W_x = W_y$, and so $x + \sim_{W_x \cap W_y} y$ implies $x \sim_{W_x} y$, now by definition of state + identifier we get $x \sim y$. + + For the second condition, let $x \sim_{\bigcup \Fam{W'}} z \sim_{W'_y} y$. + Then we note that $W'_y \subseteq \bigcup{W'}$ and so we get $x \sim_{W'_y} z + \sim_{W'_y} y$. By transitivity we get $x \sim_{W'_y} y$ and so by definition + of state identifier we get $x \sim y$. + \qed +\end{proof} +\begin{corollary} + The W, Wp, UIOv and hybrid UIOv test suites are $n+k$-complete. +\end{corollary} + +\begin{lemma} + Let $\Fam{H}$ be a separating family and take $\Fam{W} = \Fam{W'} = \Fam{H}$. + Then the conditions (1) and (2) in the previous lemma are satisfied. +\end{lemma} +\begin{proof} + Let $x \sim_{H_x \cap H_y} y$, then by definition of separating family $x \sim + y$. + For the second condition, let $x \sim_{H_x} z \sim_{H_y} y$. Then we get $x + \sim_{H_x \cap H_y} z \sim_{H_x \cap H_y} y$ and so by transitivity $x + \sim_{H_x \cap H_y} y$, hence again $x \sim y$. + \qed +\end{proof} +\begin{corollary} + The HSI, ADS and hybrid ADS test suites are $n+k$-complete. +\end{corollary} + + +\section{Related Work} +\todo{Opnieuw lezen, want verouderd} +Comparison of test methods already appeared in the recent papers +\cite{Dorofeeva} and \cite{Endo}. Their work is mainly evaluated on randomly +generated Mealy machines. We continue their work by evaluating on many +specifications from industry. In addition, we evaluate in the context of +learning, which has been lacking so far. + +Many of the methods describe here are benchmarked on small Mealy machines in \cite{Dorofeeva} and \cite{Endo}. +Additionally, they included the P, H an SPY methods. +Unfortunately, the P and H methods do not fit naturally in the overview presented in Section~\ref{sec:overview}. +The P method is not able to test for extra states, making it less usable. +And the H method +The SPY method builds upon the HSI-method and changes the +prefixes in order to minimize the size of a test suite, exploiting overlap in test sequences. +We believe that this technique is independent of the HSI-method and can in fact be applied to all methods presented in this paper. +As such, the SPY method should be considered as an optimization technique, orthogonal to the work in this paper. + +More recently, a novel test method was devised which uses incomplete distinguishing sequences \cite{Hierons}. +They use sequences which can be considered to be adaptive distinguishing sequences on a subset of the state space. +With several of those one can cover the whole state space, obtaining a $m$-complete test suite. +This is a bit dual to our approach, as our "incomplete" adaptive distinguishing sequences define a course partition of the state space. +Our method becomes complete by refining the tests with pairwise separating sequences. + +Some work is put into minimizing the adaptive distinguishing sequences themselves. +In \cite{TY14} the authors describe greedy algorithms which construct small adaptive distinguishing sequences. +\todo{We expect that similar + heuristics also exist for the other test methods and that it will improve the + performance.} +However, they show that finding the minimal adaptive distinguishing sequence is +NP-complete, even approximation is NP-complete. +We would like to incorporate their greedy algorithms in our implementation. + + +\section{Applications} +\label{sec:applications} + +The presented test generation methods is implemented and used in a couple of applications. +The implementation can be found on \url{https://gitlab.science.ru.nl/moerman/Yannakakis}. + +This implementations has been used in several model learning applications: learning embedded controller software \cite{Smeenk}, learning the TCP protocol \cite{FiteruauTCP} and learning the MQTT protocol \cite{TapplerMQTT}. + +\todo{Enkele resultaten bespreken, test-suite-groottes vergelijken} +\todo{Future work? Meer benchmarks? Andere automaat-modellen?} + +\bibliographystyle{splncs03} +\bibliography{references} + + +% \subsection{Known algorithms} + +% We expose the well-known classical algorithms to find state identifiers here. + +% \begin{algorithm}[h] +% \DontPrintSemicolon +% \KwIn{A Mealy machine $M$} +% \KwResult{A complete splitting tree $T$} +% initialize with single node\; +% \While{true}{ +% \ForEach{symbol $a \in I$ and leaf $u \in T$}{ +% \If{if $\lambda(l(u), a)$ hits two or more outputs}{ +% split $u$ accordingly\; +% store $a$ as a witness in $\sigma(u)$\; +% } \ElseIf{$\delta(l(u), a)$ lands in labels of different leaves}{ +% $v \leftarrow lca(\delta(l(u), a))$\; +% split $u$ similar to $v$\; +% store $a \sigma(v)$ as witness in $\sigma(u)$\; +% } +% } +% \If{no split has been made}{ +% \Return{constructed tree} +% } +% } +% \caption{Moore's minimization algorithm adopted to record witnesses. When +% implemented efficiently it runs in $\bigO(pn^2)$. A more involved algorithm +% based on ideas of Hopcroft exists and runs in $\bigO(pn \log n)$.} +% \label{moore} +% \end{algorithm} + +% \begin{algorithm}[h] +% \DontPrintSemicolon +% \KwIn{A Mealy machine $M$} +% \KwResult{A (in)complete valid splitting tree $T$} +% initialize $T$ with single node\; +% \While{true}{ +% \ForEach{symbol $a \in I$ and leaf $u \in T$}{ +% \If{if $\lambda(l(u), a)$ hits two or more outputs and $\delta \times \lambda(-, a)$ is injective on $l(u)$}{ +% split $u$ accordingly\; +% store $a$ as a witness in $\sigma(u)$\; +% } \ElseIf{$\delta(l(u), a)$ lands in labels of different leaves and $\delta \times \lambda(-, a)$ is injective on $l(u)$}{ +% $v \leftarrow lca(\delta(l(u), a))$\; +% split $u$ similar to $v$\; +% store $a \sigma(v)$ as witness in $\sigma(u)$\; +% } +% } +% \If{no split has been made}{ +% \Return{constructed tree} +% } +% } +% \caption{(Simplified) algorithm by Lee and Yannakakis. Similarly to +% algorithm~\ref{moore} this creates a splitting tree, but additionally only +% allows \emph{valid} splits.} +% \label{lee-yannakakis} +% \end{algorithm} + +\end{document} diff --git a/environment.tex b/environment.tex new file mode 100644 index 0000000..bc954ff --- /dev/null +++ b/environment.tex @@ -0,0 +1,46 @@ +\startenvironment environment + +% Bug in context? Zonder de ../ kunnen componenten de files niet vinden +\usepath[{environment,../environment}] + +\environment font +\environment pdf +\environment paper + +\mainlanguage[en] + +\setuppagenumbering[alternative=doublesided] + +\defineenumeration[definition][text=Definition] +\defineenumeration[example][text=Example] +\defineenumeration[lemma][text=Lemma] +\defineenumeration[fact][text=Fact?] % niet nodig? +\setupenumeration[definition,example,lemma,fact][alternative=serried,width=fit,right=.] + +\definefloat[algorithm][algorithms][figure] +\setuplabeltext[en][algorithm=Algorithm ] + +\setupcombinedlist[content][list={chapter,section}] + +% The black dot is missing from Euler? +\setupitemize[2,joinedup,nowhite,after] + +% Standaard \colon had niet veel ruimte +\define\colon{{\,{:}\,\,}} +\define[1]\defn{\emph{#1}} + +\define[1]\todo{\color[red]{\ss \bf #1}} + +\usemodule[tikz] +\usetikzlibrary[automata, arrows, positioning] + +\usemodule[algorithmic] + +% Debugging +%\enabletrackers[references.references, visualizers.justification] +%\showmakeup +%\showlayout +%\showboxes +%\showframe + +\stopenvironment diff --git a/environment/font.tex b/environment/font.tex new file mode 100644 index 0000000..9cdabff --- /dev/null +++ b/environment/font.tex @@ -0,0 +1,26 @@ +\startenvironment font + +% Punctuatie mag van de regel afvallen. +% Old-style getallen +\definefontfeature[default][default][expansion=quality,protrusion=quality,onum=yes] +\setupalign[hz,hanging] + +% Standaard gebruiken we Pagella (een Open Source versie van Palatino) +\definefontfamily[mainfamily] [rm] [TeX Gyre Pagella] +% Voor sans-serif en teletype gebruiken we de Latin Modern fonts +% Het teletype font is iets groter gemaakt +\definefontfamily[mainfamily] [ss] [Latin Modern Sans] +\definefontfamily[mainfamily] [tt] [Latin Modern Mono][rscale=1.05] +% (Neo) Euler heeft geen mathbb symbolen, dus die halen we uit Pagella +\definefallbackfamily[mainfamily] [mm] [TeX Gyre Pagella Math][range=uppercasedoublestruck] +% Eventueel kunnen we mathcal vervangen door een ander font +%\definefallbackfamily[myfamily] [mm] [TeX Gyre Pagella Math][range=uppercasescript,rscale=1.05] +% (Neo) Euler heeft niet alle symbolen, maar Pagella wel +\definefallbackfamily[mainfamily] [mm] [TeX Gyre Pagella Math][range=0x0266D-0x0266F,rscale=0.95] % sharp and flat +% Neo Euler voor wiskunde. Merk op dat dit niet cursief is! +\definefontfamily[mainfamily] [mm] [Neo Euler] + +% Gebruik het font +\setupbodyfont[mainfamily,10pt] + +\stopenvironment diff --git a/environment/paper.tex b/environment/paper.tex new file mode 100644 index 0000000..63dddeb --- /dev/null +++ b/environment/paper.tex @@ -0,0 +1,13 @@ +\startenvironment paper + +% Proefschrift formaat is 17x24cm +% We doen aan beide zijden 5mm afloop, zodat we plaatjes vol kunnen printen +\definepapersize[proefschrift+rand][width=180mm, height=250mm] +\definepapersize[proefschrift][width=170mm, height=240mm] +\setuppapersize[proefschrift][proefschrift+rand] +\setuplayout[location={middle,middle}] + +% Snijranden +\setuplayout[marking=on] + +\stopenvironment diff --git a/environment/pdf.tex b/environment/pdf.tex new file mode 100644 index 0000000..3adf177 --- /dev/null +++ b/environment/pdf.tex @@ -0,0 +1,13 @@ +\startenvironment pdf + +% Synctex lijkt niet goed te werken voor TeXstudio. +% Uitzetten voor uiteindelijke versie! +%\setupsynctex[state=start] + +% Klikbare referenties +\setupinteraction[state=start,focus=standard,contrastcolor=darkgreen] + +% Bookmarks in de pdf +\placebookmarks[chapter,section] + +\stopenvironment diff --git a/thesis.tex b/thesis.tex new file mode 100644 index 0000000..ea22e87 --- /dev/null +++ b/thesis.tex @@ -0,0 +1,4 @@ +\environment environment +\startproject thesis +\product main +\stopproject