From a4d7e40e2d6a00975112520d03af955d8c301b3c Mon Sep 17 00:00:00 2001 From: Joshua Moerman Date: Tue, 18 Sep 2018 17:37:00 +0200 Subject: [PATCH] Starting with minimal separating sequences --- content.tex | 2 +- content/applying-automata-learning.tex | 3 + content/minimal-separating-sequences.tex | 244 +++++++++++++++++++++++ environment/notation.tex | 1 + 4 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 content/minimal-separating-sequences.tex diff --git a/content.tex b/content.tex index d70f49b..12752d0 100644 --- a/content.tex +++ b/content.tex @@ -15,7 +15,7 @@ \section{Nominal Techniques} \component content/test-methods \component content/applying-automata-learning -\chapter{Separating Sequences} +\component content/minimal-separating-sequences \component content/learning-nominal-automata \chapter{Ordered Nominal Sets} \chapter{Succinct Nominal Automata?} diff --git a/content/applying-automata-learning.tex b/content/applying-automata-learning.tex index c32a931..3fd167d 100644 --- a/content/applying-automata-learning.tex +++ b/content/applying-automata-learning.tex @@ -336,6 +336,7 @@ In these cases the incomplete result from the LY algorithm still contained a lot \startplacefigure [title={A small part of an incomplete distinguishing sequence as produced by the LY algorithm. Leaves contain a set of possible initial states, inner nodes have input sequences and edges correspond to different output symbols (of which we only drew some), where Q stands for quiescence.}, + list={Incomplete distinguishing sequence from the LY algorithm.}, reference=fig:distinguishing-sequence] \externalfigure[hyp_20_partial_ds.pdf][width=\textwidth] \stopplacefigure @@ -476,6 +477,7 @@ The old transitions and state \kw{B} are removed. \startplacefigure [title={Example of empty transition transformation. On the left the original version. On the right the transformed version.}, + list={Empty transition transformation}, reference=fig:model-example1] \externalfigure[model-example1.pdf][width=0.5\textwidth] \stopplacefigure @@ -500,6 +502,7 @@ When flattening the statechart, we modified the guards of supertransitions to en \startplacefigure [title={Example of supertransition transformation. On the left the original version. On the right the transformed version}, + list={Supertransition transformation}, reference=fig:model-example2] \externalfigure[model-example2.pdf][width=0.4\textwidth] \stopplacefigure diff --git a/content/minimal-separating-sequences.tex b/content/minimal-separating-sequences.tex new file mode 100644 index 0000000..4eeca97 --- /dev/null +++ b/content/minimal-separating-sequences.tex @@ -0,0 +1,244 @@ +\project thesis +\startcomponent minimal-separating-sequences + +\startchapter + [title={Minimal Separating Sequences for All Pairs of States}, + reference=chap:minimal-separating-sequences] + +\todo{Auteurs en moet ik ook support noemen van de andere auteurs? Keywords?} + +\startabstract +Finding minimal separating sequences for all pairs of inequivalent states in a finite state machine +is a classic problem in automata theory. Sets of minimal separating sequences, for instance, play a +central role in many conformance testing methods. Moore has already outlined a partition refinement +algorithm that constructs such a set of sequences in $\bigO(mn)$ time, where $m$ is the number of +transitions and $n$ is the number of states. In this paper, we present an improved algorithm based +on the minimization algorithm of Hopcroft that runs in $\bigO(m \log n)$ time. The efficiency of our +algorithm is empirically verified and compared to the traditional algorithm. +\stopabstract + + +\startsection + [title={Introduction}] + +In diverse areas of computer science and engineering, systems can be modelled by \emph{finite state machines} (FSMs). +One of the cornerstones of automata theory is minimization of such machines (and many variation thereof). +In this process one obtains an equivalent minimal FSM, where states are different if and only if they have different behaviour. +The first to develop an algorithm for minimisation was \citet[Moore1956]. +His algorithm has a time complexity of $\bigO(mn)$, where $m$ is the number of transitions, and $n$ is the number of states of the FSM. +Later, \citet[Hopcroft1971] improved this bound to $\bigO(m \log n)$. + +Minimisation algorithms can be used as a framework for deriving a set of \emph{separating sequences} that show \emph{why} states are inequivalent. +The separating sequences in Moore's framework are of minimal length \citep[Gill1962]. +Obtaining minimal separating sequences in Hopcroft's framework, however, is a non-trivial task. +In this paper, we present an algorithm for finding such minimal separating sequences for all pairs of inequivalent states of a FSM in $\bigO(m \log n)$ time. + +Coincidentally, \citet[Bonchi2013] recently introduced a new algorithm for the equally fundamental problem of proving equivalence of states in non-deterministic automata. +As both their and our work demonstrate, even classical problems in automata theory can still offer surprising research opportunities. +Moreover, new ideas for well-studied problems may lead to algorithmic improvements that are of practical importance in a variety of applications. + +One such application for our work is in \emph{conformance testing}. +Here, the goal is to test if a black box implementation of a system is functioning as described by a given FSM. +It consists of applying sequences of inputs to the implementation, and comparing the output of the system to the output prescribed by the FSM. +Minimal separating sequences are used in many test generation methods \cite[dorofeeva2010fsm]. Therefore, our algorithm can be used to improve these methods. + + +\stopsection +\startsection + [title={Preliminaries}] + +We define a \emph{FSM} as a Mealy machine $M = (I, O, S, \delta, \lambda)$, where $I, O$ and $S$ are finite sets of \emph{inputs}, \emph{outputs} and \emph{states} respectively, $\delta \colon S \times I \to S$ is a \emph{transition function} and $\lambda \colon S \times I \to O$ is an \emph{output function}. +The functions $\delta$ and $\lambda$ are naturally extended to $\delta \colon S \times I^{*} \to S$ and $\lambda \colon S \times I^{*} \to O^{*}$. +Moreover, given a set of states $S' \subseteq S$ and a sequence $x \in I^{*}$, we define $\delta(S', x) = \{\delta(s, x) \mid s \in S'\}$ and $\lambda(S', x) = \{\lambda(s, x) \mid s \in S'\}$. +The \emph{inverse transition function} $\delta^{-1} \colon S \times I \to \mathcal{P}(S)$ is defined as $\delta^{-1}(s, a) = \{t \in S \mid \delta(t, a) = s\}$. +Observe that Mealy machines are deterministic and input-enabled (i.e., complete) by definition. +The initial state is not specified because it is of no importance in what follows. +For the remainder of this paper we fix a machine $M = (I, O, S, \delta, \lambda)$. +We use $n$ to denote its number of states, that is $n = |S|$, and $m$ to denote its number of transitions, that is $m = |S| \times |I|$. + +\startdefinition[reference=def:equivalent] +States $s$ and $t$ are \emph{equivalent} if $\lambda(s, x) = \lambda(t, x)$ for all $x$ in $I^{*}$. +\stopdefinition + +We are interested in the case where $s$ and $t$ are not equivalent, i.e., \emph{inequivalent}. +If all pairs of distinct states of a machine $M$ are inequivalent, then $M$ is \emph{minimal}. +An example of a minimal FSM is given in \in{Figure}[fig:automaton-tree]. + +\startdefinition[reference=def:separate] +A \emph{separating sequence} for states $s$ and $t$ in $s$ is a sequence $x \in I^{*}$ such that $\lambda(s, x) \neq \lambda(t, x)$. +We say $x$ is \emph{minimal} if $|y| \geq |x|$ for all separating sequences $y$ for $s$ and $t$. +\stopdefinition + +A separating sequence always exists if two states are inequivalent, and there might be multiple minimal separating sequences. +Our goal is to obtain minimal separating sequences for all pairs of inequivalent states of $M$. + + +\startsubsection + [title={Partition Refinement}, + reference=sec:partition] + +In this section we will discuss the basics of minimisation. +Both Moore's algorithm and Hopcroft's algorithm work by means of partition refinement. +A similar treatment (for DFAs) is given by \citet[Gries1973]. + +A \emph{partition} $P$ of $S$ is a set of pairwise disjoint non-empty subsets of $S$ whose union is exactly $S$. +Elements in $P$ are called \emph{blocks}. +If $P$ and $P'$ are partitions of $S$, then $P'$ is a \emph{refinement} of $P$ if every block of $P'$ is contained in a block of $P$. +A partition refinement algorithm constructs the finest partition under some constraint. +In our context the constraint is that equivalent states belong to the same block. + +\startdefinition[reference=def:valid] +A partition is \emph{valid} if equivalent states are in the same block. +\stopdefinition + +Partition refinement algorithms for FSMs start with the trivial partition $P = \{S\}$, and iteratively refine $P$ until it is the finest valid partition (where all states in a block are equivalent). +The blocks of such a \emph{complete} partition form the states of the minimized FSM, whose transition and output functions are well-defined because states in the same block are equivalent. + +Let $B$ be a block and $a$ be an input. +There are two possible reasons to split $B$ (and hence refine the partition). +First, we can \emph{split $B$ with respect to output after $a$} if the set $\lambda(B, a)$ contains more than one output. +Second, we can \emph{split $B$ with respect to the state after $a$} if there is no single block $B'$ containing the set $\delta(B, a)$. +In both cases it is obvious what the new blocks are: in the first case each output in $\lambda(B, a)$ defines a new block, in the second case each block containing a state in $\delta(B, a)$ defines a new block. +Both types of refinement preserve validity. + +Partition refinement algorithms for FSMs first perform splits w.r.t. output, until there are no such splits to be performed. +This is precisely the case when the partition is \emph{acceptable}. + +\startdefinition[reference=def:acceptable] +A partition is \emph{acceptable} if for all pairs $s, t$ of states contained in the same block and for all inputs $a$ in $I$, $\lambda(s, a) = \lambda(t, a)$. +\stopdefinition + +Any refinement of an acceptable partition is again acceptable. +The algorithm continues performing splits w.r.t. state, until no such splits can be performed. +This is exactly the case when the partition is stable. + +\startdefinition[reference=def:stable] +A partition is \emph{stable} if it is acceptable and for any input $a$ in $I$ and states $s$ and $t$ that are in the same block, states $\delta(s, a)$ and $\delta(t, a)$ are also in the same block. +\stopdefinition + +Since an FSM has only finitely many states, partition refinement will terminate. +The output is the finest valid partition which is acceptable and stable. +For a more formal treatment on partition refinement we refer to \citet[Gries1973]. + + +\stopsubsection +\startsubsection + [title={Splitting Trees and Refinable Partitions}, + reference=sec:tree] + +Both types of splits described above can be used to construct a separating sequence for the states that are split. +In a split w.r.t. the output after $a$, this sequence is simply $a$. +In a split w.r.t. the state after $a$, the sequence starts with an $a$ and continues with the separating sequence for states in $\delta(B, a)$. +In order to systematically keep track of this information, we maintain a \emph{splitting tree}. +The splitting tree was introduced by \citet[Lee1994] as a data structure for maintaining the operational history of a partition refinement algorithm. + +\startdefinition[reference=def:splitting-tree] +A \emph{splitting tree} for $M$ is a rooted tree $T$ with a finite set of nodes with the following properties: +\startitemize + \item Each node $u$ in $T$ is labelled by a subset of $S$, denoted $l(u)$. + \item The root is labelled by $S$. + \item For each inner node $u$, $l(u)$ is partitioned by the labels of its children. + \item Each inner node $u$ is associated with a sequence $\sigma(u)$ that separates states contained in different children of $u$. +\stopitemize +\stopdefinition + +We use $C(u)$ to denote the set of children of a node $u$. +The \emph{lowest common ancestor} (lca) for a set $S' \subseteq S$ is the node $u$ such that $S' \subseteq l(u)$ and $S' \not \subseteq l(v)$ for all $v \in C(u)$ and is denoted by $\lca(S')$. +For a pair of states $s$ and $t$ we use the shorthand $\lca(s, t)$ for $\lca(\{s, t\})$. + +The labels $l(u)$ can be stored as a \emph{refinable partition} data structure \citep[ValmariL2008]. +This is an array containing a permutation of the states, ordered so that states in the same block are adjacent. +The label $l(u)$ of a node then can be indicated by a slice of this array. +If node $u$ is split, some states in the \emph{slice} $l(u)$ may be moved to create the labels of its children, but this will not change the \emph{set} $l(u)$. + +A splitting tree $T$ can be used to record the history of a partition refinement algorithm because at any time the leaves of $T$ define a partition on $S$, denoted $P(T)$. +We say a splitting tree $T$ is valid (resp. acceptable, stable, complete) if $P(T)$ is as such. +A leaf can be expanded in one of two ways, corresponding to the two ways a block can be split. +Given a leaf $u$ and its block $B = l(u)$ we define the following two splits: + +\description{split-output} +Suppose there is an input $a$ such that $B$ can be split w.r.t output after $a$. +Then we set $\sigma(u) = a$, and we create a node for each subset of $B$ that produces the same output $x$ on $a$. These nodes are set to be children of $u$. + +\description{split-state} +Suppose there is an input $a$ such that $B$ can be split w.r.t. the state after $a$. +Then instead of splitting $B$ as described before, we proceed as follows. +First, we locate the node $v = \lca(\delta(B, a))$. +Since $v$ cannot be a leaf, it has at least two children whose labels contain elements of $\delta(B, a)$. +We can use this information to expand the tree as follows. +For each node $w$ in $C(v)$ we create a child of $u$ labelled $\{s \in B \mid \delta(s, a) \in l(w) \}$ if the label contains at least one state. +Finally, we set $\sigma(u) = a \sigma(v)$. + +A straight-forward adaptation of partition refinement for constructing a stable splitting tree for $M$ is shown in \in{Algorithm}[alg:tree]. +The termination and the correctness of the algorithm outlined in \in{Section}[sec:partition] are preserved. +It follows directly that states are equivalent if and only if they are in the same label of a leaf node. + +\todo{algorithm 1} + +%\begin{algorithm}[t] +% \DontPrintSemicolon +% \KwIn{A FSM $M$} +% \KwResult{A valid and stable splitting tree $T$} +% initialize $T$ to be a tree with a single node labeled $S$\; +% \Repeat{$P(T)$ is acceptable}{ +% find $a \in I, B \in P(T)$ such that we can split $B$ w.r.t.\@ output $\lambda(\cdot, a)$\; +% expand the $u \in T$ with $l(u)=B$ as described in (split-output)\; +% } +% \Repeat{$P(T)$ is stable}{ +% find $a \in I, B \in P(T)$ such that we can split $B$ w.r.t.\@ state $\delta(\cdot, a)$\; +% expand the $u \in T$ with $l(u)=B$ as described in (split-state)\; +% } +% \caption{Constructing a stable splitting tree} +% \label{alg:tree} +%\end{algorithm} + +\startexample[reference=ex:tree] +\in{Figure}[fig:automaton-tree] shows a FSM and a complete splitting tree for it. +This tree is constructed by \in{Algorithm}[alg:tree] as follows. +First, the root node is labelled by $\{s_0, \ldots, s_5\}$. +The even and uneven states produce different outputs after $a$, hence the root node is split. +Then we note that $s_4$ produces a different output after $b$ than $s_0$ and $s_2$, so $\{s_0, s_2, s_4\}$ is split as well. +At this point $T$ is acceptable: no more leaves can be split w.r.t. output. +Now, the states $\delta(\{s_1, s_3, s_5\}, a)$ are contained in different leaves of $T$. +Therefore, $\{s_1, s_3, s_5\}$ is split into $\{s_1, s_5\}$ and $\{s_3\}$ and associated with sequence $ab$. +At this point, $\delta(\{s_0, s_2\}, a)$ contains states that are in both children of $\{s_1, s_3, s_5\}$, so $\{s_0, s_2\}$ is split and the associated sequence is $aab$. +We continue until $T$ is complete. +\stopexample + +\startplacefigure + [title={A FSM (a) and a complete splitting tree for it (b)}, + reference=fig:automaton-tree] + +\starttikzpicture[shorten >=1pt,node distance=2cm,>=stealth'] + \node[state] (0) {$s_0$}; + \node[state] (1) [above right of=0] {$s_1$}; + \node[state] (2) [right of=1] {$s_2$}; + \node[state] (3) [below right of=2] {$s_3$}; + \node[state] (4) [below left of=3] {$s_4$}; + \node[state] (5) [below right of=0] {$s_5$}; + \path[->] + (0) edge [loop left] node [left] {${b}/0$} + (0) edge [bend right] node [right] {${a}/0$} (1) + (1) edge node [above] {${a}/1$} (2) + (1) edge [bend right] node [left] {${b}/0$} (0) + (2) edge node [right] {${b}/0$} (3) + (2) edge node [above right] {${a}/0$} (3) + (3) edge node [below right] {${b}/0$} (4) + (3) edge node [right] {${a}/1$} (4) + (4) edge node [above] {${a}/0$} (5) + (4) edge node [below] {${b}/1$} (5) + (5) edge node [below left] {${b}/0$} (0) + (5) edge node [left] {${a}/1$} (0); +\stoptikzpicture + +% \subfloat[\label{fig:monotone-tree}]{ +% \includegraphics[width=0.45\textwidth]{monotone-tree.pdf} +% } +\stopplacefigure + + +\todo{Ga verder bij \tt 285} + +\referencesifcomponent +\stopchapter +\stopcomponent diff --git a/environment/notation.tex b/environment/notation.tex index f89d571..6d37e77 100644 --- a/environment/notation.tex +++ b/environment/notation.tex @@ -12,6 +12,7 @@ \define\pref{pref} \define\suff{suff} +\define\lca{lca} % Of misschien toch \ss? \define[1]\kw{\text{\tt #1}}