From a4d7e40e2d6a00975112520d03af955d8c301b3c Mon Sep 17 00:00:00 2001
From: Joshua Moerman <lakseru@gmail.com>
Date: Tue, 18 Sep 2018 17:37:00 +0200
Subject: [PATCH] Starting with minimal separating sequences

---
 content.tex                              |   2 +-
 content/applying-automata-learning.tex   |   3 +
 content/minimal-separating-sequences.tex | 244 +++++++++++++++++++++++
 environment/notation.tex                 |   1 +
 4 files changed, 249 insertions(+), 1 deletion(-)
 create mode 100644 content/minimal-separating-sequences.tex

diff --git a/content.tex b/content.tex
index d70f49b..12752d0 100644
--- a/content.tex
+++ b/content.tex
@@ -15,7 +15,7 @@
 \section{Nominal Techniques}
 \component content/test-methods
 \component content/applying-automata-learning
-\chapter{Separating Sequences}
+\component content/minimal-separating-sequences
 \component content/learning-nominal-automata
 \chapter{Ordered Nominal Sets}
 \chapter{Succinct Nominal Automata?}
diff --git a/content/applying-automata-learning.tex b/content/applying-automata-learning.tex
index c32a931..3fd167d 100644
--- a/content/applying-automata-learning.tex
+++ b/content/applying-automata-learning.tex
@@ -336,6 +336,7 @@ In these cases the incomplete result from the LY algorithm still contained a lot
 
 \startplacefigure
   [title={A small part of an incomplete distinguishing sequence as produced by the LY algorithm. Leaves contain a set of possible initial states, inner nodes have input sequences and edges correspond to different output symbols (of which we only drew some), where Q stands for quiescence.},
+   list={Incomplete distinguishing sequence from the LY algorithm.},
    reference=fig:distinguishing-sequence]
 \externalfigure[hyp_20_partial_ds.pdf][width=\textwidth]
 \stopplacefigure
@@ -476,6 +477,7 @@ The old transitions and state \kw{B} are removed.
 
 \startplacefigure
   [title={Example of empty transition transformation. On the left the original version. On the right the transformed version.},
+   list={Empty transition transformation},
    reference=fig:model-example1]
 \externalfigure[model-example1.pdf][width=0.5\textwidth]
 \stopplacefigure
@@ -500,6 +502,7 @@ When flattening the statechart, we modified the guards of supertransitions to en
 
 \startplacefigure
   [title={Example of supertransition transformation. On the left the original version. On the right the transformed version},
+   list={Supertransition transformation},
    reference=fig:model-example2]
 \externalfigure[model-example2.pdf][width=0.4\textwidth]
 \stopplacefigure
diff --git a/content/minimal-separating-sequences.tex b/content/minimal-separating-sequences.tex
new file mode 100644
index 0000000..4eeca97
--- /dev/null
+++ b/content/minimal-separating-sequences.tex
@@ -0,0 +1,244 @@
+\project thesis
+\startcomponent minimal-separating-sequences
+
+\startchapter
+  [title={Minimal Separating Sequences for All Pairs of States},
+   reference=chap:minimal-separating-sequences]
+
+\todo{Auteurs en moet ik ook support noemen van de andere auteurs? Keywords?}
+
+\startabstract
+Finding minimal separating sequences for all pairs of inequivalent states in a finite state machine
+is a classic problem in automata theory. Sets of minimal separating sequences, for instance, play a
+central role in many conformance testing methods. Moore has already outlined a partition refinement
+algorithm that constructs such a set of sequences in $\bigO(mn)$ time, where $m$ is the number of
+transitions and $n$ is the number of states. In this paper, we present an improved algorithm based
+on the minimization algorithm of Hopcroft that runs in $\bigO(m \log n)$ time. The efficiency of our
+algorithm is empirically verified and compared to the traditional algorithm.
+\stopabstract
+
+
+\startsection
+  [title={Introduction}]
+
+In diverse areas of computer science and engineering, systems can be modelled by \emph{finite state machines} (FSMs).
+One of the cornerstones of automata theory is minimization of such machines (and many variation thereof).
+In this process one obtains an equivalent minimal FSM, where states are different if and only if they have different behaviour.
+The first to develop an algorithm for minimisation was \citet[Moore1956].
+His algorithm has a time complexity of $\bigO(mn)$, where $m$ is the number of transitions, and $n$ is the number of states of the FSM.
+Later, \citet[Hopcroft1971] improved this bound to $\bigO(m \log n)$.
+
+Minimisation algorithms can be used as a framework for deriving a set of \emph{separating sequences} that show \emph{why} states are inequivalent.
+The separating sequences in Moore's framework are of minimal length \citep[Gill1962].
+Obtaining minimal separating sequences in Hopcroft's framework, however, is a non-trivial task.
+In this paper, we present an algorithm for finding such minimal separating sequences for all pairs of inequivalent states of a FSM in $\bigO(m \log n)$ time.
+
+Coincidentally, \citet[Bonchi2013] recently introduced a new algorithm for the equally fundamental problem of proving equivalence of states in non-deterministic automata.
+As both their and our work demonstrate, even classical problems in automata theory can still offer surprising research opportunities.
+Moreover, new ideas for well-studied problems may lead to algorithmic improvements that are of practical importance in a variety of applications.
+
+One such application for our work is in \emph{conformance testing}.
+Here, the goal is to test if a black box implementation of a system is functioning as described by a given FSM.
+It consists of applying sequences of inputs to the implementation, and comparing the output of the system to the output prescribed by the FSM.
+Minimal separating sequences are used in many test generation methods \cite[dorofeeva2010fsm]. Therefore, our algorithm can be used to improve these methods.
+
+
+\stopsection
+\startsection
+  [title={Preliminaries}]
+
+We define a \emph{FSM} as a Mealy machine $M = (I, O, S, \delta, \lambda)$, where $I, O$ and $S$ are finite sets of \emph{inputs}, \emph{outputs} and \emph{states} respectively, $\delta \colon S \times I \to S$ is a \emph{transition function} and $\lambda \colon S \times I \to O$ is an \emph{output function}.
+The functions $\delta$ and $\lambda$ are naturally extended to $\delta \colon S \times I^{*} \to S$ and $\lambda \colon S \times I^{*} \to O^{*}$.
+Moreover, given a set of states $S' \subseteq S$ and a sequence $x \in I^{*}$, we define $\delta(S', x) = \{\delta(s, x) \mid s \in S'\}$ and $\lambda(S', x) = \{\lambda(s, x) \mid s \in S'\}$.
+The \emph{inverse transition function} $\delta^{-1} \colon S \times I \to \mathcal{P}(S)$ is defined as $\delta^{-1}(s, a) = \{t \in S \mid \delta(t, a) = s\}$.
+Observe that Mealy machines are deterministic and input-enabled (i.e., complete) by definition.
+The initial state is not specified because it is of no importance in what follows.
+For the remainder of this paper we fix a machine $M = (I, O, S, \delta, \lambda)$.
+We use $n$ to denote its number of states, that is $n = |S|$, and $m$ to denote its number of transitions, that is $m = |S| \times |I|$.
+
+\startdefinition[reference=def:equivalent]
+States $s$ and $t$ are \emph{equivalent} if $\lambda(s, x) = \lambda(t, x)$ for all $x$ in $I^{*}$.
+\stopdefinition
+
+We are interested in the case where $s$ and $t$ are not equivalent, i.e., \emph{inequivalent}.
+If all pairs of distinct states of a machine $M$ are inequivalent, then $M$ is \emph{minimal}.
+An example of a minimal FSM is given in \in{Figure}[fig:automaton-tree].
+
+\startdefinition[reference=def:separate]
+A \emph{separating sequence} for states $s$ and $t$ in $s$ is a sequence $x \in I^{*}$ such that $\lambda(s, x) \neq \lambda(t, x)$.
+We say $x$ is \emph{minimal} if $|y| \geq |x|$ for all separating sequences $y$ for $s$ and $t$.
+\stopdefinition
+
+A separating sequence always exists if two states are inequivalent, and there might be multiple minimal separating sequences.
+Our goal is to obtain minimal separating sequences for all pairs of inequivalent states of $M$.
+
+
+\startsubsection
+  [title={Partition Refinement},
+   reference=sec:partition]
+
+In this section we will discuss the basics of minimisation.
+Both Moore's algorithm and Hopcroft's algorithm work by means of partition refinement.
+A similar treatment (for DFAs) is given by \citet[Gries1973].
+
+A \emph{partition} $P$ of $S$ is a set of pairwise disjoint non-empty subsets of $S$ whose union is exactly $S$.
+Elements in $P$ are called \emph{blocks}.
+If $P$ and $P'$ are partitions of $S$, then $P'$ is a \emph{refinement} of $P$ if every block of $P'$ is contained in a block of $P$.
+A partition refinement algorithm constructs the finest partition under some constraint.
+In our context the constraint is that equivalent states belong to the same block.
+
+\startdefinition[reference=def:valid]
+A partition is \emph{valid} if equivalent states are in the same block.
+\stopdefinition
+
+Partition refinement algorithms for FSMs start with the trivial partition $P = \{S\}$, and iteratively refine $P$ until it is the finest valid partition (where all states in a block are equivalent).
+The blocks of such a \emph{complete} partition form the states of the minimized FSM, whose transition and output functions are well-defined because states in the same block are equivalent.
+
+Let $B$ be a block and $a$ be an input.
+There are two possible reasons to split $B$ (and hence refine the partition).
+First, we can \emph{split $B$ with respect to output after $a$} if the set $\lambda(B, a)$ contains more than one output.
+Second, we can \emph{split $B$ with respect to the state after $a$} if there is no single block $B'$ containing the set $\delta(B, a)$.
+In both cases it is obvious what the new blocks are: in the first case each output in $\lambda(B, a)$ defines a new block, in the second case each block containing a state in $\delta(B, a)$ defines a new block.
+Both types of refinement preserve validity.
+
+Partition refinement algorithms for FSMs first perform splits w.r.t. output, until there are no such splits to be performed.
+This is precisely the case when the partition is \emph{acceptable}.
+
+\startdefinition[reference=def:acceptable]
+A partition is \emph{acceptable} if for all pairs $s, t$ of states contained in the same block and for all inputs $a$ in $I$, $\lambda(s, a) = \lambda(t, a)$.
+\stopdefinition
+
+Any refinement of an acceptable partition is again acceptable.
+The algorithm continues performing splits w.r.t. state, until no such splits can be performed.
+This is exactly the case when the partition is stable.
+
+\startdefinition[reference=def:stable]
+A partition is \emph{stable} if it is acceptable and for any input $a$ in $I$ and states $s$ and $t$ that are in the same block, states $\delta(s, a)$ and $\delta(t, a)$ are also in the same block.
+\stopdefinition
+
+Since an FSM has only finitely many states, partition refinement will terminate.
+The output is the finest valid partition which is acceptable and stable.
+For a more formal treatment on partition refinement we refer to \citet[Gries1973].
+
+
+\stopsubsection
+\startsubsection
+  [title={Splitting Trees and Refinable Partitions},
+   reference=sec:tree]
+
+Both types of splits described above can be used to construct a separating sequence for the states that are split.
+In a split w.r.t. the output after $a$, this sequence is simply $a$.
+In a split w.r.t. the state after $a$, the sequence starts with an $a$ and continues with the separating sequence for states in $\delta(B, a)$.
+In order to systematically keep track of this information, we maintain a \emph{splitting tree}.
+The splitting tree was introduced by \citet[Lee1994] as a data structure for maintaining the operational history of a partition refinement algorithm.
+
+\startdefinition[reference=def:splitting-tree]
+A \emph{splitting tree} for $M$ is a rooted tree $T$ with a finite set of nodes with the following properties: 
+\startitemize
+  \item Each node $u$ in $T$ is labelled by a subset of $S$, denoted $l(u)$.
+  \item The root is labelled by $S$.
+  \item For each inner node $u$, $l(u)$ is partitioned by the labels of its children.
+  \item Each inner node $u$ is associated with a sequence $\sigma(u)$ that separates states contained in different children of $u$.
+\stopitemize
+\stopdefinition
+
+We use $C(u)$ to denote the set of children of a node $u$.
+The \emph{lowest common ancestor} (lca) for a set $S' \subseteq S$ is the node $u$ such that $S' \subseteq l(u)$ and $S' \not \subseteq l(v)$ for all $v \in C(u)$ and is denoted by $\lca(S')$.
+For a pair of states $s$ and $t$ we use the shorthand $\lca(s, t)$ for $\lca(\{s, t\})$.
+
+The labels $l(u)$ can be stored as a \emph{refinable partition} data structure \citep[ValmariL2008].
+This is an array containing a permutation of the states, ordered so that states in the same block are adjacent.
+The label $l(u)$ of a node then can be indicated by a slice of this array.
+If node $u$ is split, some states in the \emph{slice} $l(u)$ may be moved to create the labels of its children, but this will not change the \emph{set} $l(u)$.
+
+A splitting tree $T$ can be used to record the history of a partition refinement algorithm because at any time the leaves of $T$ define a partition on $S$, denoted $P(T)$.
+We say a splitting tree $T$ is valid (resp. acceptable, stable, complete) if $P(T)$ is as such.
+A leaf can be expanded in one of two ways, corresponding to the two ways a block can be split.
+Given a leaf $u$ and its block $B = l(u)$ we define the following two splits:
+
+\description{split-output}
+Suppose there is an input $a$ such that $B$ can be split w.r.t output after $a$.
+Then we set $\sigma(u) = a$, and we create a node for each subset of $B$ that produces the same output $x$ on $a$. These nodes are set to be children of $u$.
+
+\description{split-state}
+Suppose there is an input $a$ such that $B$ can be split w.r.t. the state after $a$.
+Then instead of splitting $B$ as described before, we proceed as follows.
+First, we locate the node $v = \lca(\delta(B, a))$.
+Since $v$ cannot be a leaf, it has at least two children whose labels contain elements of $\delta(B, a)$.
+We can use this information to expand the tree as follows.
+For each node $w$ in $C(v)$ we create a child of $u$ labelled $\{s \in B \mid \delta(s, a) \in l(w) \}$ if the label contains at least one state.
+Finally, we set $\sigma(u) = a \sigma(v)$.
+
+A straight-forward adaptation of partition refinement for constructing a stable splitting tree for $M$ is shown in \in{Algorithm}[alg:tree].
+The termination and the correctness of the algorithm outlined in \in{Section}[sec:partition] are preserved.
+It follows directly that states are equivalent if and only if they are in the same label of a leaf node.
+
+\todo{algorithm 1}
+
+%\begin{algorithm}[t]
+%    \DontPrintSemicolon
+%    \KwIn{A FSM $M$}
+%    \KwResult{A valid and stable splitting tree $T$}
+%    initialize $T$ to be a tree with a single node labeled $S$\;
+%    \Repeat{$P(T)$ is acceptable}{
+%        find $a \in I, B \in P(T)$ such that we can split $B$ w.r.t.\@ output $\lambda(\cdot, a)$\;
+%        expand the $u \in T$ with $l(u)=B$ as described in (split-output)\;
+%    }
+%    \Repeat{$P(T)$ is stable}{
+%        find $a \in I, B \in P(T)$ such that we can split $B$ w.r.t.\@ state $\delta(\cdot, a)$\;
+%        expand the $u \in T$ with $l(u)=B$ as described in (split-state)\;
+%    }
+%    \caption{Constructing a stable splitting tree}
+%    \label{alg:tree}
+%\end{algorithm}
+
+\startexample[reference=ex:tree]
+\in{Figure}[fig:automaton-tree] shows a FSM and a complete splitting tree for it.
+This tree is constructed by \in{Algorithm}[alg:tree] as follows.
+First, the root node is labelled by $\{s_0, \ldots, s_5\}$.
+The even and uneven states produce different outputs after $a$, hence the root node is split.
+Then we note that $s_4$ produces a different output after $b$ than $s_0$ and $s_2$, so $\{s_0, s_2, s_4\}$ is split as well.
+At this point $T$ is acceptable: no more leaves can be split w.r.t. output.
+Now, the states $\delta(\{s_1, s_3, s_5\}, a)$ are contained in different leaves of $T$.
+Therefore, $\{s_1, s_3, s_5\}$ is split into $\{s_1, s_5\}$ and $\{s_3\}$ and associated with sequence $ab$.
+At this point, $\delta(\{s_0, s_2\}, a)$ contains states that are in both children of $\{s_1, s_3, s_5\}$, so $\{s_0, s_2\}$ is split and the associated sequence is $aab$.
+We continue until $T$ is complete.
+\stopexample
+
+\startplacefigure
+  [title={A FSM (a) and a complete splitting tree for it (b)},
+   reference=fig:automaton-tree]
+
+\starttikzpicture[shorten >=1pt,node distance=2cm,>=stealth']
+  \node[state] (0) {$s_0$};
+  \node[state] (1) [above right of=0] {$s_1$};
+  \node[state] (2) [right of=1] {$s_2$};
+  \node[state] (3) [below right of=2] {$s_3$};
+  \node[state] (4) [below left of=3] {$s_4$};
+  \node[state] (5) [below right of=0] {$s_5$};
+  \path[->]
+  (0) edge [loop left]  node [left] {${b}/0$}
+  (0) edge [bend right] node [right] {${a}/0$} (1)
+  (1) edge node [above] {${a}/1$} (2)
+  (1) edge [bend right] node [left] {${b}/0$} (0)
+  (2) edge node [right] {${b}/0$} (3)
+  (2) edge node [above right] {${a}/0$} (3)
+  (3) edge node [below right] {${b}/0$} (4)
+  (3) edge node [right] {${a}/1$} (4)
+  (4) edge node [above] {${a}/0$} (5)
+  (4) edge node [below] {${b}/1$} (5)
+  (5) edge node [below left] {${b}/0$} (0)
+  (5) edge node [left] {${a}/1$} (0);
+\stoptikzpicture
+
+%  \subfloat[\label{fig:monotone-tree}]{
+%    \includegraphics[width=0.45\textwidth]{monotone-tree.pdf}
+%  }
+\stopplacefigure
+
+
+\todo{Ga verder bij \tt 285}
+
+\referencesifcomponent
+\stopchapter
+\stopcomponent
diff --git a/environment/notation.tex b/environment/notation.tex
index f89d571..6d37e77 100644
--- a/environment/notation.tex
+++ b/environment/notation.tex
@@ -12,6 +12,7 @@
 
 \define\pref{pref}
 \define\suff{suff}
+\define\lca{lca}
 
 % Of misschien toch \ss?
 \define[1]\kw{\text{\tt #1}}