References for sep seqs

This commit is contained in:
Joshua Moerman 2018-09-21 14:50:10 +02:00
parent d789acd7a9
commit 8b4bb3d007
6 changed files with 114 additions and 19 deletions

View file

@@ -523,6 +523,24 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/popl/BonchiP13,
author = {Filippo Bonchi and
Damien Pous},
editor = {Roberto Giacobazzi and
Radhia Cousot},
title = {Checking {NFA} equivalence with bisimulations up to congruence},
booktitle = {The 40th Annual {ACM} {SIGPLAN-SIGACT} Symposium on Principles of
Programming Languages, {POPL} '13, Rome, Italy - January 23 - 25, 2013},
pages = {457--468},
publisher = {{ACM}},
year = {2013},
url = {http://doi.acm.org/10.1145/2429069.2429124},
doi = {10.1145/2429069.2429124},
timestamp = {Thu, 15 Jun 2017 21:39:11 +0200},
biburl = {https://dblp.org/rec/bib/conf/popl/BonchiP13},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cacm/BonchiP15,
author = {Filippo Bonchi and
Damien Pous},
@@ -909,6 +927,14 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{Gill62,
author = {Arthur Gill},
publisher = {McGraw-Hill},
title = {Introduction to the theory of finite-state machines},
year = {1962},
isbn = {978-0070232433}
}
@inproceedings{DBLP:conf/mompes/GraafD07,
author = {Bas Graaf and
Arie van Deursen},
@@ -930,6 +956,18 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/acta/Gries73,
author = {David Gries},
title = {Describing an Algorithm by {Hopcroft}},
journal = {Acta Inf.},
volume = {2},
pages = {97--109},
year = {1973},
timestamp = {Wed, 29 Mar 2017 16:45:28 +0200},
biburl = {https://dblp.org/rec/bib/journals/acta/Gries73},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pts/GrozLPS08,
author = {Roland Groz and
Keqin Li and
@@ -1020,6 +1058,22 @@
url = {https://doi.org/10.1017/CBO9781139194655}
}
@inproceedings{Hopcroft71,
author = {John Edward Hopcroft},
booktitle = {Theory of Machines and Computations - Proceedings of an International
Symposium on the Theory of Machines and Computations Held at Technion
in Haifa, Israel, on August 16--19, 1971},
editor = {Zvi Kohavi and
Azaria Paz},
pages = {189--196},
title = {An $n \log n$ algorithm for minimizing states in a finite automaton},
year = {1971},
publisher = {Academic Press},
doi = {10.1016/B978-0-12-417750-5.50022-1},
url = {https://doi.org/10.1016/B978-0-12-417750-5.50022-1},
isbn = {978-0-12-417750-5}
}
@inproceedings{DBLP:conf/vmcai/HowarSM11,
author = {Falk Howar and
Bernhard Steffen and
@@ -1229,6 +1283,21 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tcs/Knuutila01,
author = {Timo Knuutila},
title = {Re-describing an algorithm by {Hopcroft}},
journal = {Theor. Comput. Sci.},
volume = {250},
number = {1-2},
pages = {333--363},
year = {2001},
url = {https://doi.org/10.1016/S0304-3975(99)00150-4},
doi = {10.1016/S0304-3975(99)00150-4},
timestamp = {Wed, 14 Jun 2017 20:32:10 +0200},
biburl = {https://dblp.org/rec/bib/journals/tcs/Knuutila01},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@proceedings{DBLP:journals/corr/AtkeyK16,
editor = {Robert Atkey and
Neelakantan R. Krishnaswami},
@@ -1561,6 +1630,15 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{Moore56,
author = {Edward F. Moore},
title = {Gedanken-Experiments on Sequential Machines},
booktitle = {Automata Studies, Annals of Mathematics Studies, no. 34},
year = {1956},
pages = {129--153},
publisher = {Princeton University Press}
}
@inproceedings{DBLP:conf/tacas/MouraB08,
author = {Leonardo Mendon{\c{c}}a de Moura and
Nikolaj Bj{\o}rner},
@@ -1934,6 +2012,21 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{ValmariL08,
author = {Antti Valmari and
Petri Lehtinen},
title = {Efficient Minimization of {DFA}s with Partial Transition Functions},
booktitle = {Symposium on Theoretical Aspects of Computer Science (STACS), Bordeaux, France, 2008},
pages = {645--656},
year = {2008},
url = {http://arxiv.org/abs/0802.2826},
archivePrefix = {arXiv},
eprint = {0802.2826},
timestamp = {Mon, 13 Aug 2018 16:47:46 +0200},
biburl = {https://dblp.org/rec/bib/journals/corr/abs-0802-2826},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Vasilevskii73,
author = {M.P. Vasilevskii},
title = {Failure diagnosis of automata},

View file

@@ -24,23 +24,23 @@ algorithm is empirically verified and compared to the traditional algorithm.
In diverse areas of computer science and engineering, systems can be modelled by \emph{finite state machines} (FSMs).
One of the cornerstones of automata theory is minimisation of such machines (and many variations thereof).
In this process one obtains an equivalent minimal FSM, where states are different if and only if they have different behaviour.
The first to develop an algorithm for minimisation was \citet[Moore1956].
The first to develop an algorithm for minimisation was \citet[Moore56].
His algorithm has a time complexity of $\bigO(mn)$, where $m$ is the number of transitions, and $n$ is the number of states of the FSM.
Later, \citet[Hopcroft1971] improved this bound to $\bigO(m \log n)$.
Later, \citet[Hopcroft71] improved this bound to $\bigO(m \log n)$.
Minimisation algorithms can be used as a framework for deriving a set of \emph{separating sequences} that show \emph{why} states are inequivalent.
The separating sequences in Moore's framework are of minimal length \citep[Gill1962].
The separating sequences in Moore's framework are of minimal length \citep[Gill62].
Obtaining minimal separating sequences in Hopcroft's framework, however, is a non-trivial task.
In this paper, we present an algorithm for finding such minimal separating sequences for all pairs of inequivalent states of an FSM in $\bigO(m \log n)$ time.
Coincidentally, \citet[Bonchi2013] recently introduced a new algorithm for the equally fundamental problem of proving equivalence of states in non-deterministic automata.
Coincidentally, \citet[DBLP:conf/popl/BonchiP13] recently introduced a new algorithm for the equally fundamental problem of proving equivalence of states in non-deterministic automata.
As both their and our work demonstrate, even classical problems in automata theory can still offer surprising research opportunities.
Moreover, new ideas for well-studied problems may lead to algorithmic improvements that are of practical importance in a variety of applications.
One such application for our work is in \emph{conformance testing}.
Here, the goal is to test if a black-box implementation of a system is functioning as described by a given FSM.
It consists of applying sequences of inputs to the implementation, and comparing the output of the system to the output prescribed by the FSM.
Minimal separating sequences are used in many test generation methods \cite[dorofeeva2010fsm]. Therefore, our algorithm can be used to improve these methods.
Minimal separating sequences are used in many test generation methods \cite[DBLP:journals/infsof/DorofeevaEMCY10]. Therefore, our algorithm can be used to improve these methods.
\stopsection
@@ -79,7 +79,7 @@ Our goal is to obtain minimal separating sequences for all pairs of inequivalent
In this section we will discuss the basics of minimisation.
Both Moore's algorithm and Hopcroft's algorithm work by means of partition refinement.
A similar treatment (for DFAs) is given by \citet[Gries1973].
A similar treatment (for DFAs) is given by \citet[DBLP:journals/acta/Gries73].
A \emph{partition} $P$ of $S$ is a set of pairwise disjoint non-empty subsets of $S$ whose union is exactly $S$.
Elements in $P$ are called \emph{blocks}.
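To make the refinement scheme concrete, below is a minimal sketch in Go (the language of the implementation mentioned later in the section) of Moore-style partition refinement: states stay in the same block as long as they agree on outputs and their successors lie in the same blocks. The Mealy type and all identifiers are illustrative assumptions, not the paper's code.

package moore

import "fmt"

// Mealy is a hypothetical complete FSM representation: states are 0..n-1,
// inputs are 0..k-1. It is not the representation used by the paper.
type Mealy struct {
	Delta  [][]int // Delta[s][a] = successor state
	Lambda [][]int // Lambda[s][a] = output symbol
}

// Refine computes state equivalence by repeated refinement: the signature of
// a state records, per input, its output and the current block of its
// successor. This is the O(mn) scheme attributed to Moore, sketched only to
// illustrate the idea of partition refinement.
func Refine(m Mealy) []int {
	n := len(m.Delta)
	block := make([]int, n) // every state starts in block 0

	for {
		ids := map[string]int{}
		next := make([]int, n)
		for s := 0; s < n; s++ {
			sig := ""
			for a := range m.Delta[s] {
				sig += fmt.Sprintf("%d,%d;", m.Lambda[s][a], block[m.Delta[s][a]])
			}
			id, ok := ids[sig]
			if !ok {
				id = len(ids)
				ids[sig] = id
			}
			next[s] = id
		}
		if same(block, next) {
			return block // stable: block numbers identify equivalence classes
		}
		block = next
	}
}

func same(a, b []int) bool {
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}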
@@ -118,7 +118,7 @@ A partition is \emph{stable} if it is acceptable and for any input $a$ in $I$ an
Since an FSM has only finitely many states, partition refinement will terminate.
The output is the finest valid partition which is acceptable and stable.
For a more formal treatment on partition refinement we refer to \citet[Gries1973].
For a more formal treatment of partition refinement we refer to \citet[DBLP:journals/acta/Gries73].
\stopsubsection
@@ -130,7 +130,7 @@ Both types of splits described above can be used to construct a separating seque
In a split w.r.t. the output after $a$, this sequence is simply $a$.
In a split w.r.t. the state after $a$, the sequence starts with an $a$ and continues with the separating sequence for states in $\delta(B, a)$.
In order to systematically keep track of this information, we maintain a \emph{splitting tree}.
The splitting tree was introduced by \citet[Lee1994] as a data structure for maintaining the operational history of a partition refinement algorithm.
The splitting tree was introduced by \citet[DBLP:journals/tc/LeeY94] as a data structure for maintaining the operational history of a partition refinement algorithm.
\startdefinition[reference=def:splitting-tree]
A \emph{splitting tree} for $M$ is a rooted tree $T$ with a finite set of nodes with the following properties:
@@ -146,7 +146,7 @@ We use $C(u)$ to denote the set of children of a node $u$.
The \emph{lowest common ancestor} (lca) for a set $S' \subseteq S$ is the node $u$ such that $S' \subseteq l(u)$ and $S' \not \subseteq l(v)$ for all $v \in C(u)$ and is denoted by $\lca(S')$.
For a pair of states $s$ and $t$ we use the shorthand $\lca(s, t)$ for $\lca(\{s, t\})$.
The labels $l(u)$ can be stored as a \emph{refinable partition} data structure \citep[ValmariL2008].
The labels $l(u)$ can be stored as a \emph{refinable partition} data structure \citep[ValmariL08].
This is an array containing a permutation of the states, ordered so that states in the same block are adjacent.
The label $l(u)$ of a node then can be indicated by a slice of this array.
If node $u$ is split, some states in the \emph{slice} $l(u)$ may be moved to create the labels of its children, but this will not change the \emph{set} $l(u)$.
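A minimal Go sketch of such a refinable partition follows: an array holds a permutation of the states, each block is a contiguous slice of that array, and splitting only reorders states inside the slice. The structure and names are illustrative assumptions and do not follow the cited refinable-partition data structure in detail.

package refinable

// Partition keeps a permutation of the states so that each block occupies a
// contiguous slice of the array. Marking states moves them to the front of
// their block's slice; splitting detaches the marked prefix as a new block.
type Partition struct {
	elems  []int // permutation of the states 0..n-1
	loc    []int // loc[s] = index of s in elems
	sidx   []int // sidx[s] = block id of state s
	first  []int // first[b] = start of block b's slice
	past   []int // past[b]  = one past the end of block b's slice
	marked []int // marked[b] = number of marked states at the front of b
}

func New(n int) *Partition {
	p := &Partition{
		elems:  make([]int, n),
		loc:    make([]int, n),
		sidx:   make([]int, n),
		first:  []int{0},
		past:   []int{n},
		marked: []int{0},
	}
	for s := 0; s < n; s++ {
		p.elems[s], p.loc[s] = s, s
	}
	return p
}

// Mark swaps state s into the marked prefix of its block's slice.
func (p *Partition) Mark(s int) {
	b := p.sidx[s]
	i, m := p.loc[s], p.first[b]+p.marked[b]
	if i < m {
		return // already marked
	}
	t := p.elems[m]
	p.elems[i], p.elems[m] = t, s
	p.loc[s], p.loc[t] = m, i
	p.marked[b]++
}

// Split detaches the marked states of block b into a new block and returns
// its id, or -1 if the split is trivial (no state or every state marked).
func (p *Partition) Split(b int) int {
	m := p.marked[b]
	p.marked[b] = 0
	if m == 0 || p.first[b]+m == p.past[b] {
		return -1
	}
	nb := len(p.first)
	p.first = append(p.first, p.first[b])
	p.past = append(p.past, p.first[b]+m)
	p.marked = append(p.marked, 0)
	p.first[b] += m
	for i := p.first[nb]; i < p.past[nb]; i++ {
		p.sidx[p.elems[i]] = nb
	}
	return nb
}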
@@ -267,7 +267,7 @@ A splitting tree $T$ is \emph{minimal} if for all states $s$ and $t$ in differen
Minimality of a splitting tree can be used to obtain minimal separating sequences for pairs of states.
If the tree is in addition stable, we obtain minimal separating sequences for all inequivalent pairs of states.
Note that if a minimal splitting tree is $(n-1)$-stable ($n$ is the number of states of $M$), then it is stable (\in{Definition}[def:stable]).
This follows from the well-known fact that $n-1$ is an upper bound for the length of a minimal separating sequence \citep[Moore1956].
This follows from the well-known fact that $n-1$ is an upper bound for the length of a minimal separating sequence \citep[Moore56].
\in{Algorithm}[alg:minimaltree] ensures a stable and minimal splitting tree.
The first repeat-loop is the same as before (in \in{Algorithm}[alg:tree]).
@@ -432,7 +432,7 @@ and every block includes an indication of the slice containing its label and a p
\startsection
[title={Optimizing the Algorithm}]
In this section, we present an improvement on \in{Algorithm}[alg:increase-k] that uses two ideas described by \citet[Hopcroft1971] in his seminal paper on minimizing finite automata:
In this section, we present an improvement on \in{Algorithm}[alg:increase-k] that uses two ideas described by \citet[Hopcroft71] in his seminal paper on minimizing finite automata:
\emph{using the inverse transition set}, and \emph{processing the smaller half}.
The algorithm that we present is a drop-in replacement, so that \in{Algorithm}[alg:minimaltree] stays the same except for some bookkeeping.
This way, we can establish correctness of the new algorithms more easily.
@@ -462,7 +462,7 @@ So far, the improved algorithm still would have time complexity $\bigO(mn)$.
To reduce the complexity we have to use Hopcroft's second idea of \emph{processing the smaller half}.
The key idea is that, when we fix a $k$-candidate $v$, all leaves are split with respect to $(v, a)$ simultaneously.
Instead of iterating over all leaves to refine them, we iterate over $s \in \delta^{-1}(l(w), a)$ for all $w$ in $C(v)$ and look up in which leaf it is contained to move $s$ out of it.
From Lemma 8 by \citet[Knuutila2001] it follows that we can skip one of the children of $v$.
From Lemma 8 by \citet[DBLP:journals/tcs/Knuutila01] it follows that we can skip one of the children of $v$.
This lowers the time complexity to $\bigO(m \log n)$.
In order to move $s$ out of its leaf, each leaf $u$ is associated with a set of temporary children $C'(u)$ that is initially empty, and will be finalized after iterating over all $s$ and $w$.
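The Go sketch below illustrates this step for a fixed candidate $v$ and input $a$: predecessors of each child $w$ are moved out of their current leaves into temporary children keyed by $w$, and one child is skipped (here taken to be the largest, although the text only states that some child may be skipped). All types and names are hypothetical; the paper keeps this bookkeeping inside its own data structures.

package splitting

type state = int

// leaf holds the states still in a leaf u together with its temporary
// children C'(u), keyed by the child w of v that caused the move.
type leaf struct {
	states map[state]bool
	temp   map[int]map[state]bool
}

// moveOut iterates over s in delta^{-1}(l(w), a) for every child w of v except
// the skipped one, looks up the leaf containing s, and moves s into that
// leaf's temporary child associated with w.
func moveOut(children []map[state]bool, revDelta map[state][]state, leafOf map[state]*leaf) {
	skip := largest(children)
	for w, lw := range children {
		if w == skip {
			continue // "process the smaller half": one child is not scanned
		}
		for t := range lw {
			for _, s := range revDelta[t] { // s is an a-predecessor of t in l(w)
				u := leafOf[s]
				if u == nil || !u.states[s] {
					continue // s was already moved in this round
				}
				delete(u.states, s)
				if u.temp == nil {
					u.temp = map[int]map[state]bool{}
				}
				if u.temp[w] == nil {
					u.temp[w] = map[state]bool{}
				}
				u.temp[w][s] = true
			}
		}
	}
}

func largest(children []map[state]bool) int {
	best, size := -1, -1
	for w, lw := range children {
		if len(lw) > size {
			best, size = w, len(lw)
		}
	}
	return best
}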
@@ -591,7 +591,7 @@ The operation on \inline[line:prepend] is done in constant time by using a linke
A splitting tree can be used to extract relevant information for two classical test generation methods:
a \emph{characterization set} for the W-method and a \emph{separating family} for the HSI-method.
For an introduction and comparison of FSM-based test generation methods we refer to \citet[dorofeeva2010fsm].
For an introduction and comparison of FSM-based test generation methods we refer to \citet[DBLP:journals/infsof/DorofeevaEMCY10].
\startdefinition[reference=def:cset]
A set $W \subset I^{\ast}$ is called a \emph{characterization set} if for every pair of inequivalent states $s, t$ there is a sequence $w \in W$ such that $\lambda(s, w) \neq \lambda(t, w)$.
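As an illustration of this definition, here is a Go sketch that checks a candidate $W$ against a minimal Mealy machine, in which any two distinct states are inequivalent; $W$ then is a characterization set exactly when no two states produce identical outputs on every sequence in $W$. The Mealy type and function names are assumptions made for the example.

package wmethod

import "fmt"

// Mealy is a hypothetical complete Mealy machine: states 0..n-1, inputs 0..k-1.
type Mealy struct {
	Delta  [][]int // Delta[s][a] = successor state
	Lambda [][]int // Lambda[s][a] = output symbol
}

// outputWord returns lambda(s, w), the output sequence produced from state s
// when the input sequence w is applied.
func (m Mealy) outputWord(s int, w []int) []int {
	out := make([]int, len(w))
	for i, a := range w {
		out[i] = m.Lambda[s][a]
		s = m.Delta[s][a]
	}
	return out
}

// IsCharacterizationSet checks the definition above for a *minimal* machine:
// it returns false as soon as two states agree on all sequences of W.
func IsCharacterizationSet(m Mealy, W [][]int) bool {
	seen := map[string]bool{}
	for s := range m.Delta {
		sig := ""
		for _, w := range W {
			sig += fmt.Sprint(m.outputWord(s, w)) + "|"
		}
		if seen[sig] {
			return false // two states are not separated by any w in W
		}
		seen[sig] = true
	}
	return true
}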
@@ -642,8 +642,8 @@ Since we have to do this for every state, this takes $\bigO(n^2)$ time.
For test generation one moreover needs a transition cover.
This can be constructed in linear time with a breadth first search.
We conclude that we can construct all necessary information for the W-method in time $\bigO(m \log n)$ as opposed the the $\bigO(mn)$ algorithm used by \citet[dorofeeva2010fsm].
Furthermore, we conclude that we can construct all the necessary information for the HSI-method in time $\bigO(m \log n + n^2)$, improving on the the reported bound $\bigO(mn^3)$ by \citet[Hierons2015].
We conclude that we can construct all necessary information for the W-method in time $\bigO(m \log n)$ as opposed to the $\bigO(mn)$ algorithm used by \citet[DBLP:journals/infsof/DorofeevaEMCY10].
Furthermore, we conclude that we can construct all the necessary information for the HSI-method in time $\bigO(m \log n + n^2)$, improving on the reported bound $\bigO(mn^3)$ by \citet[DBLP:journals/cj/HieronsT15].
The original HSI-method was formulated differently and might generate smaller sets.
We conjecture that our separating family has the same size if we furthermore remove redundant prefixes.
This can be done in $\bigO(n^2)$ time using a trie data structure.
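One straightforward realisation of the trie-based prefix removal mentioned here is sketched below in Go: all sequences are inserted into a trie, and only those ending in a trie leaf (i.e. not a proper prefix of another sequence in the set) are kept. This is an assumption-laden sketch, not the paper's implementation.

package seqsets

// node is a plain trie node over the input alphabet.
type node struct {
	children map[int]*node
}

// RemoveRedundantPrefixes drops every sequence that is a proper prefix of
// another sequence in the given set.
func RemoveRedundantPrefixes(seqs [][]int) [][]int {
	root := &node{children: map[int]*node{}}

	// Insert every sequence into the trie.
	for _, w := range seqs {
		cur := root
		for _, a := range w {
			next := cur.children[a]
			if next == nil {
				next = &node{children: map[int]*node{}}
				cur.children[a] = next
			}
			cur = next
		}
	}

	// Keep a sequence only if it ends in a leaf, i.e. no longer sequence
	// in the set extends it.
	var kept [][]int
	for _, w := range seqs {
		cur := root
		for _, a := range w {
			cur = cur.children[a]
		}
		if len(cur.children) == 0 {
			kept = append(kept, w)
		}
	}
	return kept
}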
@@ -655,19 +655,20 @@ This can be done in $\bigO(n^2)$ time using a trie data structure.
We have implemented \in{Algorithms}[alg:increase-k, alg:increase-k-v3] in Go, and we have compared their running time on two sets of FSMs.
\footnote{Available at \todo{https://gitlab.science.ru.nl/rick/partition/}.}
The first set is from \cite{Smeenk}, where FSMs for embedded control software were automatically constructed.
The first set is from \citet[DBLP:conf/icfem/SmeenkMVJ15], where FSMs for embedded control software were automatically constructed.
These FSMs are of increasing size, varying from 546 to 3~410 states, with 78 inputs and up to 151 outputs.
The second set is inferred from \citet[Hopcroft1971], where two classes of finite automata, $A$ and $B$, are described that serve as a worst case for \in{Algorithms}[alg:increase-k, alg:increase-k-v3] respectively.
The second set is inferred from \citet[Hopcroft71], where two classes of finite automata, $A$ and $B$, are described that serve as a worst case for \in{Algorithms}[alg:increase-k, alg:increase-k-v3] respectively.
The FSMs that we have constructed for these automata have 1 input, 2 outputs, and $2^2$~--~$2^{15}$ states.
The running times in seconds on an Intel Core i5-2500 are plotted in \in{Figure}[fig:results].
We note that different slopes imply different complexity classes, since both axes have a logarithmic scale.
\startplacefigure
[title={Running time in seconds of \in{Algorithm}[alg:increase-k] (grey) and \in{Algorithm}[alg:increase-k-v3] (black).},
list={Running time in seconds of \in{Algorithms}[alg:increase-k, alg:increase-k-v3].},
reference=fig:results]
\startcombination[2*1]
{\starttikzpicture
\startloglogaxis[width=0.5\textwidth, xtick={500, 1000, 2000, 3000}, xticklabels={500, 1000, 2000, 3000}]
\startloglogaxis[width=0.5\textwidth, xtick={500, 1000, 2000, 3000}, xticklabels={$500$, $1000$, $2000$, $3000$}]
\addplot[color=darkgray] table[x=states, y=Moore] {esm.table};
\addplot[color=black] table[x=states, y=Hopcroft] {esm.table};
\stoploglogaxis
@@ -700,7 +701,7 @@ Second, it is required to consider nodes as a candidate before any one of its ch
This order follows naturally from the construction of a splitting tree.
Experimental results show that our algorithm outperforms the classic approach for both worst-case finite state machines and models of embedded control software.
Applications of minimal separating sequences such as the ones described by \citet[dorofeeva2010fsm, Smeenk] therefore show that our algorithm is useful in practice.
Applications of minimal separating sequences such as the ones described by \citet[DBLP:journals/infsof/DorofeevaEMCY10, DBLP:conf/icfem/SmeenkMVJ15] therefore show that our algorithm is useful in practice.
\stopsection

View file

@@ -7,6 +7,7 @@
% defaults
\tikzset{node distance=2cm} % 'shorten >=2pt' only for automata
\tikzset{/pgfplots/table/search path={data,../data}}
% observation table
\tikzset{obs table/.style={matrix of math nodes, ampersand replacement=\&, column 1/.style={anchor=base west}}}