Test methods and Learning nominal automata. Loads of references.
This commit is contained in:
parent
a41524b7c8
commit
ba248a3aa4
11 changed files with 1913 additions and 432 deletions
biblio-eigen.bib (new file, 73 lines)
@@ -0,0 +1,73 @@
@inproceedings{DBLP:conf/icfem/SmeenkMVJ15,
  author    = {Wouter Smeenk and
               Joshua Moerman and
               Frits W. Vaandrager and
               David N. Jansen},
  title     = {Applying Automata Learning to Embedded Control Software},
  booktitle = {Formal Methods and Software Engineering - 17th International Conference
               on Formal Engineering Methods, {ICFEM} 2015, Paris, France, November
               3-5, 2015, Proceedings},
  pages     = {67--83},
  year      = {2015},
  crossref  = {DBLP:conf/icfem/2015},
  url       = {https://doi.org/10.1007/978-3-319-25423-4\_5},
  doi       = {10.1007/978-3-319-25423-4\_5},
  timestamp = {Tue, 03 Oct 2017 16:28:01 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/icfem/SmeenkMVJ15},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@proceedings{DBLP:conf/icfem/2015,
  editor    = {Michael J. Butler and
               Sylvain Conchon and
               Fatiha Za{\"{\i}}di},
  title     = {Formal Methods and Software Engineering - 17th International Conference
               on Formal Engineering Methods, {ICFEM} 2015, Paris, France, November
               3-5, 2015, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {9407},
  publisher = {Springer},
  year      = {2015},
  url       = {https://doi.org/10.1007/978-3-319-25423-4},
  doi       = {10.1007/978-3-319-25423-4},
  isbn      = {978-3-319-25422-7},
  timestamp = {Tue, 03 Oct 2017 16:28:01 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/icfem/2015},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/lata/SmetsersMJ16,
  author    = {Rick Smetsers and
               Joshua Moerman and
               David N. Jansen},
  title     = {Minimal Separating Sequences for All Pairs of States},
  booktitle = {Language and Automata Theory and Applications - 10th International
               Conference, {LATA} 2016, Prague, Czech Republic, March 14-18, 2016, Proceedings},
  pages     = {181--193},
  year      = {2016},
  crossref  = {DBLP:conf/lata/2016},
  url       = {https://doi.org/10.1007/978-3-319-30000-9\_14},
  doi       = {10.1007/978-3-319-30000-9\_14},
  timestamp = {Sat, 16 Sep 2017 12:09:57 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/lata/SmetsersMJ16},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@proceedings{DBLP:conf/lata/2016,
  editor    = {Adrian-Horia Dediu and
               Jan Janousek and
               Carlos Mart{\'{\i}}n-Vide and
               Bianca Truthe},
  title     = {Language and Automata Theory and Applications - 10th International
               Conference, {LATA} 2016, Prague, Czech Republic, March 14-18, 2016, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {9618},
  publisher = {Springer},
  year      = {2016},
  url       = {https://doi.org/10.1007/978-3-319-30000-9},
  doi       = {10.1007/978-3-319-30000-9},
  isbn      = {978-3-319-29999-0},
  timestamp = {Wed, 24 May 2017 08:29:26 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/lata/2016},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

biblio-web.bib (new file, 6 lines)
@@ -0,0 +1,6 @@
@misc{nlambda-code,
  author = {Michal Szynwelski},
  title  = {{N}$\lambda$},
  note   = {Website},
  url    = {http://www.mimuw.edu.pl/~szynwelski/nlambda/}
}

biblio.bib (975 lines changed; file diff suppressed because it is too large)

@@ -1,4 +1,5 @@
\startproduct content
\enablemode[everything]
\project thesis

\starttext

@@ -24,7 +25,7 @@
\chapter{Ordered Nominal Sets}
\chapter{Succinct Nominal Automata?}

\title{References}
\placelistofpublications

\stoptext

@@ -724,8 +724,590 @@ We note that this number is polynomial in $k, l, m$ and $n$ but not in $p$.
[title={Learning Non-Deterministic Nominal Automata},
 reference=sec:nondet]

In this section, we introduce a variant of \nLStar{}, which we call \nNLStar{}, where the learnt automaton is non-deterministic.
It will be based on the \NLStar{} algorithm by \cite[authoryears][DBLP:conf/ijcai/BolligHKL09], an Angluin-style algorithm for learning NFAs.
The algorithm is shown in \in{Algorithm}[alg:nfa-alg].
We first illustrate \NLStar{}, then we discuss its extension to nominal automata.

\NLStar{} crucially relies on the use of \emph{residual finite-state automata} (RFSA) \cite[DBLP:journals/fuin/DenisLT02], which are NFAs admitting \emph{unique minimal canonical representatives}.
The states of this canonical automaton correspond to Myhill-Nerode right-congruence classes, but the automaton can be exponentially smaller than the corresponding minimal DFA:
\emph{composed} states, which are language-equivalent to sets of other states, can be dropped.

\todo{alg \tt 9-39}

The algorithm \NLStar{} equips the observation table $(S, E)$ with a union operation, allowing for the detection of \emph{composed} and \emph{prime} rows.

\startdefinition[reference=def:rows-jsl]
Let $(row(s_1) \rowunion row(s_2))(e) = row(s_1)(e) \lor row(s_2)(e)$ (regarding cells as booleans).
This operation induces an ordering between rows: $row(s_1) \rowincl row(s_2)$ whenever $row(s_1)(e) = 1$ implies $row(s_2)(e) = 1$, for all $e \in E$.
\stopdefinition

A row $row(s)$ is \emph{composed} if $row(s) = row(s_1) \rowunion \cdots \rowunion row(s_n)$, for some rows $row(s_i) \neq row(s)$.
Otherwise it is \emph{prime}.
We denote by $PR^\top(S, E)$ the rows in the top part of the table (ranging over $S$) which are prime w.r.t. the whole table (not only w.r.t. the top part).
We write $PR(S, E)$ for all the prime rows of $(S, E)$.

As in \LStar{}, states of hypothesis automata will be rows of $(S, E)$ but, as the aim is to construct a minimal RFSA, only prime rows are picked.
New notions of closedness and consistency are introduced, to reflect features of RFSAs.

\startdefinition
A table $(S, E)$ is:
\startitemize
\item \emph{RFSA-closed} if, for all $t \in S \Lext A$, $row(t) = \Rowunion \{ row(s) \in PR^\top(S,E) \mid row(s) \rowincl row(t)\}$;
\item \emph{RFSA-consistent} if, for all $s_1, s_2 \in S$ and $a \in A$, $row(s_1) \rowincl row(s_2)$ implies $row(s_1a) \rowincl row(s_2a)$.
\stopitemize
\stopdefinition
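
To make these definitions concrete, the following Haskell sketch implements the row operations and the two checks for a \emph{finite} alphabet.
It is only an illustration of the definitions above, not code taken from any existing implementation: the table representation, the names and the membership oracle are all assumptions made for the example.

\starttyping
-- Observation-table operations of NL*, sketched for a finite alphabet.
import           Data.List (nub)
import           Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map

type Word' = String          -- words over a finite alphabet
type Row   = Map Word' Bool  -- a row assigns a boolean to every suffix in E

data Table = Table
  { topWords :: [Word']          -- S
  , botWords :: [Word']          -- the lower part S.A
  , suffixes :: [Word']          -- E
  , member   :: Word' -> Bool    -- membership oracle (the teacher)
  }

row :: Table -> Word' -> Row
row t s = Map.fromList [ (e, member t (s ++ e)) | e <- suffixes t ]

-- Union and inclusion of rows (Definition def:rows-jsl).
rowUnion :: Row -> Row -> Row
rowUnion = Map.unionWith (||)

rowIncl :: Row -> Row -> Bool
rowIncl r1 r2 = and [ v <= Map.findWithDefault False e r2 | (e, v) <- Map.toList r1 ]

-- A row is composed if it is the union of the rows strictly below it.
isComposed :: [Row] -> Row -> Bool
isComposed allRows r =
  r == foldr rowUnion (Map.map (const False) r)
             [ r' | r' <- allRows, r' `rowIncl` r, r' /= r ]

-- PR(S, E): rows of the table that are prime.
primeRows :: [Row] -> [Row]
primeRows allRows = [ r | r <- nub allRows, not (isComposed allRows r) ]

-- RFSA-closedness: every lower row is the union of the prime top rows below it.
rfsaClosed :: Table -> Bool
rfsaClosed t = all ok (map (row t) (botWords t))
  where
    allRows   = map (row t) (topWords t ++ botWords t)
    primesTop = [ r | r <- map (row t) (topWords t), not (isComposed allRows r) ]
    ok r = r == foldr rowUnion (Map.map (const False) r)
                      [ p | p <- primesTop, p `rowIncl` r ]

-- RFSA-consistency: row inclusion is preserved when appending a letter.
rfsaConsistent :: [Char] -> Table -> Bool
rfsaConsistent alphabet t = and
  [ row t (s1 ++ [a]) `rowIncl` row t (s2 ++ [a])
  | s1 <- topWords t, s2 <- topWords t, a <- alphabet
  , row t s1 `rowIncl` row t s2 ]
\stoptyping

The nominal algorithm replaces the finite enumerations above by computations on orbit-finite sets, as explained in the remainder of this section.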

If $(S, E)$ is not RFSA-closed, then there is a row in the bottom part of the table which is prime, but not contained in the top part.
This row is then added to $S$ (line \ref{line:nfa-clos-witness}).
If $(S,E)$ is not RFSA-consistent, then there is a suffix which does not preserve the containment of two existing rows, so those rows are actually incomparable.
A new column is added to distinguish those rows (line \ref{line:nfa-cons-witness}).
Notice that counterexamples supplied by the teacher are added \emph{to columns} (line \ref{line:nfa-addcol-ex}).
Indeed, it is shown by \cite[authoryears][DBLP:conf/ijcai/BolligHKL09] that treating the counterexamples as in the original \LStar{}, namely adding them to rows, does not lead to a terminating algorithm.

\startdefinition[reference=def:nfa-conj]
Given a RFSA-closed and RFSA-consistent table $(S, E)$, the conjecture automaton is $N(S, E) = (Q, Q_0, F, \delta)$, where:
\startitemize
\item $Q = PR^\top(S,E)$;
\item $Q_0 = \{ r \in Q \mid r \rowincl row(\epsilon)\}$;
\item $F = \{r \in Q \mid r(\epsilon) = 1\}$;
\item the transition relation $\delta \subseteq Q \times A \times Q$ is given by $\delta(row(s),a) = \{ r \in Q \mid r \rowincl row(sa) \}$.
\stopitemize
\stopdefinition
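
The construction of $N(S, E)$ can likewise be phrased as a small functional program.
The sketch below again assumes a finite alphabet and an abstract row type; it only needs the inclusion test and a representative word for each prime row, and it is meant purely as an illustration of the definition above.

\starttyping
-- Building the conjecture N(S, E) from the prime rows of the top part.
data NFA q a = NFA
  { states  :: [q]
  , initial :: [q]
  , final   :: [q]
  , delta   :: q -> a -> [q]
  }

conjecture
  :: [row]                  -- prime top rows, PR^T(S, E)
  -> (row -> row -> Bool)   -- row inclusion
  -> (String -> row)        -- s |-> row(s), for s in S and S.A
  -> (row -> String)        -- a representative s with row(s) = r
  -> (row -> Bool)          -- r |-> r(epsilon)
  -> NFA row Char
conjecture primesTop incl rowOf repOf acceptsEps = NFA
  { states  = primesTop
  , initial = [ r | r <- primesTop, r `incl` rowOf "" ]
  , final   = [ r | r <- primesTop, acceptsEps r ]
  , delta   = \r a -> [ r' | r' <- primesTop
                           , r' `incl` rowOf (repOf r ++ [a]) ]
  }
\stoptyping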

As observed by \cite[authoryears][DBLP:conf/ijcai/BolligHKL09], $N(S, E)$ is not necessarily a RFSA, but it is a canonical RFSA if it is consistent with $(S, E)$.
If the algorithm terminates, then $N(S, E)$ must be consistent with $(S, E)$, which ensures correctness.
The termination argument is more involved than that of \LStar{}, but still it relies on the minimal DFA.

Developing an algorithm to learn nominal NFAs is not an obvious extension of \NLStar{}:
Non-deterministic nominal languages strictly contain nominal regular languages, so it is not clear what the developed algorithm should be able to learn.
To deal with this, we introduce a nominal notion of RFSAs.
They are a \emph{proper subclass} of nominal NFAs, because they recognize nominal regular languages.
Nonetheless, they are more succinct than nominal DFAs.

\startsubsection
[title={Nominal Residual Finite-State Automata},
 reference=subsec:nrfsa]

Let $\lang$ be a nominal regular language and let $u$ be a finite string.
The \emph{derivative} of $\lang$ w.r.t. $u$ is
\startformula
\lder{u}\lang = \{ v \in A^{\ast} \mid uv \in \lang \}.
\stopformula
A set $\lang' \subseteq \EAlph^*$ is a \emph{residual} of $\lang$ if there is $u$ with $\lang' = \lder{u}\lang$.
Note that a residual might not be equivariant, but it does have a finite support.
We write $R(\lang)$ for the set of residuals of $\lang$.
Residuals form an orbit-finite nominal set:
They are in bijection with the state-space of the minimal nominal DFA for $\lang$.
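
For a concrete (if artificial) example, consider the nominal language $\lang = \{ ab \mid a, b \in \EAlph, a \neq b \}$ of two-letter words with distinct letters.
Its residuals are
\startitemize
\item $\lder{\epsilon}\lang = \lang$, with empty support;
\item $\lder{a}\lang = \{ b \in \EAlph \mid b \neq a \}$ for every atom $a$, a single orbit of residuals, each supported by $\{a\}$;
\item $\lder{ab}\lang = \{ \epsilon \}$ for $a \neq b$, and $\lder{u}\lang = \emptyset$ for every other $u$ of length at least two.
\stopitemize
So $R(\lang)$ has four orbits, matching the four orbits of states of the minimal nominal DFA: the initial state, the states that have read one letter, the accepting state and the sink.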

A \emph{nominal residual finite-state automaton} for $\lang$ is a nominal NFA whose states are subsets of such minimal automaton.
Given a state $q$ of an automaton, we write $\lang(q)$ for the set of words leading from $q$ to a set of states containing a final one.

\startdefinition
A \emph{nominal residual finite-state automaton} (nominal RFSA) is a nominal NFA $\autom$ such that $\lang(q) \in R(\lang(\autom))$, for all $q \in Q_\autom$.
\stopdefinition

Intuitively, all states of a nominal RFSA recognize residuals, but not all residuals are recognized by a single state:
There may be a residual $\lang'$ and a set of states $Q'$ such that $\lang' = \bigcup_{q \in Q'} \lang(q)$, but no state $q'$ is such that $\lang(q') = \lang'$.
A residual $\lang'$ is called \emph{composed} if it is equal to the union of the residuals it strictly contains, explicitly
\startformula
\lang' = \Union \{ \lang'' \in R(\lang) \mid \lang'' \subsetneqq \lang' \} ;
\stopformula
otherwise it is called \emph{prime}.
In an ordinary RFSA, composed residuals have finitely many components.
This is not the case in a nominal RFSA.
However, the set of components of $\lang'$ always has a finite support, namely $\supp(\lang')$.

The set of prime residuals $PR(\lang)$ is an orbit-finite nominal set, and can be used to define a \emph{canonical} nominal RFSA for $\lang$, which has the minimal number of states and the maximal number of transitions.
This can be regarded as obtained from the minimal nominal DFA, by removing composed states and adding all initial states and transitions that do not change the recognized language.
This automaton is necessarily unique.

\startlemma[reference=lem:can-rfsa]
Let the \emph{canonical} nominal RFSA of $\lang$ be $(Q, Q_0, F, \delta)$ such that:
\startitemize
\item $Q = PR(\lang)$;
\item $Q_0 = \{ \lang' \in Q \mid \lang' \subseteq \lang\}$;
\item $F = \{ \lang' \in Q \mid \epsilon \in \lang' \}$;
\item $\delta(\lang_1,a) = \{ \lang_2 \in Q \mid \lang_2 \subseteq \lder{a}\lang_1 \}$.
\stopitemize
It is a well-defined nominal NFA accepting $\lang$.
\stoplemma

\stopsubsection
\startsubsection
[title={\nNLStar{}}]

Our nominal version of \NLStar{} again makes use of an observation table $(S, E)$ where $S$ and $E$ are equivariant subsets of $A^{\ast}$ and $row$ is an equivariant function.
As in the basic algorithm, we equip $(S, E)$ with a union operation $\rowunion$ and row containment relation $\rowincl$, defined as in \in{Definition}[def:rows-jsl].
It is immediate to verify that $\rowunion$ and $\rowincl$ are equivariant.

Our algorithm is a simple modification of the algorithm in \in{Algorithm}[fig:nfa-alg], where a few lines are replaced:
\startformula
6' S \gets S \cup \orb(sa) \qquad
9' E \gets E \cup \orb(ae) \qquad
12' E \gets E \cup \suff(\orb(t))
\stopformula
Switching to nominal sets, several decidability issues arise.
The most critical one is that rows may be the union of infinitely many component rows, as happens for residuals of nominal languages, so finding all such components can be challenging.
We adapt the notion of composed to rows: $row(t)$ is composed whenever
\startformula
row(t) = \Rowunion \{ row(s) \mid row(s) \rowincls row(t) \},
\stopformula
where $\rowincls$ is \emph{strict} row inclusion; otherwise $row(t)$ is prime.

We now check that relevant parts of our algorithm terminate.

{\bf Row Containment Check.}
The basic containment check $row(s) \rowincl row(t)$ is decidable, as $row(s)$ and $row(t)$ are supported by the finite supports of $s$ and $t$ respectively.

{\bf Line \ref{line:nfa-checks}: RFSA-Closedness and RFSA-Consistency Checks.}
We first show that prime rows form orbit-finite nominal sets.
\startlemma[reference=lem:pr-ns]
$PR(S,E)$, $PR^\top(S,E)$ and $PR(S,E) \setminus PR^\top(S,E)$ are orbit-finite nominal sets.
\stoplemma

Consider now RFSA-closedness.
It requires computing the set $C(row(t))$ of components of $row(t)$ contained in $PR^{\top}(S, E)$ (possibly including $row(t)$).
This may not be equivariant under permutations $Perm(\EAlph)$, but it is if we pick a subgroup.

\startlemma[reference=lem:comp-prop]
The set $C(row(t))$ has the following properties:
\startitemize
\item $\supp(C(row(t))) \subseteq \supp(row(t))$. \label{supp}
\item It is equivariant and orbit-finite under the action of the group
\startformula
G_t = \{ \pi \in Perm(\EAlph) \mid \restr{\pi}{\supp(row(t))} = \id \}
\stopformula
of permutations fixing $\supp(row(t))$.
\label{orb-fin}
\stopitemize
\stoplemma

We established that $C(row(t))$ can be effectively computed, and the same holds for $\Rowunion C(row(t))$.
In fact, $\Rowunion$ is equivariant w.r.t. the whole $Perm(\EAlph)$ and then, in particular, w.r.t. $G_t$, so it preserves orbit-finiteness.
Now, to check $row(t) = \Rowunion C(row(t))$, we can just pick one representative of every orbit of $S \Lext A$, because we have $C(\pi \cdot row(t)) = \pi \cdot C(row(t))$ and permutations distribute over $\rowunion$, so permuting both sides of the equation gives again a valid equation.

For RFSA-consistency, consider the two sets:
\startformula
N = \{ (s_1, s_2) \in S \times S \mid row(s_1) \rowincl row(s_2) \} \\
M = \{ (s_1, s_2) \in S \times S \mid \forall a \in A : row(s_1a) \rowincl row(s_2a) \}
\stopformula
They are both orbit-finite nominal sets, by equivariance of $row$, $\rowincl$ and $A$.
We can check RFSA-consistency in finite time by picking orbit representatives from $N$ and $M$.
For each representative $n \in N$, we look for a representative $m \in M$ and a permutation $\pi$ such that $n = \pi \cdot m$.
If no such $m$ and $\pi$ exist, then $n$ does not belong to any orbit of $M$, so it violates RFSA-consistency.

{\bf Lines \ref{line:nfa-clos-witness} and \ref{line:nfa-cons-witness}: Finding Witnesses for Violations.}
We can find witnesses by comparing orbit representatives of orbit-finite sets, as we did with RFSA-consistency.
Specifically, we can pick representatives in $S \times A$ and $S \times S \times A \times E$ and check them against the following orbit-finite nominal sets:
\startitemize
\item $\{ (s, a) \in S \times A \mid row(sa) \in PR(S, E) \setminus PR^{\top}(S, E) \}$;
\item $\{ (s_1, s_2, a, e) \in S \times S \times A \times E \mid row(s_1 a)(e) = 1, row(s_2 a)(e) = 0, row(s_1) \rowincl row(s_2) \}$.
\stopitemize

\stopsubsection
\startsubsection
[title={Correctness}]

Now we prove correctness and termination of the algorithm.
First, we prove that hypothesis automata are nominal NFAs.

\startlemma[reference=lem:nlstarwelldef]
The hypothesis automaton $N(S, E)$ (see \in{Definition}[def:nfa-conj]) is a nominal NFA.
\stoplemma

$N(S, E)$, as in ordinary \NLStar{}, is not always a nominal RFSA.
However, we have the following.

\starttheorem[reference=th:nlstar-rfsa]
If the table $(S, E)$ is RFSA-closed, RFSA-consistent and $N(S, E)$ is consistent with $(S, E)$, then $N(S, E)$ is a canonical nominal RFSA.
\stoptheorem

This is proved by \cite[authoryears][DBLP:conf/ijcai/BolligHKL09] for ordinary RFSAs, using the standard theory of regular languages.
The nominal proof is exactly the same, using derivatives of nominal regular languages and nominal RFSAs as defined in \in{Section}[subsec:nrfsa].

\startlemma[reference=lem:rows-min-dfa]
The table $(S, E)$ cannot have more than $n$ orbits of distinct rows, where $n$ is the number of orbits of the minimal nominal DFA for the target language.
\stoplemma
\startproof
Rows are residuals of $\lang$, which are states of the minimal nominal DFA for $\lang$, so orbits cannot be more than $n$.
\stopproof

\starttheorem[reference=th:nom-nl-term]
The algorithm \nNLStar{} terminates and returns the canonical nominal RFSA for $\lang$.
\stoptheorem
\startproof
If the algorithm terminates, then it must return the canonical nominal RFSA for $\lang$ by \in{Theorem}[th:nlstar-rfsa].
We prove that a table can be made RFSA-closed and RFSA-consistent in finite time.
This is similar to the proof of \in{Theorem}[thm:termination] and is inspired by the proof of Theorem 2 of \cite[authoryears][DBLP:conf/ijcai/BolligHKL09].

If the table is not RFSA-closed, we find a row $s \in S \Lext A$ such that $row(s) \in PR(S, E) \setminus PR^{\top}(S, E)$.
The algorithm then adds $\orb(s)$ to $S$.
Since $s$ was not equivalent to any upper prime row, and thus to any row indexed by $S$, we find that $(S \cup \orb(s)) / {\sim}$ has strictly more orbits than $S / {\sim}$ (recall that $s \sim t \iff row(s) = row(t)$).
This addition can only be done finitely many times, because the number of orbits of $S / {\sim}$ is bounded, by \in{Lemma}[lem:rows-min-dfa].

Now, the case of RFSA-consistency needs some additional notions.
Let $R$ be the (orbit-finite) nominal set of all rows, and let
$I = \{ (r,r') \in R \times R \mid r \rowincls r'\}$
be the set of all inclusion relations among rows.
The set $I$ is orbit-finite. In fact, consider
\startformula
J = \{ (s,t) \in ( S \cup S\Lext A) \times ( S \cup S\Lext A) \mid row(s) \rowincls row(t) \}.
\stopformula
This set is an equivariant, thus orbit-finite, subset of $( S \cup S\Lext A) \times ( S \cup S\Lext A)$.
The set $I$ is the image of $J$ via $row \times row$, which is equivariant, so it preserves orbit-finiteness.

Now, suppose the algorithm finds two elements $s_1, s_2 \in S$ with $row(s_1) \rowincl row(s_2)$ but $row(s_1 a)(e) = 1$ and $row(s_2 a)(e) = 0$ for some $a \in A$ and $e \in E$.
Adding a column to fix RFSA-consistency may:
{\bf C1)} increase the number of orbits of $(S \cup S \Lext A) / {\sim}$; or
{\bf C2)} decrease the number of orbits of $I$; or
{\bf C3)} decrease local symmetries/increase the dimension of one orbit of rows.
In fact, if no new rows are added ({\bf C1}), we have two cases.
\startitemize
\item
If $row(s_1) \rowincls row(s_2)$, i.e., $(row(s_1), row(s_2)) \in I$, then $row'(s_1) \not \rowincls row'(s_2)$, where $row'$ is the new table.
Therefore the orbit of $(row'(s_1), row'(s_2))$ is not in $I$.
Moreover, $row'(s) \rowincls row'(t)$ implies $row(s) \rowincls row(t)$ (as no new rows are added), so no new pairs are added to $I$.
Overall, $I$ has fewer orbits ({\bf C2}).
\item
If $row(s_1) = row(s_2)$, then we must have $row(s_1) = \pi \cdot row(s_1)$, for some $\pi$, because line~\ref{line:begin-nfa-closed} forbids equal rows in different orbits.
In this case $row'(s_1) \neq \pi \cdot row'(s_1)$ and we can use part of the proof of \in{Theorem}[thm:termination] to see that the orbit of $row'(s_1)$ has bigger dimension or fewer local symmetries than that of $row(s_1)$ ({\bf C3}).
\stopitemize
Orbits of $(S \cup S \Lext A)/{\sim}$ and of $I$ are finitely many, by \in{Lemma}[lem:rows-min-dfa] and what we proved above.
Moreover, local symmetries can decrease finitely many times, and the dimension of each orbit of rows is bounded by the dimension of the minimal DFA state-space.
Therefore all the above changes can happen finitely many times.

We have proved that the table eventually becomes RFSA-closed and RFSA-consistent.
Now we prove that a finite number of equivalence queries is needed to reach the final hypothesis automaton.
To do this, we cannot use a suitable version of \in{Lemma}[lem:minimal_wrt_table], because this relies on $N(S, E)$ being consistent with $(S, E)$, which in general is not true (see \cite[BolligHKL08] for an example of this).
We can, however, use an argument similar to that for RFSA-consistency, because the algorithm adds columns in response to counterexamples.
Let $w$ be the counterexample provided by the teacher.
When line $12'$ is executed, the table must change.
In fact, by Lemma 2 in \cite[DBLP:conf/ijcai/BolligHKL09], if it does not, then $w$ is already correctly classified by $N(S, E)$, which is absurd.
We have the following cases:
{\bf E1)} the number of orbits of $(S \cup S \Lext A)/{\sim}$ increases ({\bf C1});
or {\bf E2)} the number of orbits in $PR(S, E)$ increases, the number of orbits in $I$ decreases ({\bf C2}), or the local symmetries/dimension of an orbit of rows change ({\bf C3}).
In fact, if {\bf E1} does not happen and $PR(S, E)$, $I$ and local symmetries/dimension of orbits of rows do not change, the automaton $\autom$ for the new table coincides with $N(S, E)$.
But $N(S, E) = \autom$ is a contradiction, because $\autom$ correctly classifies $w$ (by Lemma 2 in \cite[DBLP:conf/ijcai/BolligHKL09], as $w$ now belongs to columns), whereas $N(S, E)$ does not.
Both {\bf E1} and {\bf E2} can only happen finitely many times.
\stopproof

\stopsubsection
\startsubsection
[title={Query Complexity}]

We now give bounds for the number of equivalence and membership queries needed by \nNLStar{}.
Let $n$ be the number of orbits of the minimal DFA $M$ for the target language and let $k$ be the dimension (i.e., the size of the maximum support) of its nominal set of states.

\startlemma[reference=lem:nfa-eq-queries]
The number of equivalence queries $E'_{n,k}$ is $O(n^2 f_\EAlph(k,k) + nk \log k)$.
\stoplemma
\startproof
In the proof of \in{Theorem}[th:nom-nl-term], we saw that equivalence queries lead to more orbits in $(S \cup S\Lext A)/{\sim}$ or in $PR(S, E)$, fewer orbits in $I$, or fewer local symmetries/bigger dimension for an orbit.
Clearly the first two can happen at most $n$ times each.
We now estimate how many times $I$ can decrease.
Suppose $(S \cup S\Lext A)/{\sim}$ has $d$ orbits and $h$ orbits are added to it.
Recall that, given an orbit $O$ of rows of dimension at most $m$, $f_\EAlph(m, m)$ is an upper bound for the number of orbits in the product $O \times O$.
Since the support of rows is bounded by $k$, we can give a bound for the number of orbits added to $I$: $dhf_\EAlph(k, k)$, for new pairs $r \rowincls r'$ with $r$ in a new orbit of rows and $r'$ in an old one (or vice versa); plus $(h(h-1)/2)f_\EAlph(k, k)$, for $r$ and $r'$ both in (distinct) new orbits; plus $hf_\EAlph(k, k)$, for $r$ and $r'$ in the same new orbit.
Notice that, if $PR(S, E)$ grows but $(S \cup S\Lext A)/{\sim}$ does not, $I$ does not increase.
By \in{Lemma}[lem:rows-min-dfa], $h, d \leq n$, so $I$ cannot decrease more than $(n^2 + n(n-1)/2 + n)f_\EAlph(k,k)$ times.

Local symmetries of an orbit of rows can decrease at most $k \log k$ times (see proof of \in{Lemma}[lem:bound_eq]), and its dimension can increase at most $k$ times.
Therefore $n(k + \log k)$ is a bound for all the orbits of rows, which are at most $n$, by \in{Lemma}[lem:rows-min-dfa].
Summing up, we get the main result.
\stopproof

\startlemma
Let $m$ be the length of the longest counterexample given by the teacher.
Then the table has:
\startitemize
\item at most $n$ orbits in $S$, with words of length at most $n$;
\item at most $mE'_{n,k}$ orbits in $E$, with words of length at most $mE'_{n,k}$.
\stopitemize
\stoplemma
\startproof
By \in{Lemma}[lem:rows-min-dfa], the number of orbits of rows indexed by $S$ is at most $n$.
Now, notice that line~\ref{line:nfa-clos-witness} does not add $\orb(sa)$ to $S$ if $sa \in S$, and lines~\ref{line:nfa-addcol-ex}~and~\ref{line:nfa-addcol-cons} cannot identify rows, so $S$ has at most $n$ orbits.
The length of the longest word in $S$ must be at most $n$, as $S = \{ \epsilon \}$ when the algorithm starts, and line~$6'$ adds words with one more symbol than those already in $S$.

For columns, we note that both fixing RFSA-consistency and adding counterexamples increase the number of columns, but this can happen at most $E'_{n,k}$ times (see proof of \in{Lemma}[lem:nfa-eq-queries]).
Each time at most $m$ suffixes are added to $E$.
\stopproof

We compute the maximum number of cells as in \in{Section}[ssec:lstar-compl].

\startlemma
The number of orbits in the lower part of the table, $S \Lext A$, is bounded by $n l f_\EAlph( pn, p)$.
\stoplemma

Then $C_{n,k,m}' = n (l f_\EAlph(p n, p) + 1) m E'_{n,k}$ is the maximal number of cells in the table.
This bound is polynomial in $n, m$ and $l$, but not in $k$ and $p$.

\startcorollary
The number of membership queries is bounded by $C_{n,k,m}' f_\EAlph(p n, p m E'_{n,k})$.
\stopcorollary

\stopsubsection
\stopsection

\startsection
[title={Implementation and Preliminary Experiments},
 reference=sec:impl]

Our algorithms for learning nominal automata operate on infinite sets of rows and columns, and hence it is not immediately clear how to actually implement them on a computer.
We have used NLambda, a recently developed Haskell library by \cite[authoryears][DBLP:journals/corr/KlinS16] designed to allow direct manipulation of infinite (but orbit-finite) nominal sets, within the functional programming paradigm.
The semantics of NLambda is based on \cite[DBLP:conf/popl/BojanczykBKL12], and the library itself is inspired by Fresh O'Caml by \cite[authoryears][DBLP:journals/entcs/Shinwell06], a language for functional programming over nominal data structures with binding.

\startsubsection
[title={NLambda}]

NLambda extends Haskell with a new type \kw{Atoms}.
Values of this type are atomic values that can be compared for equality and have no other discernible structure.
They correspond to the elements of the infinite alphabet $\EAlph$ described in \in{Section}[sec:prelim].

Furthermore, NLambda provides a unary type constructor \kw{Set}.
This appears similar to the \kw{Data.Set} type constructor from the standard Haskell library, but its semantics is markedly different:
Whereas the latter is used to construct finite sets, the former has {\em orbit-finite} sets as values.
The new constructor \kw{Set} can be applied to a range of equality types that include \kw{Atoms}, but also the tuple type $(\kw{Atoms}, \kw{Atoms})$, the list type $[\kw{Atoms}]$, the set type $\kw{Set}\ \kw{Atoms}$, and other types that provide basic infrastructure necessary to speak of supports and orbits.
All these are instances of a type class \kw{NominalType} specified in NLambda for this purpose.

NLambda, in addition to all the standard machinery of Haskell, offers primitives to manipulate values of any nominal types $\tau, \sigma$:

\startitemize
\item $\kw{empty} : \kw{Set}\ \tau$, returns the empty set of any type;
\item $\kw{atoms} : \kw{Set}\ \kw{Atoms}$, returns the (infinite but single-orbit) set of all atoms;
\item $\kw{insert} : \tau \to \kw{Set}\ \tau \to \kw{Set}\ \tau$, adds an element to a set;
\item $\kw{map} : (\tau \to \sigma) \to (\kw{Set}\ \tau \to \kw{Set}\ \sigma)$, applies a function to every element of a set;
\item $\kw{sum} : \kw{Set}\ (\kw{Set}\ \tau) \to \kw{Set}\ \tau$, computes the union of a family of sets;
\item $\kw{isEmpty} : \kw{Set}\ \tau \to \kw{Formula}$, checks whether a set is empty.
\stopitemize

The type \kw{Formula} takes the role of a Boolean type.
For technical reasons it is distinct from the standard Haskell type \kw{Bool}, but it provides standard logical operations such as
\startformula
\kw{not} : \kw{Formula} \to \kw{Formula}, \qquad
\kw{or} : \kw{Formula} \to \kw{Formula} \to \kw{Formula},
\stopformula
as well as a conditional operator $\kw{ite} : \kw{Formula} \to \tau \to \tau \to \tau$ that mimics the standard \kw{if-then-else} construction.
It is also the result type of a built-in equality test on atoms, $\kw{eq} : \kw{Atoms} \to \kw{Atoms} \to \kw{Formula}$.

Using these primitives, one builds more functions to operate on orbit-finite sets, such as a function to build singleton sets:
\startformula\startalign[align={left}]
\NC \kw{singleton} : \tau \to \kw{Set}\ \tau \NR
\NC \kw{singleton}\ x = \kw{insert}\ x\ \kw{empty} \NR
\stopalign\stopformula
or a filtering function to select elements that satisfy a given predicate:
\startformula\startalign[align={left}]
\NC \kw{filter} : (\tau \to \kw{Formula}) \to \kw{Set}\ \tau \to \kw{Set}\ \tau \NR
\NC \kw{filter}\ p\ s = \kw{sum}\ (\kw{map}\ (\lambda x .\ \kw{ite}\ (p\ x)\ (\kw{singleton}\ x)\ \kw{empty})\ s) \NR
\stopalign\stopformula
or functions to quantify a predicate over a set:
\startformula\startalign[align={left}]
\NC \kw{exists}, \kw{forall} : (\tau \to \kw{Formula}) \to \kw{Set}\ \tau \to \kw{Formula} \NR
\NC \kw{exists}\ p\ s = \kw{not}\ (\kw{isEmpty}\ (\kw{filter}\ p\ s)) \NR
\NC \kw{forall}\ p\ s = \kw{isEmpty}\ (\kw{filter}\ (\lambda x .\ \kw{not}\ (p\ x))\ s) \NR
\stopalign\stopformula
and so on.
Note that these functions are written in exactly the same way as they would be for finite sets and the standard \kw{Data.Set} type.
This is not an accident, and indeed the programmer can use the convenient set-theoretic intuition of NLambda primitives.
For example, one could conveniently construct various orbit-finite sets such as the set of all pairs of atoms:
\startformula
\kw{atomPairs} = \kw{sum}\ (\kw{map}\ (\lambda x .\ \kw{map}\ (\lambda y .\ (x, y))\ \kw{atoms})\ \kw{atoms}),
\stopformula
the set of all pairs of {\em distinct atoms}:
\startformula
\kw{distPairs} = \kw{filter}\ (\lambda (x, y) .\ \kw{not}\ (\kw{eq}\ x\ y))\ \kw{atomPairs}
\stopformula
and so on.

It should be stressed that all these constructions terminate in finite time, even though they formally involve infinite sets.
To achieve this, values of orbit-finite set types $\kw{Set}\ \tau$ are internally not represented as lists or trees of elements of type $\tau$.
Instead, they are stored and manipulated symbolically, using first-order formulas over variables that range over atom values.
For example, the value of \kw{distPairs} above is stored as the formal expression:
\startformula
\{ (a, b) \mid a, b \in \EAlph,\ a\neq b \}
\stopformula
or, more specifically, as a triple:
\startitemize
\item a pair $(a, b)$ of \quotation{atom variables},
\item a list $[a, b]$ of those atom variables that are bound in the expression (in this case, the expression contains no free variables),
\item a formula $a \neq b$ over atom variables.
\stopitemize
All the primitives listed above, such as \kw{isEmpty}, \kw{map} and \kw{sum}, are implemented on this internal representation.
In some cases, this involves checking the satisfiability of certain formulas over atoms.
In the current implementation of NLambda, an external SMT solver Z3 \cite[DBLP:conf/tacas/MouraB08] is used for that purpose.
For example, to evaluate the expression $\kw{isEmpty}\ \kw{distPairs}$,
NLambda makes a system call to the SMT solver to check whether the formula $a \neq b$ is satisfiable in the first-order theory of equality and, after receiving the affirmative answer, returns the value \kw{False}.

For more details about the semantics and implementation of NLambda, see \cite[DBLP:journals/corr/KlinS16].
The library itself can be downloaded from \cite[nlambda-code].

\stopsubsection
\startsubsection
[title={Implementation of \nLStar\ and \nNLStar}]

Using NLambda we implemented the algorithms from \in{Sections}[sec:nangluin] and \in{}[sec:nondet].
We note that the internal representation is slightly different from the one discussed in \in{Section}[sec:nangluin].
Instead of representing the table $(S, E)$ with actual representatives of orbits, the sets are represented logically as described above.
Furthermore, the control flow of the algorithm is adapted to fit in the functional programming paradigm.
In particular, recursion is used instead of a while loop.
In addition to the nominal adaptation of Angluin's algorithm \nLStar{}, we implemented a variant, \nLStarCol{}, which adds counterexamples to the columns instead of rows.
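
To illustrate the control flow only, the learner can be phrased as the following recursive function.
The types and helper functions are placeholders chosen for this sketch; they are not the actual NLambda implementation, which manipulates orbit-finite sets symbolically as described above.

\starttyping
-- The learning loop as recursion instead of a while-loop (sketch only).
data Teacher m = Teacher
  { membership :: String -> m Bool
  , equivalent :: Hypothesis -> m (Maybe String)  -- Nothing: equivalent;
  }                                               -- Just w: counterexample w

data Table      = Table       -- placeholder for the observation table (S, E)
data Hypothesis = Hypothesis  -- placeholder for the conjectured automaton

-- Placeholders for the steps described in the text.
fixClosedness, fixConsistency :: Monad m => Teacher m -> Table -> m Table
fixClosedness  _ t = return t
fixConsistency _ t = return t

makeHypothesis :: Table -> Hypothesis
makeHypothesis _ = Hypothesis

addColumns :: String -> Table -> Table
addColumns _ t = t

learn :: Monad m => Teacher m -> Table -> m Hypothesis
learn teacher table = do
  table'  <- fixClosedness teacher table
  table'' <- fixConsistency teacher table'
  let hyp = makeHypothesis table''
  answer <- equivalent teacher hyp
  case answer of
    Nothing -> return hyp                            -- teacher agrees: done
    Just w  -> learn teacher (addColumns w table'')  -- recurse on the counterexample
\stoptyping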

Target automata are defined using NLambda as well, using the automaton data type provided by the library.
Membership queries are already implemented by the library.
Equivalence queries are implemented by constructing a bisimulation (recall that bisimulation implies language equivalence),
where a counterexample is obtained when two DFAs are not bisimilar.
For nominal NFAs, however, we cannot implement a complete equivalence query, as their language equivalence is undecidable.
We approximated the equivalence by bounding the depth of the bisimulation for nominal NFAs.
As an optimization, we use bisimulation up to congruence as described by \cite[authoryears][DBLP:journals/cacm/BonchiP15].
Having an approximate teacher is a minor issue, since in many applications no complete teacher can be implemented and one relies on testing \cite[DBLP:conf/ictac/AartsFKV15, DBLP:conf/dlt/BolligHLM13].
For the experiments listed here the bound was chosen large enough for the learner to terminate with the correct automaton.
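
One way to realize this on ordinary (finite-alphabet) NFAs is sketched below: the two automata are explored in lockstep through sets of states, up to a given depth, and a word witnessing a difference is returned as counterexample.
This is only an illustration of the idea of a bounded check; the actual implementation works on orbit-finite nominal sets and uses bisimulation up to congruence, as cited above.

\starttyping
-- Bounded equivalence check for NFAs over a finite alphabet (sketch).
import Data.List (nub, sort)

data NFA q a = NFA
  { initial :: [q]
  , final   :: q -> Bool
  , delta   :: q -> a -> [q]
  }

accepting :: (q -> Bool) -> [q] -> Bool
accepting isFinal qs = any isFinal qs

step :: Ord q => NFA q a -> [q] -> a -> [q]
step m qs a = nub (sort (concat [ delta m q a | q <- qs ]))

-- Nothing: no difference found up to the given depth.
-- Just w : the word w is accepted by one automaton and rejected by the other.
boundedEquiv :: (Ord q1, Ord q2)
             => Int -> [a] -> NFA q1 a -> NFA q2 a -> Maybe [a]
boundedEquiv depth alphabet m1 m2 = go depth [] (initial m1) (initial m2)
  where
    go d w qs1 qs2
      | accepting (final m1) qs1 /= accepting (final m2) qs2 = Just (reverse w)
      | d == 0    = Nothing
      | otherwise = firstJust
          [ go (d - 1) (a : w) (step m1 qs1 a) (step m2 qs2 a) | a <- alphabet ]
    firstJust results = case [ x | Just x <- results ] of
                          []      -> Nothing
                          (x : _) -> Just x
\stoptyping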

We remark that our algorithms seamlessly merge with teachers written in NLambda, but the current version of the library does not allow generating concrete membership queries for external teachers.
We are currently working on a new version of the library in which this will be possible.
\todo{This is already possible now}

\stopsubsection
\startsubsection
[title={Test Cases},
 reference=sec:tests]

To provide a benchmark for future improvements, we tested our algorithms on a few simple automata described below. We report results in \in{Table}[tab:results].
The experiments were performed on a machine with an Intel Core i5 (Skylake, 2.4 GHz) and 8 GB RAM.
\todo{Table \tt 110-134}

{\bf Queue Data Structure.}
A queue is a data structure to store elements which can later be retrieved in a first-in, first-out order.
It has two operations: \kw{push} and \kw{pop}.
We define the alphabet $\Sigma_{\text{FIFO}} = \{ \kw{push}(a), \kw{pop}(a) \mid a \in \EAlph \}$.
The language $\text{FIFO}_n$ contains all valid traces of \kw{push} and \kw{pop} using a bounded queue of size $n$.
The minimal nominal DFA for $\text{FIFO}_2$ is
\todo{tikz \tt 143-160}

The state reached from $q_{1,x}$ via $\xrightarrow{push(x)}$ is omitted:
Its outgoing transitions are those of $q_{2,x,y}$, where $y$ is replaced by $x$.
Similar benchmarks appear in \cite[DBLP:conf/ictac/AartsFKV15, DBLP:journals/ml/IsbernerHS14].
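
For reference, the trace semantics of this benchmark can be written out as a small executable model over an arbitrary element type: a trace belongs to $\text{FIFO}_n$ exactly when replaying it on a queue of capacity $n$ never fails, where $\kw{pop}(a)$ must return the element $a$.
The code below is such a model for the finite case; it is an illustration of the intended language, not part of the learning implementation.

\starttyping
-- Executable description of the FIFO_n traces (illustration only).
data FifoOp a = Push a | Pop a
  deriving (Eq, Show)

-- Replay a trace on a queue of capacity n; Nothing signals an invalid trace.
replay :: Eq a => Int -> [FifoOp a] -> Maybe [a]
replay n = foldl stepOp (Just [])
  where
    stepOp Nothing      _        = Nothing
    stepOp (Just queue) (Push x)
      | length queue < n         = Just (queue ++ [x])  -- enqueue at the back
      | otherwise                = Nothing               -- queue overflow
    stepOp (Just queue) (Pop x)  = case queue of
        (y : rest) | y == x      -> Just rest            -- pop the correct element
        _                        -> Nothing              -- empty queue or wrong element

inFIFO :: Eq a => Int -> [FifoOp a] -> Bool
inFIFO n trace = replay n trace /= Nothing

-- For example:
--   inFIFO 2 [Push 1, Push 2, Pop 1, Pop 2]  == True
--   inFIFO 2 [Push 1, Pop 2]                 == False
--   inFIFO 2 [Push 1, Push 2, Push 3]        == False
\stoptyping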

{\bf Double Word.}
$\lang_n = \{ ww \mid w \in \EAlph^n \}$ from \in{Section}[sec:overview].

{\bf NFA.}
Consider the language $\lang_{eq} = \bigcup_{a \in \EAlph} \EAlph^{\ast} a \EAlph^{\ast} a \EAlph^{\ast}$ of words where some letter appears twice.
This is accepted by an NFA which guesses the position of the first occurrence of a repeated letter $a$ and then waits for the second $a$ to appear.
The language is not accepted by a DFA \cite[DBLP:journals/corr/BojanczykKL14].
Despite this, \nNLStar{} is able to learn the automaton:
\todo{tikz \tt 173-190}
where the transition from $q_2'$ to $q_{1,x}'$ is defined as $\delta(q_2', a) = \{ q_{1,b}' \mid b \in \EAlph \}$.

{\bf $n$-last Position.}
A prototypical example of regular languages which are accepted by very small NFAs is the set of words where a distinguished symbol $a$ appears in the $n$-last position \cite[DBLP:conf/ijcai/BolligHKL09].
We define a similar nominal language $\lang'_n = \bigcup_{a \in \EAlph} a \EAlph^{\ast} a \EAlph^n$.
To accept such words non-deterministically, one simply guesses the $n$-last position.
This language is also accepted by a much larger deterministic automaton.
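
For these two languages, plain membership predicates are easy to write down and can serve as reference oracles when experimenting with the learned automata; the following definitions are added only as an illustration.

\starttyping
-- Membership predicates for L_eq and L'_n over any alphabet with equality.
import Data.List (nub)

-- L_eq: some letter occurs at least twice.
inLeq :: Eq a => [a] -> Bool
inLeq w = length (nub w) < length w

-- L'_n = union over a of  a A* a A^n :
-- the first letter occurs again exactly n positions before the end.
inLn' :: Eq a => Int -> [a] -> Bool
inLn' n w = length w >= n + 2 && w !! (length w - n - 1) == head w
\stoptyping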

\stopsubsection
\stopsection
\startsection
[title={Related Work},
 reference=sec:related]

This section compares \nLStar{} with other algorithms from the literature.
We stress that no comparison is possible for \nNLStar{}, as it is the first learning algorithm for non-deterministic automata over infinite alphabets.

The first to consider learning automata over infinite alphabets was \cite[authoryears][DBLP:conf/alt/Sakamoto97].
In his work the problem is reduced to \LStar{} with some finite sub-alphabet.
The sub-alphabet grows in stages and \LStar{} is rerun at every stage, until the alphabet is big enough to capture the whole language.
In Sakamoto's approach, any learning algorithm can be used as a back-end.
This, however, comes at a cost:
It has to be rerun at every stage, and each symbol is treated in isolation, which might require more queries.
Our algorithm \nLStar{}, instead, works with the whole alphabet from the very start, and it exploits its symmetry.
An example is in \in{Sections}[sec:execution_example_original] and \in{}[ssec:nom-learning]:
The ordinary learner uses four equivalence queries, whereas the nominal one, using the symmetry, only needs three.
Moreover, our algorithm is easier to generalize to other alphabets and computational models, such as non-determinism.

More recently, papers on learning register automata appeared by \cite[authoryears][DBLP:conf/vmcai/HowarSJC12, DBLP:journals/fac/CasselHJS16].
Their register automata are as expressive as our deterministic nominal automata.
The state-space is similar to our orbit-wise representation:
It is formed by finitely many locations with registers.
Transitions are defined symbolically using propositional logic.
We remark that the most recent paper \cite[DBLP:journals/fac/CasselHJS16] generalizes the algorithm to alphabets with different structures (which correspond to different atom symmetries in our work), but at the cost of changing Angluin's framework.
Instead of membership queries the algorithm requires more sophisticated tree queries.
In our approach, using a different symmetry affects neither the algorithm nor its correctness proof.
Tree queries can be reduced to membership queries by enumerating all $n$-types for some $n$ ($n$-types in logic correspond to orbits in the set of $n$-tuples).
Keeping that in mind, their complexity results are roughly the same as ours, although this is hard to verify, as they do not give bounds on the length of individual tree queries.
Finally, our approach lends itself better to be extended to other variations on \LStar{} (of which many exist), as it is closer to Angluin's original work.

Another class of learning algorithms for systems with large alphabets is based on abstraction and refinement, which is orthogonal to the approach in the present paper, but connections and possible transference of techniques are worth exploring in the future.
\cite[authoryears][DBLP:conf/ictac/AartsFKV15] reduce the alphabet to a finite alphabet of abstractions, and \LStar{} for ordinary DFAs over such a finite alphabet is used.
Abstractions are refined by counterexamples.
Other similar approaches are \cite[DBLP:conf/vmcai/HowarSM11, DBLP:conf/nfm/IsbernerHS13], where global and local per-state abstractions of the alphabet are used, and \cite[DBLP:phd/hal/Mens17], where the alphabet can also have additional structure (e.g., an ordering relation).
We can also mention \cite[DBLP:conf/popl/BotincanB13], a framework for learning symbolic models of software behavior.

In \cite[DBLP:conf/fase/BergJR06, DBLP:conf/fase/BergJR08], the authors cope with an infinite alphabet by running \LStar{} (adapted to Mealy machines) using a finite approximation of the alphabet, which may be augmented when equivalence queries are answered.
A smaller symbolic model is derived subsequently.
Their approach, unlike ours, does not exploit the symmetry over the full alphabet.
The symmetry allows our algorithm to reduce queries and to produce the smallest possible automaton at every step.

Finally we compare with results on session automata \cite[DBLP:conf/dlt/BolligHLM13].
Session automata are defined over finite alphabets, just like the work by Sakamoto.
However, session automata are more restrictive than deterministic nominal automata.
For example, the model cannot capture an acceptor for the language of words where consecutive data values are distinct.
This language can be accepted by a three-orbit nominal DFA, which can be learned by our algorithm.

We implemented our algorithms in the nominal library NLambda as sketched before.
Other implementation options include Fresh OCaml \cite[DBLP:journals/entcs/Shinwell06], a functional programming language designed for programming over nominal data structures with binding, and \LOIS{} by \cite[authoryears][DBLP:conf/cade/KopczynskiT16, DBLP:conf/popl/KopczynskiT17], a C++ library for imperative nominal programming.
We chose NLambda for its convenient set-theoretic primitives, but the other options remain to be explored; in particular, the low-level \LOIS{} could be expected to provide more efficient implementations.

\stopsection
\startsection
[title={Discussion and Future Work}]

In this paper we defined and implemented extensions of several versions of \LStar{} and of \NLStar{} for nominal automata.

We highlight two features of our approach:
\startitemize
\item it has strong theoretical foundations:
The \emph{theory of nominal languages}, covering different alphabets and symmetries (see \in{Section}[sec:other-symms]); \emph{category theory}, where nominal automata have been characterized as \emph{coalgebras} \cite[DBLP:conf/icalp/KozenMP015, DBLP:journals/iandc/CianciaM10] and many properties and algorithms (e.g., minimization) have been studied at this abstract level.
\item it follows a generic pattern for transporting computation models and algorithms from finite sets to nominal sets, which leads to simple correctness proofs.
\stopitemize

These features pave the way to several extensions and improvements.

Future work includes a general version of \nNLStar{}, parametric in the notion of side-effect (an example is non-determinism).
Different notions will yield models with different degrees of succinctness w.r.t. deterministic automata.
The key observation here is that many forms of non-determinism and other side effects can be captured via the categorical notion of \emph{monad}, i.e., an algebraic structure, on the state-space.
Monads allow generalizing the notion of composed and prime state:
A state is composed whenever it is obtained from other states via an algebraic operation.
Our algorithm \nNLStar{} is based on the \emph{powerset} monad, representing classical non-determinism.
We are currently investigating a \emph{substitution} monad, where the operation is \quotation{applying a (possibly non-injective) substitution of atoms in the support}.
A minimal automaton over this monad, akin to a RFSA, will have states that can generate all the states of the associated minimal DFA via a substitution, but cannot be generated by other states (they are prime).
For instance, we can give an automaton over the substitution monad that recognizes $\lang_2$ from \in{Section}[sec:overview]:
\todo{tikz \tt 16-37}

Here $[y \mapsto x]$ means that, if that transition is taken, $q_{xy}$ (hence its language) is subject to $y \mapsto x$.
In general, the size of the minimal DFA for $\lang_n$ grows more than exponentially with $n$, but an automaton with substitutions on transitions, like the one above, only needs $\bigO(n)$ states.

In principle, thanks to the generic approach we have taken, all our algorithms should work for various kinds of atoms with more structure than just equality, as advocated by \cite[authoryears][DBLP:journals/corr/BojanczykKL14].
Details, such as precise assumptions on the underlying structure of atoms necessary for proofs to go through, remain to be checked.
For an implementation of automata learning over other kinds of atoms without compromising the generic approach, an extension of NLambda to those atoms will be needed, as the current version of the library only supports equality and totally ordered atoms.

The efficiency of our current implementation, as measured in \in{Section}[sec:tests], leaves much to be desired.
There is plenty of potential for running time optimization, ranging from improvements in the learning algorithms themselves, to optimizations in the NLambda library (such as replacing the external and general-purpose SMT solver with a purpose-built, internal one, or a tighter integration of nominal mechanisms with the underlying Haskell language as it was done by \cite[authoryears][DBLP:journals/entcs/Shinwell06]), to giving up the functional programming paradigm for an imperative language such as LOIS \cite[DBLP:conf/cade/KopczynskiT16, DBLP:conf/popl/KopczynskiT17].

\stopsection

\startsubject
[title={Acknowledgements}]

We thank Frits Vaandrager and Gerco van Heerdt for useful comments and discussions. We also thank the anonymous reviewers.

\stopsubject
\referencesifcomponent
\stopchapter
\stopcomponent

@@ -2,19 +2,10 @@
\startcomponent test-methods

%\usepackage[vlined]{algorithm2e}
%\newcommand{\tot}[1]{\xrightarrow{\,\,{#1}\,\,}}

\startchapter[title={A Framework for FSM-based Test Methods}]

\startsection[title={Preliminaries}]

\startsubsection[title={Mealy machines}]

@@ -77,7 +68,7 @@ An example Mealy machine is given in \in{Figure}[fig:running-example].

\startsubsection[title={Testing}]
In conformance testing we have a specification modelled as a Mealy machine and an implementation (the system under test, or SUT) which we assume to behave as a Mealy machine \cite[DBLP:journals/tc/LeeY94].
Tests are generated from the specification and applied to the implementation. We assume that we can reset the implementation before every test.
If the output is different from the specified output, then we know the implementation is flawed.
The goal is to test as little as possible, while covering as much as possible.

|
||||||
|
@ -93,7 +84,7 @@ We denote the set of maximal tests of $T$ with $max(T)$.
|
||||||
|
|
||||||
The maximal tests are the only tests in $T$ we actually have to apply to our SUT as we can record the intermediate outputs.
|
The maximal tests are the only tests in $T$ we actually have to apply to our SUT as we can record the intermediate outputs.
|
||||||
Also note that we do not have to consider outputs in the test suite, as those follow from the deterministic specification.
|
Also note that we do not have to consider outputs in the test suite, as those follow from the deterministic specification.
|
||||||
We define the size of a test suite as usual \cite{Dorofeeva,PLGHO14}.
|
We define the size of a test suite as usual \cite[DBLP:journals/infsof/DorofeevaEMCY10, DBLP:conf/hase/Petrenko0GHO14].
|
||||||
The size of a test suite is measured as the sum of the lengths of all its maximal tests plus one reset per test.
|
The size of a test suite is measured as the sum of the lengths of all its maximal tests plus one reset per test.
|
||||||
|
|
||||||
\startdefinition[reference=test-suite-size]
|
\startdefinition[reference=test-suite-size]
|
||||||
|
@ -159,7 +150,7 @@ In order to define a test suite modularly, we introduce notation for combining s
|
||||||
We require all sets to be \emph{prefix-closed}, this is very convenient in later proofs.
|
We require all sets to be \emph{prefix-closed}, this is very convenient in later proofs.
|
||||||
For sets of words $X$ and $Y$, we define:
|
For sets of words $X$ and $Y$, we define:
|
||||||
|
|
||||||
\startitemize
|
\startitemize[after]
|
||||||
\item their concatenation $X \cdot Y = \{ xy \mid x \in X, y \in Y \}$,
|
\item their concatenation $X \cdot Y = \{ xy \mid x \in X, y \in Y \}$,
|
||||||
\item iterated concatenation $X^0 = \{ \epsilon \}$ and $X^{n+1} = X \cdot X^{n}$,
|
\item iterated concatenation $X^0 = \{ \epsilon \}$ and $X^{n+1} = X \cdot X^{n}$,
|
||||||
\item bounded concatenation $X^{\leq n} = \bigcup_{i \leq n} X^i$, and
|
\item bounded concatenation $X^{\leq n} = \bigcup_{i \leq n} X^i$, and
|
||||||
|
@ -186,16 +177,16 @@ If $P$ is a state cover, $P \cdot I$ is called a \emph{transition cover}.
|
||||||
\stopdefinition
|
\stopdefinition
|
||||||
|
|
||||||
\startdefinition
|
\startdefinition
|
||||||
A family of sets $\Fam{X}$ is a \emph{separating family} (or set of \emph{harmonized state identifiers} \cite{YP90}) if $x \sim_\Fam{X} y$ implies $x \sim y$.
|
A family of sets $\Fam{X}$ is a \emph{separating family} (or set of \emph{harmonized state identifiers} \cite[YevtushenkoP90]) if $x \sim_\Fam{X} y$ implies $x \sim y$.
|
||||||
\stopdefinition
|
\stopdefinition
|
||||||
|
|
||||||
In other words, a family of words is a separating family if for each pair of inequivalent states it contains a word separating those states.
|
In other words, a family of words is a separating family if for each pair of inequivalent states it contains a word separating those states.
|
||||||
Following the definitions in \cite{LeeYannakakis}, a separating family where each set is a singleton is an \emph{adaptive distinguishing sequence} (ads).
|
Following the definitions in \cite[DBLP:journals/tc/LeeY94], a separating family where each set is a singleton is an \emph{adaptive distinguishing sequence} (ads).
|
||||||
An ads is of special interest since it can identify a state using a single word.
|
An ads is of special interest since it can identify a state using a single word.
|
||||||
Separating families always exist for Mealy machines, but an ads might not exist.
|
Separating families always exist for Mealy machines, but an ads might not exist.
|
||||||
|
|
||||||
Given a specification $M$ (with states $S$) we define two operations:
|
Given a specification $M$ (with states $S$) we define two operations:
|
||||||
\startitemize
|
\startitemize[after]
|
||||||
\item concatenation:
|
\item concatenation:
|
||||||
$X \odot \Fam{Y} = \{ xy \mid x \in X, y \in Y_{\delta(s_0, x)} \}$ and
|
$X \odot \Fam{Y} = \{ xy \mid x \in X, y \in Y_{\delta(s_0, x)} \}$ and
|
||||||
\item refinement: $\Fam{X} ; \Fam{Y}$ defined by
|
\item refinement: $\Fam{X} ; \Fam{Y}$ defined by
|
||||||
|
@ -223,16 +214,16 @@ For all families $\Fam{X}$ and $\Fam{Y}$:
|
||||||
In this section we describe a new test generation method for Mealy machines.
|
In this section we describe a new test generation method for Mealy machines.
|
||||||
Its completeness will be proven in a later section, together with completeness for related, known methods.
|
Its completeness will be proven in a later section, together with completeness for related, known methods.
|
||||||
|
|
||||||
From a high-level perspective, the method uses the algorithm by Lee and Yannakakis \cite{LeeYannakakis} to obtain an ads.
|
From a high-level perspective, the method uses the algorithm by Lee and Yannakakis \cite[DBLP:journals/tc/LeeY94] to obtain an ads.
|
||||||
If no ads exists, their algorithm still provides some sequences which separate some inequivalent states.
|
If no ads exists, their algorithm still provides some sequences which separate some inequivalent states.
|
||||||
Our extension is to refine the set of sequences by using pairwise separating sequences.
|
Our extension is to refine the set of sequences by using pairwise separating sequences.
|
||||||
The reason we do this is that the ADS-method generally constructs small test suites \cite{Dorofeeva}.
|
The reason we do this is that the ADS-method generally constructs small test suites \cite[DBLP:journals/infsof/DorofeevaEMCY10].
|
||||||
The test suites are small since an ads can identify a state with a single word, instead of a set of words which is generally needed.
|
The test suites are small since an ads can identify a state with a single word, instead of a set of words which is generally needed.
|
||||||
Even if the ads does not exist, using the partial result of Lee and Yannakakis' algorithm can reduce the size of test suites.
|
Even if the ads does not exist, using the partial result of Lee and Yannakakis' algorithm can reduce the size of test suites.
|
||||||
|
|
||||||
We will describe how to obtain separating families and adaptive distinguishing sequences for Mealy machines.
|
We will describe how to obtain separating families and adaptive distinguishing sequences for Mealy machines.
|
||||||
For the former, one typically uses Moore's or Hopcroft's minimization algorithm \cite{Smetsers}.
|
For the former, one typically uses Moore's or Hopcroft's minimization algorithm \cite[DBLP:conf/lata/SmetsersMJ16].
|
||||||
For the latter, an efficient algorithm is described in \cite{LeeYannakakis}.
|
For the latter, an efficient algorithm is described in \cite[DBLP:journals/tc/LeeY94].
|
||||||
Both algorithms use a splitting tree as data structure.
|
Both algorithms use a splitting tree as data structure.
|
||||||
|
|
||||||
\startdefinition[reference=splitting-tree]
|
\startdefinition[reference=splitting-tree]
|
||||||
|
@ -255,24 +246,25 @@ The splitting tree records the execution of a partition refinement algorithm (su
|
||||||
Each non-leaf node encodes a \defn{split} together with a witness.
|
Each non-leaf node encodes a \defn{split} together with a witness.
|
||||||
Since a complete splitting tree contains witnesses for all inequivalences, we can extract a separating family from it.
|
Since a complete splitting tree contains witnesses for all inequivalences, we can extract a separating family from it.
|
||||||
Briefly, for each state we define its set of words as follows: locate the leaf containing the state and collect the witnesses of all nodes encountered when traversing to the root of the tree.
|
Briefly, for each state we define its set of words as follows: locate the leaf containing the state and collect the witnesses of all nodes encountered when traversing to the root of the tree.
|
||||||
We refer to \cite{Smetsers} for more details on these algorithms.
|
We refer to \cite[DBLP:conf/lata/SmetsersMJ16] for more details on these algorithms.
|
||||||
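The extraction described above is easy to implement; the following Python sketch assumes a very simple tree representation (chosen purely for illustration) in which each node stores its label $l(u)$, its witness $\sigma(u)$ and its children.

\starttyping
from dataclasses import dataclass, field

@dataclass
class Node:
    states: frozenset                         # the label l(u)
    witness: tuple = ()                       # the witness sigma(u); unused for leaves
    children: list = field(default_factory=list)

def separating_family(root):
    """For each state, collect the witnesses of all ancestors of its leaf."""
    family = {}
    def walk(node, collected):
        if not node.children:                 # leaf of the (complete) splitting tree
            for s in node.states:
                family[s] = set(collected)
        for child in node.children:
            walk(child, collected + [node.witness])
    walk(root, [])
    return family
\stoptyping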
|
|
||||||
For adaptive distinguishing sequences an additional requirement is put on the splitting tree:
|
For adaptive distinguishing sequences an additional requirement is put on the splitting tree:
|
||||||
for each non-leaf node $u$, the sequence $\sigma(u)$ defines an injective map $x \mapsto (\delta(x, \sigma(u)), \lambda(x, \sigma(u)))$ on the set $l(u)$.
|
for each non-leaf node $u$, the sequence $\sigma(u)$ defines an injective map $x \mapsto (\delta(x, \sigma(u)), \lambda(x, \sigma(u)))$ on the set $l(u)$.
|
||||||
In \cite{LeeYannakakis} such splits are called \defn{valid}.
|
In \cite[DBLP:journals/tc/LeeY94] such splits are called \defn{valid}.
|
||||||
\in{Figure}[fig:example-splitting-tree] shows both a valid and invalid split.
|
\in{Figure}[fig:example-splitting-tree] shows both a valid and invalid split.
|
||||||
Validity precisely ensures that after performing a split, the states are still distinguishable.
|
Validity precisely ensures that after performing a split, the states are still distinguishable.
|
||||||
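Checking validity of a candidate split is straightforward; in the sketch below, \type{trans} and \type{out} are assumed to be dictionaries encoding $\delta$ and $\lambda$ on single inputs (an illustrative encoding only).

\starttyping
def delta(trans, state, word):
    """Transition function extended to words."""
    for a in word:
        state = trans[(state, a)]
    return state

def outputs(trans, out, state, word):
    """Output function extended to words."""
    result = []
    for a in word:
        result.append(out[(state, a)])
        state = trans[(state, a)]
    return tuple(result)

def is_valid_split(trans, out, block, sigma):
    """sigma splits `block` validly iff x -> (delta(x, sigma), lambda(x, sigma))
    is injective on the block."""
    images = [(delta(trans, x, sigma), outputs(trans, out, x, sigma)) for x in block]
    return len(set(images)) == len(block)
\stoptyping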
|
|
||||||
\startplacefigure[title={A complete splitting tree with shortest witnesses for the specification of \in{Figure}[fig:running-example]. Only the splits $a$ and $aa$ are valid.},
|
\startplacefigure
|
||||||
reference=fig:example-splitting-tree]
|
[title={A complete splitting tree with shortest witnesses for the specification of \in{Figure}[fig:running-example]. Only the splits $a$ and $aa$ are valid.},
|
||||||
|
reference=fig:example-splitting-tree]
|
||||||
\hbox{
|
\hbox{
|
||||||
\starttikzpicture[node distance=1.4cm]
|
\starttikzpicture[node distance=1.5cm]
|
||||||
\node (0) [text width=2cm, align=center, ] {$s_0, s_1, s_2, s_3, s_4$ $a$};
|
\node (0) [text width=3cm, align=center, ] {$s_0, s_1, s_2, s_3, s_4$ $a$};
|
||||||
\node (1) [text width=1cm, align=center, below left of=0 ] {$s_1$};
|
\node (1) [text width=1cm, align=center, below left of=0 ] {$s_1$};
|
||||||
\node (2) [text width=2cm, align=center, below right of=0] {$s_0, s_2, s_3, s_4$ $b$};
|
\node (2) [text width=2.5cm, align=center, below right of=0] {$s_0, s_2, s_3, s_4$ $b$};
|
||||||
\node (3) [text width=1cm, align=center, below left of=2 ] {$s_0, s_2, s_3$ $c$};
|
\node (3) [text width=2.0cm, align=center, below left of=2 ] {$s_0, s_2, s_3$ $c$};
|
||||||
\node (4) [text width=1cm, align=center, below right of=2] {$s_4$};
|
\node (4) [text width=1cm, align=center, below right of=2] {$s_4$};
|
||||||
\node (5) [text width=1cm, align=center, below left of=3 ] {$s_0, s_2$ $aa$};
|
\node (5) [text width=1.8cm, align=center, below left of=3 ] {$s_0, s_2$ $aa$};
|
||||||
\node (6) [text width=1cm, align=center, below right of=3] {$s_3$};
|
\node (6) [text width=1cm, align=center, below right of=3] {$s_3$};
|
||||||
\node (7) [text width=1cm, align=center, below left of=5 ] {$s_0$};
|
\node (7) [text width=1cm, align=center, below left of=5 ] {$s_0$};
|
||||||
\node (8) [text width=1cm, align=center, below right of=5] {$s_2$};
|
\node (8) [text width=1cm, align=center, below right of=5] {$s_2$};
|
||||||
|
@ -289,8 +281,10 @@ reference=fig:example-splitting-tree]
|
||||||
}
|
}
|
||||||
\stopplacefigure
|
\stopplacefigure
|
||||||
|
|
||||||
|
\todo{Make commands for these pictures.}
|
||||||
|
|
||||||
\startfact
|
\startfact
|
||||||
A complete splitting tree with valid splits exists if and only if there exists an adaptive distinguishing sequence \cite{LeeYannakakis}.
|
A complete splitting tree with valid splits exists if and only if there exists an adaptive distinguishing sequence \cite[DBLP:journals/tc/LeeY94].
|
||||||
\stopfact
|
\stopfact
|
||||||
|
|
||||||
Our method uses the exact same algorithm as the one by Lee and Yannakakis.
|
Our method uses the exact same algorithm as the one by Lee and Yannakakis.
|
||||||
|
@ -310,7 +304,7 @@ For this reason we call the method a hybrid method.
|
||||||
\startlinenumbering
|
\startlinenumbering
|
||||||
\STATE{$T_1 \leftarrow$ splitting tree for Moore's minimization algorithm}
|
\STATE{$T_1 \leftarrow$ splitting tree for Moore's minimization algorithm}
|
||||||
\STATE{$\Fam{H} \leftarrow$ separating family extracted from $T_1$}
|
\STATE{$\Fam{H} \leftarrow$ separating family extracted from $T_1$}
|
||||||
\STATE{$T_2 \leftarrow$ splitting tree with valid splits (see \cite{LeeYannakakis})}
|
\STATE{$T_2 \leftarrow$ splitting tree with valid splits (see \cite[DBLP:journals/tc/LeeY94])}
|
||||||
\STATE{$\Fam{Z'} \leftarrow$ (incomplete) family as constructed from $T_2$}
|
\STATE{$\Fam{Z'} \leftarrow$ (incomplete) family as constructed from $T_2$}
|
||||||
\FORALL{inequivalent states $s, t$ in the same leaf of $T_2$}{
|
\FORALL{inequivalent states $s, t$ in the same leaf of $T_2$}{
|
||||||
\STATE{$Z'_s \leftarrow Z_s \cup H_s$}
|
\STATE{$Z'_s \leftarrow Z_s \cup H_s$}
|
||||||
|
@ -324,7 +318,7 @@ For this reason we call the method a hybrid method.
|
||||||
|
|
||||||
With the hybrid family we can define the test suite as follows.
|
With the hybrid family we can define the test suite as follows.
|
||||||
In words, the suite consists of tests of the form $p w s$, where $p$ brings the SUT to a certain state, $w$ is to detect $k$ extra states and $s$ is to identify the state.
|
In words, the suite consists of tests of the form $p w s$, where $p$ brings the SUT to a certain state, $w$ is to detect $k$ extra states and $s$ is to identify the state.
|
||||||
Its $m$-completeness is proven in Section~\ref{sec:completeness}.
|
Its $m$-completeness is proven in \in{Section}[sec:completeness].
|
||||||
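For readers who prefer code over formulas, the test suite defined below can be read as the following Python sketch; here \type{delta} is the specification's transition function on words, \type{family} maps each state to its set of identifying suffixes $(\Fam{Z'} ; \Fam{H})_s$, and tests are tuples of inputs (representation choices made purely for this sketch).

\starttyping
from itertools import product

def words_upto(inputs, n):
    """The set of input words of length at most n."""
    return [tuple(w) for l in range(n + 1) for w in product(inputs, repeat=l)]

def hybrid_ads_suite(P, inputs, k, delta, s0, family):
    suite = set()
    for p in P:                               # access sequence from the state cover
        for w in words_upto(inputs, k + 1):   # middle part, to reach k extra states
            s = delta(s0, p + w)              # state predicted by the specification
            for suffix in family[s]:          # suffix identifying that state
                suite.add(p + w + suffix)
    return suite
\stoptyping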
|
|
||||||
\startdefinition
|
\startdefinition
|
||||||
Let $P$ be a state cover,
|
Let $P$ be a state cover,
|
||||||
|
@ -343,419 +337,384 @@ T = P \cdot I^{\leq k+1} \odot (\Fam{Z'} ; \Fam{H}).
|
||||||
In the figure we see the (unique) result of Lee and Yannakakis' algorithm.
|
In the figure we see the (unique) result of Lee and Yannakakis' algorithm.
|
||||||
We note that the states $s_2, s_3, s_4$ are not split, so we need to refine the family for those states.
|
We note that the states $s_2, s_3, s_4$ are not split, so we need to refine the family for those states.
|
||||||
|
|
||||||
\todo{Hier was ik gebleven}
|
\startplacefigure
|
||||||
\stopsubsection
|
[title={(a): Largest splitting tree with only valid splits for \in{Figure}[fig:running-example].
|
||||||
\stopsection
|
(b): Its adaptive distinguishing tree in notation of \cite[DBLP:journals/tc/LeeY94]},
|
||||||
\stopchapter
|
reference=fig:example-splitting-tree]
|
||||||
\stopcomponent
|
\startcombination[2*1]{
|
||||||
|
\hbox{
|
||||||
|
\starttikzpicture[node distance=2.0cm]
|
||||||
|
\node (0) [text width=2cm, align=center, ] {$s_0, s_1, s_2, s_3, s_4$ $a$};
|
||||||
|
\node (1) [text width=1cm, align=center, below left of=0 ] {$s_1$};
|
||||||
|
\node (2) [text width=2cm, align=center, below right of=0] {$s_0, s_2, s_3, s_4$ $aa$};
|
||||||
|
\node (3) [text width=1cm, align=center, below left of=2 ] {$s_0$};
|
||||||
|
\node (4) [text width=1cm, align=center, below right of=2] {$s_2, s_3, s_4$};
|
||||||
|
\path[->]
|
||||||
|
(0) edge (1)
|
||||||
|
(0) edge (2)
|
||||||
|
(2) edge (3)
|
||||||
|
(2) edge (4);
|
||||||
|
\stoptikzpicture
|
||||||
|
}} {a}
|
||||||
|
{\hbox{
|
||||||
|
\starttikzpicture[node distance=2.0cm]
|
||||||
|
\node (0) [text width=2cm, align=center, ] {$s_0, s_1, s_2, s_3, s_4$ $s_0, s_1, s_2, s_3, s_4$ $a$};
|
||||||
|
\node (1) [text width=1cm, align=center, below left of=0 ] {$s_1$ $s_2$};
|
||||||
|
\node (2) [text width=2cm, align=center, below right of=0] {$s_0, s_2, s_3, s_4$ $s_1, s_2, s_4, s_3$ $a$};
|
||||||
|
\node (3) [text width=1cm, align=center, below left of=2 ] {$s_0$ $s_2$};
|
||||||
|
\node (4) [text width=1cm, align=center, below right of=2] {$s_2, s_3, s_4$ $s_2, s_3, s_4$};
|
||||||
|
\path[->]
|
||||||
|
(0) edge (1)
|
||||||
|
(0) edge (2)
|
||||||
|
(2) edge (3)
|
||||||
|
(2) edge (4);
|
||||||
|
\stoptikzpicture
|
||||||
|
}} {b}
|
||||||
|
\stopcombination
|
||||||
|
\stopplacefigure
|
||||||
|
|
||||||
\begin{figure}
|
From the splitting tree in \in{Figure}[fig:example-splitting-tree], we obtain the following separating family $\Fam{H}$.
|
||||||
\centering
|
|
||||||
\begin{subfigure}{0.45\textwidth}
|
|
||||||
\centering
|
|
||||||
\begin{tikzpicture}[node distance=1.4cm]
|
|
||||||
\node (0) [text width=2cm, align=center, ] {$s_0, s_1, s_2, s_3, s_4$\\$a$};
|
|
||||||
\node (1) [text width=1cm, align=center, below left of=0 ] {$s_1$};
|
|
||||||
\node (2) [text width=2cm, align=center, below right of=0] {$s_0, s_2, s_3, s_4$\\$aa$};
|
|
||||||
\node (3) [text width=1cm, align=center, below left of=2 ] {$s_0$};
|
|
||||||
\node (4) [text width=1cm, align=center, below right of=2] {$s_2, s_3, s_4$};
|
|
||||||
\path[->]
|
|
||||||
(0) edge (1)
|
|
||||||
(0) edge (2)
|
|
||||||
(2) edge (3)
|
|
||||||
(2) edge (4);
|
|
||||||
\end{tikzpicture}
|
|
||||||
\caption{Largest splitting tree with only valid splits for Figure~\ref{fig:running-example}.}
|
|
||||||
\label{fig:example-splitting-tree-valid}
|
|
||||||
\end{subfigure}~
|
|
||||||
\begin{subfigure}{0.45\textwidth}
|
|
||||||
\centering
|
|
||||||
\begin{tikzpicture}[node distance=1.4cm]
|
|
||||||
\node (0) [text width=2cm, align=center, ] {\tiny $s_0, s_1, s_2, s_3, s_4$\\$s_0, s_1, s_2, s_3, s_4$\\$a$};
|
|
||||||
\node (1) [text width=1cm, align=center, below left of=0 ] {\tiny $s_1$\\$s_2$};
|
|
||||||
\node (2) [text width=2cm, align=center, below right of=0] {\tiny $s_0, s_2, s_3, s_4$\\$s_1, s_2, s_4, s_3$\\$a$};
|
|
||||||
\node (3) [text width=1cm, align=center, below left of=2 ] {\tiny $s_0$\\$s_2$};
|
|
||||||
\node (4) [text width=1cm, align=center, below right of=2] {\tiny $s_2, s_3, s_4$\\$s_2, s_3, s_4$};
|
|
||||||
\path[->]
|
|
||||||
(0) edge (1)
|
|
||||||
(0) edge (2)
|
|
||||||
(2) edge (3)
|
|
||||||
(2) edge (4);
|
|
||||||
\end{tikzpicture}
|
|
||||||
\caption{The adaptive distinguishing tree in notation of \cite{LeeYannakakis}.}
|
|
||||||
\label{fig:example-splitting-tree-ads}
|
|
||||||
\end{subfigure}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
From the splitting tree in Figure~\ref{fig:example-splitting-tree}, we obtain the following separating family $\Fam{H}$.
|
|
||||||
From the figure above we obtain the family $\Fam{Z}$.
|
From the figure above we obtain the family $\Fam{Z}$.
|
||||||
These families and the refinement $\Fam{Z'};\Fam{H}$ are given below:
|
These families and the refinement $\Fam{Z'};\Fam{H}$ are given below:
|
||||||
|
|
||||||
\begin{center}
|
\starttabulate[|l|l|l|]
|
||||||
\begin{tabular}{l @{\hspace{1cm}} l @{\hspace{1cm}} l}
|
\NC $H_0 = \{aa,b,c\}$ \NC $Z'_0 = \{aa\}$ \NC $(Z';H)_0 = \{aa\}$ \NR
|
||||||
$H_0 = \{aa,b,c\}$ & $Z'_0 = \{aa\}$ & $(Z';H)_0 = \{aa\}$ \\
|
\NC $H_1 = \{a\}$ \NC $Z'_1 = \{a\}$ \NC $(Z';H)_1 = \{a\}$ \NR
|
||||||
$H_1 = \{a\}$ & $Z'_1 = \{a\}$ & $(Z';H)_1 = \{a\}$ \\
|
\NC $H_2 = \{aa,b,c\}$ \NC $Z'_2 = \{aa\}$ \NC $(Z';H)_2 = \{aa,b,c\}$ \NR
|
||||||
$H_2 = \{aa,b,c\}$ & $Z'_2 = \{aa\}$ & $(Z';H)_2 = \{aa,b,c\}$ \\
|
\NC $H_3 = \{a,b,c\}$ \NC $Z'_3 = \{aa\}$ \NC $(Z';H)_3 = \{a,b,c\}$ \NR
|
||||||
$H_3 = \{a,b,c\}$ & $Z'_3 = \{aa\}$ & $(Z';H)_3 = \{a,b,c\}$ \\
|
\NC $H_4 = \{a,b\}$ \NC $Z'_4 = \{aa\}$ \NC $(Z';H)_4 = \{aa,b\}$ \NR
|
||||||
$H_4 = \{a,b\}$ & $Z'_4 = \{aa\}$ & $(Z';H)_4 = \{aa,b\}$
|
\stoptabulate
|
||||||
\end{tabular}
|
\todo{Put this in startformula/startalign}
|
||||||
\end{center}
|
|
||||||
|
|
||||||
\subsection{When $k$ is not known}
|
\stopsubsection
|
||||||
|
\startsubsection
|
||||||
|
[title={When $k$ is not known}]
|
||||||
|
|
||||||
In many of the applications described in Section~\ref{sec:applications} no bound on the number of states of the SUT was known.
|
In many of the applications described in \in{Section}[sec:applications] no bound on the number of states of the SUT was known.
|
||||||
In such cases it is possible to randomly select test cases from an infinite test suite.
|
In such cases it is possible to randomly select test cases from an infinite test suite.
|
||||||
Unfortunately, we lose the theoretical guarantees of completeness with random generation.
|
Unfortunately, we lose the theoretical guarantees of completeness with random generation.
|
||||||
Still, for the applications in Section~\ref{sec:applications} it has worked well in finding flaws.
|
Still, for the applications in \in{Section}[sec:applications] it has worked well in finding flaws.
|
||||||
|
|
||||||
We can randomly select test cases as follows.
|
We can randomly select test cases as follows.
|
||||||
In the above definition for the hybrid ADS test suite we replace $I^{\leq k}$ by $I^{\ast}$ to obtain an infinite test suite.
|
In the above definition for the hybrid ADS test suite we replace $I^{\leq k}$ by $I^{\ast}$ to obtain an infinite test suite.
|
||||||
Then we sample tests as follows:
|
Then we sample tests as follows:
|
||||||
1) sample an element $p$ from $P$ uniformly,
|
\startitemize[n]
|
||||||
2) sample a word $w$ from $I^{\ast}$ with a geometric distribution then
|
\item sample an element $p$ from $P$ uniformly,
|
||||||
3) sample uniformly from $(\Fam{Z'} ; \Fam{H})_s$ for the state $s = \delta(s_0, pw)$.
|
\item sample a word $w$ from $I^{\ast}$ with a geometric distribution, and
|
||||||
|
\item sample uniformly from $(\Fam{Z'} ; \Fam{H})_s$ for the state $s = \delta(s_0, pw)$.
|
||||||
|
\stopitemize
|
||||||
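A direct implementation of this sampling scheme could look as follows; the stopping probability of the geometric distribution and the data representation are arbitrary choices made for this sketch.

\starttyping
import random

def sample_test(P, inputs, delta, s0, family, stop=0.1):
    p = random.choice(sorted(P))              # 1) access sequence, uniform over P
    w = []
    while random.random() >= stop:            # 2) infix with geometric length
        w.append(random.choice(inputs))       #    symbols chosen uniformly
    w = tuple(w)
    s = delta(s0, p + w)                      # state the specification predicts
    suffix = random.choice(sorted(family[s])) # 3) identifying suffix, uniform
    return p + w + suffix
\stoptyping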
|
|
||||||
|
\stopsection
|
||||||
\section{Related test methods}
|
\startsection
|
||||||
\label{sec:methods}
|
[title={Related test methods},
|
||||||
|
reference=sec:methods]
|
||||||
|
|
||||||
In this section, we briefly review the classical conformance testing methods:
|
In this section, we briefly review the classical conformance testing methods:
|
||||||
the W, Wp, UIO, UIOv, HSI, ADS methods.
|
the W, Wp, UIO, UIOv, HSI, ADS methods.
|
||||||
Our method described is very similar to some of these methods, so we will
|
The method we described is very similar to some of these methods, so we will relate them by describing them uniformly.
|
||||||
relate them by describing them uniformly.
|
|
||||||
|
|
||||||
There are many more test generation methods. Literature shows, however, that not
|
There are many more test generation methods.
|
||||||
all of them are complete. For example, the methods in \cite{Bernhard} are
|
Literature shows, however, that not all of them are complete.
|
||||||
falsified by \cite{Petrenko} and the UIO-method \cite{Sabnani} is shown to be
|
For example, the methods in \cite[DBLP:journals/tosem/Bernhard94] are falsified by \cite[DBLP:journals/tosem/Petrenko97] and the UIO-method \cite[DBLP:journals/cn/SabnaniD88] is shown to be incomplete in \cite[DBLP:conf/sigcomm/ChanVO89].
|
||||||
incomplete in \cite{Vuong}. For that reason, completeness of the correct methods
|
For that reason, completeness of the correct methods is shown (again) in the next section.
|
||||||
is shown (again) in the next section.
|
|
||||||
We fix a state cover $P$ throughout this section and take the transition cover $Q = P \cdot I$.
|
We fix a state cover $P$ throughout this section and take the transition cover $Q = P \cdot I$.
|
||||||
|
|
||||||
\subsection{W-method \cite{Chow,Vasilevskii}}
|
\startsubsection
|
||||||
|
[title={W-method \cite[DBLP:journals/tse/Chow78, Vasilevskii73]}]
|
||||||
Possibly one of the earliest $m$-complete test methods.
|
Possibly one of the earliest $m$-complete test methods.
|
||||||
|
|
||||||
\begin{mdefinition}\label{w-method}
|
\startdefinition
|
||||||
A set of words $W$ is a \Def{characterization set} if for each pair of inequivalent states $s$ and $t$ there exists a word $w \in W$ with $\lambda(s,w) \neq \lambda(t,w)$.
|
[reference=w-method]
|
||||||
\newline
|
A set of words $W$ is a \defn{characterization set} if for each pair of inequivalent states $s$ and $t$ there exists a word $w \in W$ with $\lambda(s,w) \neq \lambda(t,w)$.
|
||||||
Let $W$ be a characterization set, the \Def{W test suite} is
|
|
||||||
defined as $(P \cup Q) \cdot I^{\leq k} \cdot W$.
|
Let $W$ be a characterization set, the \defn{W test suite} is
|
||||||
\end{mdefinition}
|
defined as $(P \cup Q) \cdot I^{\leq k} \cdot W$.
|
||||||
|
\stopdefinition
|
||||||
|
|
||||||
If we have a separating family $\Fam{W}$, we can obtain a characterization
|
If we have a separating family $\Fam{W}$, we can obtain a characterization
|
||||||
set by flattening: take $W = \bigcup \Fam{W}$.
|
set by flattening: take $W = \bigcup \Fam{W}$.
|
||||||
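As a sketch (with $P$, $W$ and tests represented as sets of input tuples, a choice made only for illustration), the W test suite is a triple concatenation:

\starttyping
from itertools import product

def words_upto(inputs, k):
    return [tuple(w) for l in range(k + 1) for w in product(inputs, repeat=l)]

def w_suite(P, inputs, k, W):
    Q = {p + (a,) for p in P for a in inputs}     # transition cover Q = P . I
    return {x + m + w
            for x in P | Q
            for m in words_upto(inputs, k)
            for w in W}
\stoptyping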
|
|
||||||
|
\stopsubsection
|
||||||
|
\startsubsection
|
||||||
|
[title={The Wp-method \cite[DBLP:journals/tse/FujiwaraBKAG91]},
|
||||||
|
reference=sec:wp]
|
||||||
|
|
||||||
\subsection{The Wp-method \cite{Fujiwara}}\label{sec:wp}
|
|
||||||
The W-method was refined by Fujiwara to use smaller sets when identifying states.
|
The W-method was refined by Fujiwara to use smaller sets when identifying states.
|
||||||
In order to do that he defined state-local sets of words.
|
In order to do that he defined state-local sets of words.
|
||||||
|
|
||||||
\begin{mdefinition}
|
\startdefinition
|
||||||
\label{state-identifier}
|
[reference={state-identifier,wp-method}]
|
||||||
\label{wp-method}
|
A \defn{state identifier for $s$} is a set $W_s$ such that for every inequivalent
|
||||||
A \Def{state identifier for $s$} is a set $W_s$ such that for every inequivalent
|
|
||||||
state $t$ there is a $w \in W_s$ such that $\lambda(s, w) \neq \lambda(t, w)$.
|
state $t$ there is a $w \in W_s$ such that $\lambda(s, w) \neq \lambda(t, w)$.
|
||||||
\newline
|
|
||||||
Let $\Fam{W}$ be a family of state identifiers, the \Def{Wp test suite} is
|
Let $\Fam{W}$ be a family of state identifiers, the \defn{Wp test suite} is
|
||||||
defined as $P \cdot I^{\leq k} \cdot \bigcup \Fam{W} \,\cup\, Q \cdot I^{\leq k}
|
defined as $P \cdot I^{\leq k} \cdot \bigcup \Fam{W} \,\cup\, Q \cdot I^{\leq k}
|
||||||
\odot \Fam{W}$.
|
\odot \Fam{W}$.
|
||||||
\end{mdefinition}
|
\stopdefinition
|
||||||
|
|
||||||
Note that $\bigcup \Fam{W}$ is a characterization set as defined for the W-method.
|
Note that $\bigcup \Fam{W}$ is a characterization set as defined for the W-method.
|
||||||
It is needed for completeness to test states with the whole set $\bigcup \Fam{W}$.
|
It is needed for completeness to test states with the whole set $\bigcup \Fam{W}$.
|
||||||
Once states are tested as such, we can use the smaller sets $W_s$ for testing transitions.
|
Once states are tested as such, we can use the smaller sets $W_s$ for testing transitions.
|
||||||
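The two parts of the Wp suite can be sketched as follows; \type{delta} is the specification's transition function on words and \type{W} maps each state to its state identifier (again, the representation is an assumption for illustration).

\starttyping
from itertools import product

def words_upto(inputs, k):
    return [tuple(w) for l in range(k + 1) for w in product(inputs, repeat=l)]

def wp_suite(P, inputs, k, delta, s0, W):
    union_W = set().union(*W.values())            # the flat characterization set
    Q = {p + (a,) for p in P for a in inputs}     # transition cover Q = P . I
    part1 = {p + m + w for p in P for m in words_upto(inputs, k) for w in union_W}
    part2 = {q + m + w for q in Q for m in words_upto(inputs, k)
                       for w in W[delta(s0, q + m)]}   # suffixes of the reached state
    return part1 | part2
\stoptyping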
|
|
||||||
|
\stopsubsection
|
||||||
|
\startsubsection
|
||||||
|
[title={The HSI-method \cite[LuoPB95, YevtushenkoP90]},
|
||||||
|
reference=sec:hsi]
|
||||||
|
|
||||||
\subsection{The HSI-method \cite{Luo,YP90}}\label{sec:hsi}
|
|
||||||
The Wp-method in turn was refined by Yevtushenko and Petrenko.
|
The Wp-method in turn was refined by Yevtushenko and Petrenko.
|
||||||
They make use of so called \emph{harmonized} state identifiers (which are called
|
They make use of so-called \emph{harmonized} state identifiers (which are called separating families in \cite[DBLP:journals/tc/LeeY94] and in the present paper).
|
||||||
separating families in \cite{LeeYannakakis} and in present paper).
|
By having this global property of the family, fewer tests need to be executed when testing a state.
|
||||||
By having this global property of the family, less tests need to
|
|
||||||
be executing when testing a state.
|
|
||||||
|
|
||||||
\begin{mdefinition}\label{hsi-method}
|
\startdefinition
|
||||||
Let $\Fam{H}$ be a separating family, the \Def{HSI test suite} is defined as
|
[reference=hsi-method]
|
||||||
|
Let $\Fam{H}$ be a separating family, the \defn{HSI test suite} is defined as
|
||||||
$(P \cup Q) \cdot I^{\leq k} \odot \Fam{H}$.
|
$(P \cup Q) \cdot I^{\leq k} \odot \Fam{H}$.
|
||||||
\end{mdefinition}
|
\stopdefinition
|
||||||
|
|
||||||
Our newly described test method is an instance of the HSI-method.
|
Our newly described test method is an instance of the HSI-method.
|
||||||
However, in \cite{Luo,YP90} they describe the HSI-method together with a
|
However, in \cite[LuoPB95, YevtushenkoP90] they describe the HSI-method together with a specific way of generating the separating families.
|
||||||
specific way of generating the separating families. Namely, the set obtained
|
Namely, the sets obtained from a splitting tree with shortest witnesses.
|
||||||
by a splitting tree with shortest witnesses. In present paper that
|
In the present paper this is generalized, allowing our extension to be an instance.
|
||||||
is generalized, allowing for our extension to be an instance.
|
|
||||||
|
|
||||||
|
\stopsubsection
|
||||||
|
\startsubsection
|
||||||
|
[title={The ADS-method \cite[DBLP:journals/tc/LeeY94]},
|
||||||
|
reference=sec:ads]
|
||||||
|
|
||||||
\subsection{The ADS-method \cite{LeeYannakakis}}\label{sec:ads}
|
As discussed before, when a Mealy machine admits an adaptive distinguishing sequence, only one test has to be performed for identifying a state.
|
||||||
As discussed before, when a Mealy machine admits a adaptive distinguishing sequence,
|
|
||||||
only one test has to be performed for identifying a state.
|
|
||||||
This is exploited in the ADS-method.
|
This is exploited in the ADS-method.
|
||||||
|
|
||||||
\begin{mdefinition}\label{ds-method}
|
\startdefinition
|
||||||
Let $\Fam{Z}$ be a separating family where every set is a singleton, the
|
[reference=ds-method]
|
||||||
\Def{ADS test suite} is defined as $(P \cup Q) \cdot I^{\leq k} \odot \Fam{Z}$.
|
Let $\Fam{Z}$ be a separating family where every set is a singleton, the \defn{ADS test suite} is defined as
|
||||||
\end{mdefinition}
|
$(P \cup Q) \cdot I^{\leq k} \odot \Fam{Z}$.
|
||||||
|
\stopdefinition
|
||||||
|
|
||||||
|
\stopsubsection
|
||||||
|
\startsubsection
|
||||||
|
[title={The UIOv-method \cite[DBLP:conf/sigcomm/ChanVO89]},
|
||||||
|
reference=sec:uiov]
|
||||||
|
|
||||||
\subsection{The UIOv-method \cite{Vuong}}\label{sec:uiov}
|
|
||||||
Some Mealy machines which do not admit an adaptive distinguishing sequence
|
Some Mealy machines which do not admit an adaptive distinguishing sequence
|
||||||
may still admit state identifiers which are singletons.
|
may still admit state identifiers which are singletons.
|
||||||
These are exactly UIO sequences and they give rise to the UIOv-method.
|
These are exactly UIO sequences and they give rise to the UIOv-method.
|
||||||
In a way this is a generalization of the ADS-method, since the requirement
|
In a way this is a generalization of the ADS-method, since the requirement that state identifiers are harmonized is dropped.
|
||||||
that state identifiers are harmonized is dropped.
|
|
||||||
|
|
||||||
\begin{mdefinition}\label{uio}\label{uiov-method}
|
\startdefinition
|
||||||
Given $s \in M$, we say that a word $w \in I^{\ast}$ is an \Def{UIO sequence for
|
[reference={uio, uiov-method}]
|
||||||
$s$} if for all inequivalent $t \in M$ we have $\lambda(s, w) \neq \lambda(t,
|
Given $s \in M$, we say that a word $w \in I^{\ast}$ is an \defn{UIO sequence for $s$} if for all inequivalent $t \in M$ we have $\lambda(s, w) \neq \lambda(t, w)$.
|
||||||
w)$.
|
|
||||||
\newline
|
|
||||||
Let $\Fam{U} = \{ \text{a single UIO for } s \}_{s \in S}$ be a family of UIO
|
|
||||||
sequences, the \Def{UIOv test suite} is defined as $P \cdot I^{\leq k} \cdot
|
|
||||||
\bigcup \Fam{U} \,\cup\, Q \cdot I^{\leq k} \odot \Fam{U}$.
|
|
||||||
\end{mdefinition}
|
|
||||||
|
|
||||||
|
Let $\Fam{U} = \{ \text{a single UIO for } s \}_{s \in S}$ be a family of UIO sequences, the \defn{UIOv test suite} is defined as
|
||||||
|
$P \cdot I^{\leq k} \cdot \bigcup \Fam{U} \,\cup\, Q \cdot I^{\leq k} \odot \Fam{U}$.
|
||||||
|
\stopdefinition
|
||||||
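The defining property of a UIO sequence is easy to check on a minimal specification; in the sketch below \type{trans} and \type{out} encode $\delta$ and $\lambda$ on single inputs, and minimality lets us read \quotation{inequivalent} as \quotation{different state} (both are assumptions of this sketch).

\starttyping
def outputs(trans, out, state, word):
    """The output word lambda(state, word)."""
    result = []
    for a in word:
        result.append(out[(state, a)])
        state = trans[(state, a)]
    return tuple(result)

def is_uio(trans, out, states, s, w):
    """w is a UIO for s iff its outputs from s differ from its outputs
    from every other (inequivalent) state."""
    return all(outputs(trans, out, s, w) != outputs(trans, out, t, w)
               for t in states if t != s)
\stoptyping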
|
|
||||||
\subsection{Overview}
|
\stopsubsection
|
||||||
\label{sec:overview}
|
\startsubsection
|
||||||
|
[title={Overview},
|
||||||
|
reference=sec:overview]
|
||||||
|
|
||||||
We give an overview of the aforementioned test methods.
|
We give an overview of the aforementioned test methods.
|
||||||
We classify them along two directions:
|
We classify them along two directions:
|
||||||
1) whether they use harmonized state identifiers or not and
|
\startitemize
|
||||||
2) whether it used singleton state identifiers or not.
|
\item whether they use harmonized state identifiers or not and
|
||||||
|
\item whether they use singleton state identifiers or not.
|
||||||
|
\stopitemize
|
||||||
|
|
||||||
|
\starttheorem
|
||||||
|
[reference=thm:completeness]
|
||||||
|
The following test suites are all $n+k$-complete:
|
||||||
|
|
||||||
|
\starttabulate[|c|c|c|]
|
||||||
|
\NC \NC Arbitrary \NC Harmonized
|
||||||
|
\NR \HL %----------------------------------------
|
||||||
|
\NC Many / pairwise \NC Wp \NC HSI
|
||||||
|
\NR
|
||||||
|
\NC % empty cell
|
||||||
|
\NC $ P \cdot I^{\leq k} \cdot \bigcup \Fam{W} \,\cup\, Q \cdot I^{\leq k} \odot \Fam{W} $
|
||||||
|
\NC $ (P \cup Q) \cdot I^{\leq k} \odot \Fam{H} $
|
||||||
|
\NR \HL %----------------------------------------
|
||||||
|
\NC Hybrid \NC \NC Hybrid ADS
|
||||||
|
\NR
|
||||||
|
\NC % empty cell
|
||||||
|
\NC % empty cell
|
||||||
|
\NC $ (P \cup Q) \cdot I^{\leq k} \odot (\Fam{Z'} ; \Fam{H}) $
|
||||||
|
\NR \HL %----------------------------------------
|
||||||
|
\NC Single / global \NC UIOv \NC ADS
|
||||||
|
\NR
|
||||||
|
\NC % empty cell
|
||||||
|
\NC $ P \cdot I^{\leq k} \cdot \bigcup \Fam{U} \,\cup\, Q \cdot I^{\leq k} \odot \Fam{U} $
|
||||||
|
\NC $ (P \cup Q) \cdot I^{\leq k} \odot \Fam{Z} $
|
||||||
|
\NR \HL %----------------------------------------
|
||||||
|
\stoptabulate
|
||||||
|
\stoptheorem
|
||||||
|
|
||||||
\begin{theorem}\label{thm:completeness}
|
|
||||||
The following test suites are all $n+k$-complete:
|
|
||||||
\begin{center}
|
|
||||||
\begin{tabular}{l|p{4.5cm}|p{4.5cm}|}
|
|
||||||
% empty cell
|
|
||||||
& Arbitrary
|
|
||||||
& Harmonized
|
|
||||||
\\ \hline
|
|
||||||
Many / pairwise
|
|
||||||
& Wp \newline $ P \cdot I^{\leq k} \cdot \bigcup \Fam{W} \,\cup\, Q \cdot I^{\leq k} \odot \Fam{W} $
|
|
||||||
& HSI \newline $ (P \cup Q) \cdot I^{\leq k} \odot \Fam{H} $
|
|
||||||
\\ \hline
|
|
||||||
Hybrid
|
|
||||||
&
|
|
||||||
& Hybrid ADS \newline
|
|
||||||
$ (P \cup Q) \cdot I^{\leq k} \odot (\Fam{Z'} ; \Fam{H}) $
|
|
||||||
\\ \hline
|
|
||||||
Single / global
|
|
||||||
& UIOv \newline $ P \cdot I^{\leq k} \cdot \bigcup \Fam{U} \,\cup\, Q \cdot I^{\leq k} \odot \Fam{U} $
|
|
||||||
& ADS \newline $ (P \cup Q) \cdot I^{\leq k} \odot \Fam{Z} $
|
|
||||||
\\ \hline
|
|
||||||
\end{tabular}
|
|
||||||
\end{center}
|
|
||||||
\end{theorem}
|
|
||||||
\todo{Say something about the hybrid UIO method}
|
\todo{Say something about the hybrid UIO method}
|
||||||
\todo{Give the sizes of the suites for the example. Note that ADS and UIOv are not applicable (state 3 has no UIO)}
|
\todo{Give the sizes of the suites for the example. Note that ADS and UIOv are not applicable (state 3 has no UIO)}
|
||||||
|
|
||||||
The incomplete \Def{UIO test suite} is defined as $(P \cup Q) \cdot
|
The incomplete \defn{UIO test suite} is defined as $(P \cup Q) \cdot I^{\leq k} \odot \Fam{U}$; its incompleteness is shown in \cite[DBLP:conf/sigcomm/ChanVO89].
|
||||||
I^{\leq k} \odot \Fam{U}$, incompleteness is shown in \cite{Vuong}.
|
|
||||||
|
|
||||||
It should be noted that the ADS-method is a specific instance of the HSI-method
|
It should be noted that the ADS-method is a specific instance of the HSI-method and similarly the UIOv-method is an instance of the Wp-method.
|
||||||
and similarly the UIOv-method is an instance of the Wp-method. What is
|
What is generally meant by the Wp-method and HSI-method is the above formula together with a particular way to obtain the (harmonised) state identifiers.
|
||||||
generally meant by the Wp-method and HSI-method is the above formula together
|
|
||||||
with a particular way to obtain the (harmonised) state identifiers.
|
|
||||||
|
|
||||||
We are often interested in the size of the test suite. In the worst case, all
|
We are often interested in the size of the test suite.
|
||||||
methods generate a test suite with a size in $\bigO(pn^3)$ and this bound is
|
In the worst case, all methods generate a test suite with a size in $\bigO(pn^3)$ and this bound is tight \cite[Vasilevskii73].
|
||||||
tight \cite{Vasilevskii}. Nevertheless we expect intuitively that the right column
|
Nevertheless we expect intuitively that the right column performs better, as we are using a more structured set (given a separating family for the HSI-method, we can always forget about the common prefixes and apply the Wp-method, which will never be smaller if constructed in this way).
|
||||||
performs better, as we are using a more structured set (given a separating
|
Also we expect the bottom row to perform better as there is a single test for each state.
|
||||||
family for the HSI-method, we can always forget about the common prefixes and
|
Small experimental results confirm this intuition
|
||||||
apply the Wp-method, which will never be smaller if constructed in this way).
|
\cite[DBLP:journals/infsof/DorofeevaEMCY10].
|
||||||
Also we expect the bottom row to perform better as there is a single test for
|
|
||||||
each state. Small experimental results confirm this intuition
|
|
||||||
\cite{Dorofeeva}.
|
|
||||||
|
|
||||||
\section{Proof of completeness}
|
\stopsubsection
|
||||||
\label{sec:completeness}
|
\stopsection
|
||||||
|
\startsection
|
||||||
|
[title={Proof of completeness},
|
||||||
|
reference=sec:completeness]
|
||||||
|
|
||||||
We fix a specification $M$ which has a minimal representative with $n$ states and an implementation $M'$ with at
|
We fix a specification $M$ which has a minimal representative with $n$ states and an implementation $M'$ with at most $n+k$ states.
|
||||||
most $n+k$ states. We assume that all states are reachable from the initial
|
We assume that all states are reachable from the initial state in both machines (i.e., both are \defn{connected}).
|
||||||
state in both machines (i.e. both are \Def{connected}). We define the following
|
We define the following notation:
|
||||||
notation:
|
|
||||||
|
|
||||||
\begin{mdefinition}
|
\startdefinition
|
||||||
Let $W \subseteq I^{\ast}$ be a set of words. Two states $x, y$ (of possibly
|
Let $W \subseteq I^{\ast}$ be a set of words.
|
||||||
different machines) are $W$-equivalent, written $x \sim_W y$, if $\lambda(x,
|
Two states $x, y$ (of possibly different machines) are $W$-equivalent, written $x \sim_W y$, if $\lambda(x, w) = \lambda(y, w)$ for all $w \in W$.
|
||||||
w) = \lambda(y, w)$ for all $w \in W$.
|
\stopdefinition
|
||||||
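Since the two states may live in different machines, a small sketch of the check takes each state together with its machine (a pair of transition and output dictionaries, an encoding chosen only for this illustration):

\starttyping
def outputs(machine, state, word):
    """Output word of `machine` from `state` on the input word."""
    trans, out = machine
    result = []
    for a in word:
        result.append(out[(state, a)])
        state = trans[(state, a)]
    return tuple(result)

def w_equivalent(mx, x, my, y, W):
    """x ~_W y: the states agree on every word in W (possibly across machines)."""
    return all(outputs(mx, x, w) == outputs(my, y, w) for w in W)
\stoptyping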
\end{mdefinition}
|
|
||||||
|
|
||||||
We note that for a fixed set $W$ the relation $\sim_W$ is an equivalence relation
|
We note that for a fixed set $W$ the relation $\sim_W$ is an equivalence relation and that $W \subseteq V$ gives $x \sim_V y \Rightarrow x \sim_W y$.
|
||||||
and that $W \subseteq V$ gives $x \sim_V y \Rightarrow x \sim_W y$. The next lemma
|
The next lemma gives sufficient conditions for a test suite of the given anatomy to be
|
||||||
gives sufficient conditions for a test suite of the given anatomy to be
|
complete.
|
||||||
complete. We then prove that these conditions hold for the test suites in this
|
We then prove that these conditions hold for the test suites in this paper.
|
||||||
paper.
|
|
||||||
|
|
||||||
\begin{lemma}
|
\startlemma
|
||||||
Let $\Fam{W}$ and $\Fam{W'}$ be two families of words and $P$ a state cover
|
Let $\Fam{W}$ and $\Fam{W'}$ be two families of words and $P$ a state cover for $M$.
|
||||||
for $M$. Let $T = P \cdot I^{\leq k} \odot \Fam{W} \,\cup\, P \cdot I^{\leq
|
Let $T = P \cdot I^{\leq k} \odot \Fam{W} \,\cup\, P \cdot I^{\leq k+1} \odot \Fam{W'}$ be a test suite.
|
||||||
k+1} \odot \Fam{W'}$ be a test suite.
|
If
|
||||||
|
\startitemize[n]
|
||||||
|
\item for all $x, y \in M:$ $x \sim_{W_x \cap W_y} y$ implies $x \sim y$,
|
||||||
|
\item for all $x, y \in M$ and $z \in M'$: $x \sim_{W_x} z$ and $z \sim_{W'_y} y$ implies $x \sim y$, and
|
||||||
|
\item the machines $M$ and $M'$ agree on $T$,
|
||||||
|
\stopitemize
|
||||||
|
then $M$ and $M'$ are equivalent.
|
||||||
|
\stoplemma
|
||||||
|
\startproof
|
||||||
|
First, we prove that $P \cdot I^{\leq k}$ reaches all states in $M'$.
|
||||||
|
For $p, q \in P$ and $x = \delta(s_0, p), y = \delta(s_0, q)$ such that $x \not\sim_{W_x \cap W_y} y$, we also have $\delta'(m'_0, p) \not\sim_{W_x \cap W_y} \delta'(m'_0, q)$ in the implementation $M'$.
|
||||||
|
By (1) this means that there are at least $n$ different behaviours in $M'$, hence at least $n$ states.
|
||||||
|
|
||||||
If
|
Now $n$ states are reached by the previous argument (using the set $P$).
|
||||||
\vspace{-3mm}
|
By assumption $M'$ has at most $k$ extra states and so we can reach all those extra states by using $I^{\leq k}$ after $P$.
|
||||||
\begin{itemize}
|
|
||||||
\item[1] for all $x, y \in M:$ $x \sim_{W_x \cap W_y} y$ implies $x \sim y$,
|
|
||||||
\item[2] for all $x, y \in M$ and $z \in M'$: $x \sim_{W_x} z$ and $z
|
|
||||||
\sim_{W'_y} y$ implies $x \sim y$ and
|
|
||||||
\item[3] the machines $M$ and $M'$ agree on $T$,
|
|
||||||
\end{itemize}\vspace{-3mm}
|
|
||||||
then $M$ and $M'$ are equivalent.
|
|
||||||
\end{lemma}
|
|
||||||
\begin{proof}
|
|
||||||
First, we prove that $P \cdot I^{\leq k}$ reaches all states in $M'$.
|
|
||||||
For $p, q \in P$ and $x = \delta(s_0, p), y = \delta(s_0, q)$ such that $x
|
|
||||||
\not\sim_{W_x \cap W_y} y$, we also have $\delta'(m'_0, p) \not\sim_{W_x
|
|
||||||
\cap W_y} \delta'(m'_0, q)$ in the implementation $M'$.
|
|
||||||
By (1) this means that there
|
|
||||||
are at least $n$ different behaviours in $M'$, hence at least $n$ states.
|
|
||||||
|
|
||||||
Now $n$ states are reached by the previous argument (using the set $P$).
|
Second, we show that the reached states are bisimilar.
|
||||||
By assumption $M'$ has at most $k$ extra states and so we can reach all those extra states by using $I^{\leq k}$ after $P$.
|
Define the relation $R = \{(\delta(q_0, p), \delta'(q_0', p)) \mid p \in P \cdot I^{\leq k}\}$.
|
||||||
|
Note that for each $(s, i) \in R$ we have $s \sim_{W_s} i$.
|
||||||
|
For each state $i \in M'$ there is a state $s \in M$ such that $(s, i) \in R$, since we reach all states in both machines by $P \cdot I^{\leq k}$.
|
||||||
|
We will prove that this relation is in fact a bisimulation up-to equivalence.
|
||||||
|
|
||||||
Second, we show that the reached states are bisimilar.
|
For output, we note that $(s, i) \in R$ implies $\lambda(s, a) = \lambda'(i, a)$ for all $a$,
|
||||||
Define the relation $R = \{(\delta(q_0, p), \delta'(q_0', p)) \mid p \in P \cdot I^{\leq k}\}$.
|
since the machines agree on $P \cdot I^{\leq k+1}$.
|
||||||
Note that for each $(s, i) \in R$ we have $s \sim_{W_s} i$.
|
For the successors, let $(s, i) \in R$ and $a \in I$ and consider the successors $s_2 = \delta(s, a)$ and $i_2 = \delta'(i, a)$.
|
||||||
For each state $i \in M'$ there is a state $s \in M$ such that $(s, i) \in R$, since we reach all states in both machines by $P \cdot I^{\leq k}$.
|
We know that there is some $t \in M$ with $(t, i_2) \in R$.
|
||||||
We will prove that this relation is in fact a bisimulation up-to equivalence.
|
We also know that we tested $i_2$ with the set $W'_t$.
|
||||||
|
So we have:
|
||||||
For output, we note that $(s, i) \in R$ implies $\lambda(s, a) = \lambda'(i, a)$ for all $a$,
|
\startformula s_2 \sim_{W'_{s_2}} i_2 \sim_{W_t} t. \stopformula
|
||||||
since the machines agree on $P \cdot I^{\leq k+1}$.
|
By the second assumption, we conclude that $s_2 \sim t$.
|
||||||
For the successors, let $(s, i) \in R$ and $a \in I$ and consider the successors $s_2 = \delta(s, a)$ and $i_2 = \delta'(i, a)$.
|
So $s_2 \sim t$ and $(t, i) \in R$, which means that $R$ is a bisimulation up-to equivalence.
|
||||||
We know that there is some
|
Using the theory of up-to techniques \cite[DBLP:journals/cacm/BonchiP15] we know that there exists a bisimulation relation containing $R$.
|
||||||
$t \in M$ with $(t, i_2) \in R$. We also know that we
|
\todo{Up-to techniques are rather new. Maybe give them more attention. Also cite Jurriaan?}
|
||||||
tested $i_2$ with the set $W'_t$. So we have:
|
In particular $q_0$ and $q'_0$ are bisimilar.
|
||||||
$$ s_2 \sim_{W'_{s_2}} i_2 \sim_{W_t} t. $$ By the
|
And so the machines $M$ and $M'$ are equivalent.
|
||||||
second assumption, we conclude that $s_2 \sim t$.
|
\stopproof
|
||||||
So $s_2 \sim t$ and $(t, i_2) \in R$, which means that $R$ is a bisimulation up-to equivalence.
|
|
||||||
Using the theory of up-to techniques \cite{RotUpTo} we know that there exists a bisimulation relation containing $R$.
|
|
||||||
In particular $q_0$ and $q'_0$ are bisimilar.
|
|
||||||
And so the machines $M$ and $M'$ are equivalent.
|
|
||||||
\qed
|
|
||||||
\end{proof}
|
|
||||||
|
|
||||||
Before we show that the conditions hold for the test methods described in this paper, we reflect on the above proof first.
|
Before we show that the conditions hold for the test methods described in this paper, we reflect on the above proof first.
|
||||||
This proof is very similar to the completeness proof in \cite{Chow}.
|
This proof is very similar to the completeness proof in \cite[DBLP:journals/tse/Chow78].
|
||||||
(In fact, it is also similar to Lemma 4 in \cite{Angluin} which proves termination in the L* learning algorithm. This correspondence was noted in \cite{BergCorrespondence}.)
|
(In fact, it is also similar to Lemma 4 in \cite[DBLP:journals/iandc/Angluin87] which proves termination in the L* learning algorithm. This correspondence was noted in \cite[DBLP:conf/fase/BergGJLRS05].)
|
||||||
In the first part we argue that all states are visited by using some sort of counting and reachability argument.
|
In the first part we argue that all states are visited by using some sort of counting and reachability argument.
|
||||||
Then in the second part we show the actual equivalence.
|
Then in the second part we show the actual equivalence.
|
||||||
To the best of the authors' knowledge, this is the first $m$-completeness proof which explicitly uses the concept of a bisimulation.
|
To the best of the authors' knowledge, this is the first $m$-completeness proof which explicitly uses the concept of a bisimulation.
|
||||||
Using a bisimulation allows us to slightly generalize and use bisimulation up-to equivalence, dropping the often-assumed requirement that $M$ is minimal.
|
Using a bisimulation allows us to slightly generalize and use bisimulation up-to equivalence, dropping the often-assumed requirement that $M$ is minimal.
|
||||||
|
|
||||||
\begin{lemma}
|
\startlemma
|
||||||
Let $\Fam{W'}$ be a family of state identifiers for $M$. Define the family
|
Let $\Fam{W'}$ be a family of state identifiers for $M$.
|
||||||
$\Fam{W}$ by $W_s = \bigcup \Fam{W'}$. Then the conditions (1) and (2) in the
|
Define the family $\Fam{W}$ by $W_s = \bigcup \Fam{W'}$.
|
||||||
previous lemma are satisfied.
|
Then the conditions (1) and (2) in the previous lemma are satisfied.
|
||||||
\end{lemma}
|
\stoplemma
|
||||||
\begin{proof}
|
\startproof
|
||||||
The first condition we note that $W_x \cap W_y = W_x = W_y$, and so $x
|
For the first condition, we note that $W_x \cap W_y = W_x = W_y$, and so $x \sim_{W_x \cap W_y} y$ implies $x \sim_{W_x} y$; now by the definition of state identifier we get $x \sim y$.
|
||||||
\sim_{W_x \cap W_y} y$ implies $x \sim_{W_x} y$, now by definition of state
|
|
||||||
identifier we get $x \sim y$.
|
|
||||||
|
|
||||||
For the second condition, let $x \sim_{\bigcup \Fam{W'}} z \sim_{W'_y} y$.
|
For the second condition, let $x \sim_{\bigcup \Fam{W'}} z \sim_{W'_y} y$.
|
||||||
Then we note that $W'_y \subseteq \bigcup{W'}$ and so we get $x \sim_{W'_y} z
|
Then we note that $W'_y \subseteq \bigcup \Fam{W'}$ and so we get $x \sim_{W'_y} z \sim_{W'_y} y$.
|
||||||
\sim_{W'_y} y$. By transitivity we get $x \sim_{W'_y} y$ and so by definition
|
By transitivity we get $x \sim_{W'_y} y$ and so by definition of state identifier we get $x \sim y$.
|
||||||
of state identifier we get $x \sim y$.
|
\stopproof
|
||||||
\qed
|
|
||||||
\end{proof}
|
|
||||||
\begin{corollary}
|
|
||||||
The W, Wp, UIOv and hybrid UIOv test suites are $n+k$-complete.
|
|
||||||
\end{corollary}
|
|
||||||
|
|
||||||
\begin{lemma}
|
\startcorollary
|
||||||
Let $\Fam{H}$ be a separating family and take $\Fam{W} = \Fam{W'} = \Fam{H}$.
|
The W, Wp, UIOv and hybrid UIOv test suites are $n+k$-complete.
|
||||||
Then the conditions (1) and (2) in the previous lemma are satisfied.
|
\stopcorollary
|
||||||
\end{lemma}
|
|
||||||
\begin{proof}
|
\startlemma
|
||||||
Let $x \sim_{H_x \cap H_y} y$, then by definition of separating family $x \sim
|
Let $\Fam{H}$ be a separating family and take $\Fam{W} = \Fam{W'} = \Fam{H}$.
|
||||||
y$.
|
Then the conditions (1) and (2) in the previous lemma are satisfied.
|
||||||
For the second condition, let $x \sim_{H_x} z \sim_{H_y} y$. Then we get $x
|
\stoplemma
|
||||||
\sim_{H_x \cap H_y} z \sim_{H_x \cap H_y} y$ and so by transitivity $x
|
\startproof
|
||||||
\sim_{H_x \cap H_y} y$, hence again $x \sim y$.
|
Let $x \sim_{H_x \cap H_y} y$, then by definition of separating family $x \sim y$.
|
||||||
\qed
|
For the second condition, let $x \sim_{H_x} z \sim_{H_y} y$.
|
||||||
\end{proof}
|
Then we get $x \sim_{H_x \cap H_y} z \sim_{H_x \cap H_y} y$ and so by transitivity $x \sim_{H_x \cap H_y} y$, hence again $x \sim y$.
|
||||||
\begin{corollary}
|
\stopproof
|
||||||
The HSI, ADS and hybrid ADS test suites are $n+k$-complete.
|
|
||||||
\end{corollary}
|
\startcorollary
|
||||||
|
The HSI, ADS and hybrid ADS test suites are $n+k$-complete.
|
||||||
|
\stopcorollary
|
||||||
|
|
||||||
|
|
||||||
\section{Related Work}
|
\stopsection
|
||||||
|
\startsection
|
||||||
|
[title={Related Work}]
|
||||||
|
|
||||||
\todo{Re-read, this is outdated}
|
\todo{Re-read, this is outdated}
|
||||||
Comparison of test methods already appeared in the recent papers
|
A comparison of test methods already appeared in the recent papers \cite[DBLP:journals/infsof/DorofeevaEMCY10] and \cite[DBLP:journals/infsof/EndoS13].
|
||||||
\cite{Dorofeeva} and \cite{Endo}. Their work is mainly evaluated on randomly
|
Their work is mainly evaluated on randomly generated Mealy machines.
|
||||||
generated Mealy machines. We continue their work by evaluating on many
|
We continue their work by evaluating on many specifications from industry.
|
||||||
specifications from industry. In addition, we evaluate in the context of
|
In addition, we evaluate in the context of learning, which has been lacking so far.
|
||||||
learning, which has been lacking so far.
|
|
||||||
|
|
||||||
Many of the methods described here are benchmarked on small Mealy machines in \cite{Dorofeeva} and \cite{Endo}.
|
Many of the methods described here are benchmarked on small Mealy machines in \cite[DBLP:journals/infsof/DorofeevaEMCY10] and \cite[DBLP:journals/infsof/EndoS13].
|
||||||
Additionally, they included the P, H and SPY methods.
|
Additionally, they included the P, H and SPY methods.
|
||||||
Unfortunately, the P and H methods do not fit naturally in the overview presented in Section~\ref{sec:overview}.
|
Unfortunately, the P and H methods do not fit naturally in the overview presented in \in{Section}[sec:overview].
|
||||||
The P method is not able to test for extra states, making it less usable.
|
The P method is not able to test for extra states, making it less usable.
|
||||||
And the H method
|
And the H method \todo{?}
|
||||||
The SPY method builds upon the HSI-method and changes the
|
The SPY method builds upon the HSI-method and changes the prefixes in order to minimize the size of a test suite, exploiting overlap in test sequences.
|
||||||
prefixes in order to minimize the size of a test suite, exploiting overlap in test sequences.
|
|
||||||
We believe that this technique is independent of the HSI-method and can in fact be applied to all methods presented in this paper.
|
We believe that this technique is independent of the HSI-method and can in fact be applied to all methods presented in this paper.
|
||||||
As such, the SPY method should be considered as an optimization technique, orthogonal to the work in this paper.
|
As such, the SPY method should be considered as an optimization technique, orthogonal to the work in this paper.
|
||||||
|
|
||||||
More recently, a novel test method was devised which uses incomplete distinguishing sequences \cite{Hierons}.
|
More recently, a novel test method was devised which uses incomplete distinguishing sequences \cite[DBLP:journals/cj/HieronsT15].
|
||||||
They use sequences which can be considered to be adaptive distinguishing sequences on a subset of the state space.
|
They use sequences which can be considered to be adaptive distinguishing sequences on a subset of the state space.
|
||||||
With several of those one can cover the whole state space, obtaining a $m$-complete test suite.
|
With several of those one can cover the whole state space, obtaining a $m$-complete test suite.
|
||||||
This is a bit dual to our approach, as our "incomplete" adaptive distinguishing sequences define a coarse partition of the state space.
|
This is a bit dual to our approach, as our \quotation{incomplete} adaptive distinguishing sequences define a coarse partition of the state space.
|
||||||
Our method becomes complete by refining the tests with pairwise separating sequences.
|
Our method becomes complete by refining the tests with pairwise separating sequences.
|
||||||
|
|
||||||
Some work is put into minimizing the adaptive distinguishing sequences themselves.
|
Some work is put into minimizing the adaptive distinguishing sequences themselves.
|
||||||
In \cite{TY14} the authors describe greedy algorithms which construct small adaptive distinguishing sequences.
|
In \cite[DBLP:journals/fmsd/TurkerY14] the authors describe greedy algorithms which construct small adaptive distinguishing sequences.
|
||||||
\todo{We expect that similar
|
\todo{We expect that similar
|
||||||
heuristics also exist for the other test methods and that it will improve the
|
heuristics also exist for the other test methods and that it will improve the
|
||||||
performance.}
|
performance.}
|
||||||
However, they show that finding the minimal adaptive distinguishing sequence is
|
However, they show that finding the minimal adaptive distinguishing sequence is NP-complete; even approximation is NP-complete.
|
||||||
NP-complete, even approximation is NP-complete.
|
|
||||||
We would like to incorporate their greedy algorithms in our implementation.
|
We would like to incorporate their greedy algorithms in our implementation.
|
||||||
|
|
||||||
|
|
||||||
\section{Applications}
|
\stopsection
|
||||||
\label{sec:applications}
|
\startsection
|
||||||
|
[title={Applications},
|
||||||
|
reference=sec:applications]
|
||||||
|
|
||||||
The presented test generation method is implemented and used in a couple of applications.
|
The presented test generation method is implemented and used in a couple of applications.
|
||||||
The implementation can be found on \url{https://gitlab.science.ru.nl/moerman/Yannakakis}.
|
The implementation can be found on {\tt https://gitlab.science.ru.nl/moerman/hybrid-ads}.
|
||||||
|
|
||||||
This implementation has been used in several model learning applications: learning embedded controller software \cite{Smeenk}, learning the TCP protocol \cite{FiteruauTCP} and learning the MQTT protocol \cite{TapplerMQTT}.
|
This implementation has been used in several model learning applications:
|
||||||
|
learning embedded controller software \cite[DBLP:conf/icfem/SmeenkMVJ15], learning the TCP protocol \cite[DBLP:conf/cav/Fiterau-Brostean16] and learning the MQTT protocol \cite[DBLP:conf/icst/TapplerAB17].
|
||||||
|
|
||||||
\todo{Discuss some results, compare test-suite sizes}
|
\todo{Discuss some results, compare test-suite sizes}
|
||||||
\todo{Future work? More benchmarks? Other automaton models?}
|
\todo{Future work? More benchmarks? Other automaton models?}
|
||||||
|
|
||||||
\bibliographystyle{splncs03}
|
\stopsection
|
||||||
\bibliography{references}
|
\referencesifcomponent
|
||||||
|
\stopchapter
|
||||||
|
\stopcomponent
|
||||||
% \subsection{Known algorithms}
|
|
||||||
|
|
||||||
% We expose the well-known classical algorithms to find state identifiers here.
|
|
||||||
|
|
||||||
% \begin{algorithm}[h]
|
|
||||||
% \DontPrintSemicolon
|
|
||||||
% \KwIn{A Mealy machine $M$}
|
|
||||||
% \KwResult{A complete splitting tree $T$}
|
|
||||||
% initialize with single node\;
|
|
||||||
% \While{true}{
|
|
||||||
% \ForEach{symbol $a \in I$ and leaf $u \in T$}{
|
|
||||||
% \If{if $\lambda(l(u), a)$ hits two or more outputs}{
|
|
||||||
% split $u$ accordingly\;
|
|
||||||
% store $a$ as a witness in $\sigma(u)$\;
|
|
||||||
% } \ElseIf{$\delta(l(u), a)$ lands in labels of different leaves}{
|
|
||||||
% $v \leftarrow lca(\delta(l(u), a))$\;
|
|
||||||
% split $u$ similar to $v$\;
|
|
||||||
% store $a \sigma(v)$ as witness in $\sigma(u)$\;
|
|
||||||
% }
|
|
||||||
% }
|
|
||||||
% \If{no split has been made}{
|
|
||||||
% \Return{constructed tree}
|
|
||||||
% }
|
|
||||||
% }
|
|
||||||
% \caption{Moore's minimization algorithm adopted to record witnesses. When
|
|
||||||
% implemented efficiently it runs in $\bigO(pn^2)$. A more involved algorithm
|
|
||||||
% based on ideas of Hopcroft exists and runs in $\bigO(pn \log n)$.}
|
|
||||||
% \label{moore}
|
|
||||||
% \end{algorithm}
|
|
||||||
|
|
||||||
% \begin{algorithm}[h]
|
% \begin{algorithm}[h]
|
||||||
% \DontPrintSemicolon
|
% \DontPrintSemicolon
|
||||||
|
@ -782,5 +741,3 @@ This implementations has been used in several model learning applications: learn
|
||||||
% allows \emph{valid} splits.}
|
% allows \emph{valid} splits.}
|
||||||
% \label{lee-yannakakis}
|
% \label{lee-yannakakis}
|
||||||
% \end{algorithm}
|
% \end{algorithm}
|
||||||
|
|
||||||
\end{document}
|
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
% Bug in ConTeXt? Without the ../ the components cannot find the files
|
% Bug in ConTeXt? Without the ../ the components cannot find the files
|
||||||
\usepath[{environment,../environment}]
|
\usepath[{environment,../environment}]
|
||||||
|
|
||||||
|
\definemode[draft][yes]
|
||||||
|
|
||||||
\environment bib
|
\environment bib
|
||||||
\environment font
|
\environment font
|
||||||
\environment notation
|
\environment notation
|
||||||
|
@ -11,7 +13,6 @@
|
||||||
|
|
||||||
\mainlanguage[en]
|
\mainlanguage[en]
|
||||||
|
|
||||||
\setuppagenumbering[alternative=doublesided]
|
|
||||||
\setupindenting[yes, medium]
|
\setupindenting[yes, medium]
|
||||||
\setupwhitespace[medium]
|
\setupwhitespace[medium]
|
||||||
|
|
||||||
|
@@ -40,13 +41,19 @@
[headstyle=bold, alternative=serried, width=fit, text={Step }]

% The black dot is missing from Euler?
-\setupitemize[2,joinedup,nowhite,after]
+\setupitemize[2,joinedup,nowhite,autointro]
+% For more whitespace between items
+\defineitemgroup[bigitemize]
+\setupitemgroup[bigitemize][2,autointro]

% The standard \colon did not have much space
\define\colon{{\,{:}\,\,}}
\define[1]\defn{\emph{#1}}

\define[1]\todo{\color[red]{\ss \bf #1}}
+\define[1]\label{\todo{\tt \\label #1}}

+\define\referencesifcomponent{\doifnotmode{everything}{\section{References}\placelistofpublications}}

\usemodule[tikz]
\usetikzlibrary[automata, arrows, positioning, matrix, fit, decorations.pathreplacing]
@@ -55,6 +62,7 @@
\setupalgorithmic[align=middle,numbercolor=darkgray] % center line numbers
\setupalgorithmic[before={\startframedtext[width=0.85\textwidth,offset=none,frame=off]},after={\stopframedtext}] % To enable line numbers in a float

% Debugging
%\enabletrackers[references.references, visualizers.justification]
%\showmakeup

@@ -2,6 +2,8 @@

\usebtxdefinitions[apa]
\usebtxdataset[../biblio.bib]
+\usebtxdataset[../biblio-eigen.bib]
+\usebtxdataset[../biblio-web.bib]

\setupbtx[apa:cite][etallimit=2,separator:4={ and }]

@@ -9,6 +11,7 @@
\btxremapauthor [Paul Fiterau{-}Brostean] [Paul Fiterău-Broștean]
\btxremapauthor [Michal Szynwelski] [Micha\l{} Szynwelski]
\btxremapauthor [Slawomir Lasota] [S\l{}awomir Lasota]
+\btxremapauthor [Eryk Kopczynski] [Eryk Kopczy\'nski]

%\define[1]\citenp{\cite[left=,right=][#1]} % Bla and Foo, 2015
%\define[1]\citet{\cite[authoryears][#1]} % Bla and Foo (2015)

@@ -10,7 +10,7 @@
% For sans-serif and teletype we use the Latin Modern fonts
% The teletype font is made slightly larger
\definefontfamily[mainfamily] [ss] [Latin Modern Sans]
-\definefontfamily[mainfamily] [tt] [Latin Modern Mono][rscale=1.05]
+\definefontfamily[mainfamily] [tt] [Latin Modern Mono][rscale=1.03]
% (Neo) Euler has no mathbb symbols, so we take those from Pagella
\definefallbackfamily[mainfamily] [mm] [TeX Gyre Pagella Math][range=uppercasedoublestruck]
% If needed, we could replace mathcal with a different font

@@ -6,16 +6,20 @@
\define\bigO{\mathcal{O}}

\define\pref{pref}
+\define\suff{suff}

+\define[1]\kw{\text{\tt #1}}

% L* algorithms
\define\LStar{{\tt L}$^{\ast}$}
\define\NLStar{{\tt NL}$^{\ast}$}
-\define\nLStar{$\nu$\LStar}
-\define\nNLStar{$\nu$\NLStar}
+\define\nLStar{$\nu$\LStar{}}
+\define\nLStarCol{$\nu${\tt L}$^{\ast}_{\text{col}}$}
+\define\nNLStar{$\nu$\NLStar{}}

% Libraries
\define\ONS{{\sc Ons}}
-\define\NLambda{{\sc N}$\lambda$}
+\define\NLambda{{\sc Nλ}}
\define\LOIS{{\sc Lois}}

% Maths
@@ -26,6 +30,14 @@
\define\id{id}
\define\supp{{\tt supp}}
\define\orb{{\tt orb}}
+\define\union{\cup}
+\define\Union{\bigcup}
+\define\rowunion{\sqcup}
+\define\Rowunion{\bigsqcup}
+\define\rowincl{\sqsubseteq}
+\define\rowincls{\sqsubset}
+\define[1]\lder{{#1}^{-1}}
+\define[2]\restr{{#1}|_{#2}}

% language extension
\define\Lext{{\cdot}}

@@ -4,10 +4,32 @@
% We add 5mm of bleed on both sides, so that images can be printed to the edge
\definepapersize[proefschrift+rand][width=180mm, height=250mm]
\definepapersize[proefschrift][width=170mm, height=240mm]

\setuppapersize[proefschrift][proefschrift+rand]
\setuplayout[location={middle,middle}]
+\setuplayout
+  [width=130mm,
+   height=195mm,
+   backspace=20mm,
+   topspace=25mm,
+   header=0mm, % needed?
+   footer=0mm]

% Crop marks
\setuplayout[marking=on]

+% Asymmetric
+\setuppagenumbering[alternative=doublesided]
+
+\startmode[draft]
+  \setuppapersize[A5]
+  \setuplayout
+    [width=130mm,
+     height=195mm,
+     backspace=12mm,
+     topspace=8mm,
+     header=0mm, % needed?
+     footer=0mm]
+\stopmode

\stopenvironment