From 56f14e8ce2090303485ec9ec1ad5ca122b6d2e2e Mon Sep 17 00:00:00 2001 From: Joshua Moerman Date: Wed, 19 Sep 2018 14:19:08 +0200 Subject: [PATCH] More on sep seqs. Did some tikz work too. --- content/learning-nominal-automata.tex | 215 ++++++------ content/minimal-separating-sequences.tex | 415 ++++++++++++++++++++++- environment.tex | 4 +- environment/tikz.tex | 36 ++ 4 files changed, 557 insertions(+), 113 deletions(-) create mode 100644 environment/tikz.tex diff --git a/content/learning-nominal-automata.tex b/content/learning-nominal-automata.tex index bceb72d..2da5224 100644 --- a/content/learning-nominal-automata.tex +++ b/content/learning-nominal-automata.tex @@ -84,16 +84,16 @@ The algorithm is able to fill the table with membership queries. As an example, and to set notation, consider the following table (over $A=\{a, b\}$). \starttikzpicture -\matrix[matrix of nodes, column 1/.style={anchor=base west}](tab) +\matrix[obs table](tab) { - \phantom{$\epsilon$} & {$\epsilon$} & $a$ & $aa$ \\ - $\epsilon$ & $0$ & $0$ & $1$ \\ - $a$ & $0$ & $1$ & $0$ \\ - $b$ & $0$ & $0$ & $0$ \\ + \phantom{\epsilon} \& \epsilon \& a \& aa \\ + \epsilon \& 0 \& 0 \& 1 \\ + a \& 0 \& 1 \& 0 \\ + b \& 0 \& 0 \& 0 \\ }; -\draw (tab-1-1.south west) -| (tab-1-4.south east); -\draw (tab-2-1.south west) -| (tab-2-4.south east); -\draw (tab-1-1.north east) -| (tab-4-1.south east); +\draw (tab-2-2.north -| tab.west) -- (tab-2-2.north -| tab.east); +\draw (tab-3-2.north -| tab.west) -- (tab-3-2.north -| tab.east); +\draw (tab-2-2.west |- tab.north) -- (tab-2-2.west |- tab.south); \draw[decorate, decoration=brace] (tab-1-2.north) -- node[above]{$E$} (tab-1-4.north) ; \draw[decorate, decoration=brace] (tab-4-1.west) -- node[left]{$S \cup S \Lext A$} (tab-2-1.west); @@ -171,16 +171,16 @@ We start from $S, E = \{\epsilon\}$, and we fill the entries of the table below The table is closed and consistent, so we construct the hypothesis $\autom_1$. \starttikzpicture -\matrix[matrix of nodes, column 1/.style={anchor=base west}](tab) +\matrix[obs table](tab) { - \phantom{$\epsilon$} & {$\epsilon$} \\ - $\epsilon$ & $0$ \\ - $a$ & $0$ \\ - $b$ & $0$ \\ + \phantom{\epsilon} \& \epsilon \\ + \epsilon \& 0 \\ + a \& 0 \\ + b \& 0 \\ }; -\draw (tab-1-1.south west) -| (tab-1-2.south east); -\draw (tab-2-1.south west) -| (tab-2-2.south east); -\draw (tab-1-1.north east) -| (tab-4-1.south east); +\draw (tab-2-2.north -| tab.west) -- (tab-2-2.north -| tab.east); +\draw (tab-3-2.north -| tab.west) -- (tab-3-2.north -| tab.east); +\draw (tab-2-2.west |- tab.north) -- (tab-2-2.west |- tab.south); \stoptikzpicture \starttikzpicture \node[initial,state,initial text={$\autom_1 = $}] (q0) {$q_0$}; @@ -202,20 +202,20 @@ Therefore, line 9 is triggered and an extra column $a$, highlighted in red, is a The new table is closed and consistent and a new hypothesis $\autom_2$ is constructed. 
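+To make the bookkeeping concrete, the following sketch (an added illustration, not part of the original text; the oracle \type{member} and the brute-force checks are assumptions of this sketch) computes rows with membership queries for the running example $\lang_1 = \{aa, bb\}$ over $A = \{a, b\}$ and tests closedness and consistency.
+\starttyping
+A = ["a", "b"]
+
+def member(w):
+    # membership oracle for the example language L1 = {aa, bb}
+    return len(w) == 2 and w[0] == w[1]
+
+def row(s, E):
+    # the function row(s) : E -> 2 used in the table
+    return tuple(member(s + e) for e in E)
+
+def closed(S, E):
+    # every row of S.A already occurs as the row of some s in S
+    rows_S = {row(s, E) for s in S}
+    return all(row(s + a, E) in rows_S for s in S for a in A)
+
+def consistent(S, E):
+    # prefixes with equal rows keep equal rows after one more symbol
+    return all(row(s1 + a, E) == row(s2 + a, E)
+               for s1 in S for s2 in S
+               if row(s1, E) == row(s2, E)
+               for a in A)
+
+S, E = {""}, {""}                      # the table of step 1
+print(closed(S, E), consistent(S, E))  # True True, so a hypothesis is built
+\stoptyping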
\starttikzpicture -\matrix[matrix of nodes, column 1/.style={anchor=base west}](tab) +\matrix[obs table](tab) { - \phantom{$\epsilon$} & \epsilon \\ - \epsilon & 0 \\ - a & 0 \\ - aa & 1 \\ - b & 0 \\ - ab & 0 \\ - aaa & 0 \\ - aab & 0 \\ + \phantom{\epsilon} \& \epsilon \\ + \epsilon \& 0 \\ + a \& 0 \\ + aa \& 1 \\ + b \& 0 \\ + ab \& 0 \\ + aaa \& 0 \\ + aab \& 0 \\ }; -\draw (tab-1-1.south west) -| (tab-1-2.south east); -\draw (tab-4-1.south west) -| (tab-4-2.south east); -\draw (tab-1-2.north west) -| (tab-8-2.south west); +\draw (tab-2-2.north -| tab.west) -- (tab-2-2.north -| tab.east); +\draw (tab-5-2.north -| tab.west) -- (tab-5-2.north -| tab.east); +\draw (tab-2-2.west |- tab.north) -- (tab-2-2.west |- tab.south); \node[fit=(tab-2-1)(tab-2-2), dotted, draw, inner sep=-2pt] (r1) {}; \node[fit=(tab-3-1)(tab-3-2), dotted, draw, inner sep=-2pt] (r2) {}; @@ -225,22 +225,22 @@ The new table is closed and consistent and a new hypothesis $\autom_2$ is constr \path[->] (r2.east) edge[in=20,out=-10,looseness=2] node[right] {$a$} (r3.east); \stoptikzpicture \starttikzpicture -\matrix[matrix of nodes, column 1/.style={anchor=base west}, column 3/.style={nodes={color=red}}](tab) +\matrix[obs table, column 3/.style={nodes={color=red}}](tab) { - \phantom{$\epsilon$} & \epsilon & a \\ - \epsilon & 0 & 0 \\ - a & 0 & 1 \\ - aa & 1 & 0 \\ - b & 0 & 0 \\ - ab & 0 & 0 \\ - aaa & 0 & 0 \\ - aab & 0 & 0 \\ + \phantom{\epsilon} \& \epsilon \& a \\ + \epsilon \& 0 \& 0 \\ + a \& 0 \& 1 \\ + aa \& 1 \& 0 \\ + b \& 0 \& 0 \\ + ab \& 0 \& 0 \\ + aaa \& 0 \& 0 \\ + aab \& 0 \& 0 \\ }; -\draw (tab-1-1.south west) -| (tab-1-3.south east); -\draw (tab-4-1.south west) -| (tab-4-3.south east); -\draw (tab-1-2.north west) -| (tab-8-2.south west); +\draw (tab-2-2.north -| tab.west) -- (tab-2-2.north -| tab.east); +\draw (tab-5-2.north -| tab.west) -- (tab-5-2.north -| tab.east); +\draw (tab-2-2.west |- tab.north) -- (tab-2-2.west |- tab.south); \stoptikzpicture -\starttikzpicture[node distance=2cm] +\starttikzpicture \node[initial,state,initial text={$\autom_2 = $}] (q0) {$q_0$}; \node[state,right of=q0] (q1) {$q_1$}; \node[state,accepting,below of=q1] (q2) {$q_2$}; @@ -263,24 +263,24 @@ Therefore we add the column $b$ and we get the table on the right, which is clos The new hypothesis is $\autom_3$. \starttikzpicture -\matrix[matrix of nodes, column 1/.style={anchor=base west}](tab) +\matrix[obs table](tab) { - \phantom{$\epsilon$} & \epsilon & a \\ - \epsilon & 0 & 0 \\ - a & 0 & 1 \\ - aa & 1 & 0 \\ - b & 0 & 0 \\ - bb & 1 & 0 \\ - ab & 0 & 0 \\ - aaa & 0 & 0 \\ - aab & 0 & 0 \\ - ba & 0 & 0 \\ - bba & 0 & 0 \\ - bbb & 0 & 0 \\ + \phantom{\epsilon} \& \epsilon \& a \\ + \epsilon \& 0 \& 0 \\ + a \& 0 \& 1 \\ + aa \& 1 \& 0 \\ + b \& 0 \& 0 \\ + bb \& 1 \& 0 \\ + ab \& 0 \& 0 \\ + aaa \& 0 \& 0 \\ + aab \& 0 \& 0 \\ + ba \& 0 \& 0 \\ + bba \& 0 \& 0 \\ + bbb \& 0 \& 0 \\ }; -\draw (tab-1-1.south west) -| (tab-1-3.south east); -\draw (tab-6-1.south west) -| (tab-6-3.south east); -\draw (tab-1-2.north west) -| (tab-12-2.south west); +\draw (tab-2-2.north -| tab.west) -- (tab-2-2.north -| tab.east); +\draw (tab-7-2.north -| tab.west) -- (tab-7-2.north -| tab.east); +\draw (tab-2-2.west |- tab.north) -- (tab-2-2.west |- tab.south); \node[fit=(tab-2-1)(tab-2-3), dotted, draw, inner sep=-2pt] (r1) {}; \node[fit=(tab-5-1)(tab-5-3), dotted, draw, inner sep=-2pt] (r2) {}; @@ -290,26 +290,26 @@ The new hypothesis is $\autom_3$. 
\path[->] (r2.east) edge[in=20,out=-10,looseness=2] node[right] {$b$} (r3.east); \stoptikzpicture \starttikzpicture -\matrix[matrix of nodes, column 1/.style={anchor=base west}, column 4/.style={nodes={color=red}}](tab) +\matrix[obs table, column 4/.style={nodes={color=red}}](tab) { - \phantom{$\epsilon$} & \epsilon & a & b \\ - \epsilon & 0 & 0 & 0 \\ - a & 0 & 1 & 0 \\ - aa & 1 & 0 & 0 \\ - b & 0 & 0 & 1\\ - bb & 1 & 0 & 0 \\ - ab & 0 & 0 & 0 \\ - aaa & 0 & 0 & 0 \\ - aab & 0 & 0 & 0 \\ - ba & 0 & 0 & 0 \\ - bba & 0 & 0 & 0 \\ - bbb & 0 & 0 & 0 \\ + \phantom{\epsilon} \& \epsilon \& a \& b \\ + \epsilon \& 0 \& 0 \& 0 \\ + a \& 0 \& 1 \& 0 \\ + aa \& 1 \& 0 \& 0 \\ + b \& 0 \& 0 \& 1 \\ + bb \& 1 \& 0 \& 0 \\ + ab \& 0 \& 0 \& 0 \\ + aaa \& 0 \& 0 \& 0 \\ + aab \& 0 \& 0 \& 0 \\ + ba \& 0 \& 0 \& 0 \\ + bba \& 0 \& 0 \& 0 \\ + bbb \& 0 \& 0 \& 0 \\ }; -\draw (tab-1-1.south west) -| (tab-1-4.south east); -\draw (tab-6-1.south west) -| (tab-6-4.south east); -\draw (tab-1-2.north west) -| (tab-12-2.south west); +\draw (tab-2-2.north -| tab.west) -- (tab-2-2.north -| tab.east); +\draw (tab-7-2.north -| tab.west) -- (tab-7-2.north -| tab.east); +\draw (tab-2-2.west |- tab.north) -- (tab-2-2.west |- tab.south); \stoptikzpicture -\starttikzpicture[node distance=2cm] +\starttikzpicture \node[initial,state,initial text={$\autom_3 = $}] (q0) {$q_0$}; \node[state,right of=q0] (q1) {$q_1$}; \node[state,below of=q0] (q2) {$q_2$}; @@ -331,7 +331,7 @@ The Teacher replies {\bf no} and provides the counterexample $babb$, so $S \gets \startstep{4} One more step brings us to the correct hypothesis $\autom_4$ (details are omitted). -\starttikzpicture[node distance=2cm] +\starttikzpicture \node[initial,state,initial text={$\autom_4 = $}] (q0) {$q_0$}; \node[state,above right= 15pt of q0] (q1) {$q_1$}; \node[state,below right= 15pt of q0] (q2) {$q_2$}; @@ -361,7 +361,7 @@ Consider now an infinite alphabet $A = \{a,b,c,d,\dots\}$. The language $\lang_1$ becomes $\{aa,bb,cc,dd,\dots\}$. Classical theory of finite automata does not apply to this kind of languages, but one may draw an infinite deterministic automaton that recognizes $\lang_1$ in the standard sense: -\starttikzpicture[node distance=2cm] +\starttikzpicture \node[initial,state,initial text={$\autom_5 = $}] (q0) {$q_0$}; \node[state, right of=q0] (qb) {$q_b$}; \node[state, above=10 pt of qb] (qa) {$q_a$}; @@ -386,7 +386,7 @@ Classical theory of finite automata does not apply to this kind of languages, bu where $\tot{A}$ and $\tot{{\neq} a}$ stand for the infinitely-many transitions labelled by elements of $A$ and $A \setminus \{a\}$, respectively. 
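+The behaviour of $\autom_5$ is easy to simulate: it suffices to remember the single letter read in $q_0$. The sketch below (an added illustration, not from the original text; the state names only loosely follow the picture) accepts $\lang_1$ in exactly this register-like manner.
+\starttyping
+def accepts_L1(word):
+    # the stored letter plays the role of the infinitely many states q_a, q_b, ...
+    state, register = "q0", None
+    for letter in word:
+        if state == "q0":
+            state, register = "q_stored", letter   # remember the first letter
+        elif state == "q_stored" and letter == register:
+            state = "accept"                       # second letter equals the first
+        else:
+            return False                           # no accepting state is reachable
+    return state == "accept"
+
+assert accepts_L1("aa") and accepts_L1("dd")
+assert not any(map(accepts_L1, ["", "a", "ab", "aaa"]))
+\stoptyping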
This automaton is infinite, but it can be finitely presented in a variety of ways, for example: -\starttikzpicture[node distance=2cm] +\starttikzpicture \node[initial,state,initial text={}] (q0) {$q_0$}; \node[state, right of=q0] (qx) {$q_x$}; \node[state,accepting,right of=qx] (q3) {$q_3$}; @@ -458,15 +458,15 @@ In fact, for any $a \in A$, we have $A = \{ \pi(a) \mid \text{$\pi$ is a permuta Then, by {\bf (P2)}, $row(\pi(a))(\epsilon) = row(a)(\epsilon) = 0$, for all $\pi$, so the first table can be written as: \starttikzpicture -\matrix[matrix of nodes,column 1/.style={anchor=base west}](tab) +\matrix[obs table](tab) { - \phantom{$\epsilon$} & {$\epsilon$} \\ - $\epsilon$ & $0$ \\ - $a$ & $0$ \\ + \phantom{\epsilon} \& \epsilon \\ + \epsilon \& 0 \\ + a \& 0 \\ }; -\draw (tab-1-1.south west) -| (tab-1-2.south east); -\draw (tab-2-1.south west) -| (tab-2-2.south east); -\draw (tab-1-1.north east) -| (tab-3-1.south east); +\draw (tab-2-2.north -| tab.west) -- (tab-2-2.north -| tab.east); +\draw (tab-3-2.north -| tab.west) -- (tab-3-2.north -| tab.east); +\draw (tab-2-2.west |- tab.north) -- (tab-2-2.west |- tab.south); \stoptikzpicture \starttikzpicture \node[initial,state,initial text={$\autom'_1 = $}] (q0) {$q_0$}; @@ -484,19 +484,19 @@ Therefore we extend $S$ with the (infinite!) set of such words. The new symbolic table is: \starttikzpicture -\matrix[matrix of nodes,column 1/.style={anchor=base west}](tab) +\matrix[obs table](tab) { - \phantom{$\epsilon$} & \epsilon \\ - \epsilon & 0 \\ - a & 0 \\ - aa & 1 \\ - ab & 0 \\ - aaa & 0 \\ - aab & 0 \\ + \phantom{\epsilon} \& \epsilon \\ + \epsilon \& 0 \\ + a \& 0 \\ + aa \& 1 \\ + ab \& 0 \\ + aaa \& 0 \\ + aab \& 0 \\ }; -\draw (tab-1-1.south west) -| (tab-1-2.south east); -\draw (tab-4-1.south west) -| (tab-4-2.south east); -\draw (tab-1-2.north west) -| (tab-7-2.south west); +\draw (tab-2-2.north -| tab.west) -- (tab-2-2.north -| tab.east); +\draw (tab-5-2.north -| tab.west) -- (tab-5-2.north -| tab.east); +\draw (tab-2-2.west |- tab.north) -- (tab-2-2.west |- tab.south); \stoptikzpicture The lower part stands for elements of $S \Lext A$. @@ -512,19 +512,19 @@ In order to fix consistency, while keeping $E$ equivariant, we need to add colum The resulting table is \starttikzpicture -\matrix[matrix of nodes,column 1/.style={anchor=base west}](tab) +\matrix[obs table](tab) { - \phantom{$\epsilon$} & \epsilon & a & b & c & \dots \\ - \epsilon & 0 & 0 & 0 & 0 & \dots \\ - a & 0 & 1 & 0 & 0 & \dots \\ - aa & 1 & 0 & 0 & 0 & \dots \\ - ab & 0 & 0 & 0 & 0 & \dots \\ - aaa & 0 & 0 & 0 & 0 & \dots \\ - aab & 0 & 0 & 0 & 0 & \dots \\ + \phantom{\epsilon} \& \epsilon \& a \& b \& c \& \dots \\ + \epsilon \& 0 \& 0 \& 0 \& 0 \& \dots \\ + a \& 0 \& 1 \& 0 \& 0 \& \dots \\ + aa \& 1 \& 0 \& 0 \& 0 \& \dots \\ + ab \& 0 \& 0 \& 0 \& 0 \& \dots \\ + aaa \& 0 \& 0 \& 0 \& 0 \& \dots \\ + aab \& 0 \& 0 \& 0 \& 0 \& \dots \\ }; -\draw (tab-1-1.south west) -| (tab-1-6.south east); -\draw (tab-4-1.south west) -| (tab-4-6.south east); -\draw (tab-1-2.north west) -| (tab-7-2.south west); +\draw (tab-2-2.north -| tab.west) -- (tab-2-2.north -| tab.east); +\draw (tab-5-2.north -| tab.west) -- (tab-5-2.north -| tab.east); +\draw (tab-2-2.west |- tab.north) -- (tab-2-2.west |- tab.south); \stoptikzpicture where non-specified entries are $0$. @@ -610,8 +610,7 @@ This map can be used to get an upper bound for the number of orbits of $X_1 \tim Suppose $O_i$ is an orbit of $X_i$. 
Then we have a surjection \startformula -\EAlph^{(k_i)} \times \cdots \times \EAlph^{(k_n)} -\xrightarrow{f_{O_1} \times \cdots \times f_{O_n}} O_1 \times \cdots \times O_n +f_{O_1} \times \cdots \times f_{O_n} \colon \EAlph^{(k_i)} \times \cdots \times \EAlph^{(k_n)} \to O_1 \times \cdots \times O_n \stopformula stipulating that the codomain cannot have more orbits than the domain. Let $f_\EAlph(\{k_i\})$ denote the number of orbits of $\EAlph^{(k_1)} \times \cdots \times \EAlph^{(k_n)}$, for any finite sequence of natural numbers $\{k_i\}$. diff --git a/content/minimal-separating-sequences.tex b/content/minimal-separating-sequences.tex index 4eeca97..0a968b8 100644 --- a/content/minimal-separating-sequences.tex +++ b/content/minimal-separating-sequences.tex @@ -209,7 +209,7 @@ We continue until $T$ is complete. [title={A FSM (a) and a complete splitting tree for it (b)}, reference=fig:automaton-tree] -\starttikzpicture[shorten >=1pt,node distance=2cm,>=stealth'] +\starttikzpicture \node[state] (0) {$s_0$}; \node[state] (1) [above right of=0] {$s_1$}; \node[state] (2) [right of=1] {$s_2$}; @@ -237,7 +237,418 @@ We continue until $T$ is complete. \stopplacefigure -\todo{Ga verder bij \tt 285} +\stopsubsection +\stopsection +\startsection + [title={Minimal Separating Sequences}] + +In \in{Section}[sec:tree] we have described an algorithm for constructing a complete splitting tree. +This algorithm is non-deterministic, as there is no prescribed order on the splits. +In this section we order them to obtain minimal separating sequences. + +Let $u$ be a non-root inner node in a splitting tree, then the sequence $\sigma(u)$ can also be used to split the parent of $u$. +This allows us to construct splitting trees where children will never have shorter sequences than their parents, as we can always split with those sequences first. +Trees obtained in this way are guaranteed to be \emph{layered}, which means that for all nodes $u$ and all $u' \in C(u)$, $|\sigma(u)| \leq |\sigma(u')|$. +Each layer consists of nodes for which the associated separating sequences have the same length. + +Our approach for constructing minimal sequences is to ensure that each layer is as large as possible before continuing to the next one. +This idea is expressed formally by the following definitions. + +\startdefinition +A splitting tree $T$ is \emph{$k$-stable} if for all states $s$ and $t$ in the same leaf we have $\lambda(s, x) = \lambda(t, x)$ for all $x \in I^{\leq k}$. +\stopdefinition + +\startdefinition +A splitting tree $T$ is \emph{minimal} if for all states $s$ and $t$ in different leaves $\lambda(s, x) \neq \lambda(t, x)$ implies $|x| \geq |\sigma(\lca(s, t))|$ for all $x \in I^{*}$. +\stopdefinition + +Minimality of a splitting tree can be used to obtain minimal separating sequences for pairs of states. +If the tree is in addition stable, we obtain minimal separating sequences for all inequivalent pairs of states. +Note that if a minimal splitting tree is $(n-1)$-stable ($n$ is the number of states of $M$), then it is stable (\in{Definition}[def:stable]). +This follows from the well-known fact that $n-1$ is an upper bound for the length of a minimal separating sequence \citep[Moore1956]. + +\in{Algorithm}[alg:minimaltree] ensures a stable and minimal splitting tree. +The first repeat-loop is the same as before (in \in{Algorithm}[alg:tree]). +Clearly, we obtain a $1$-stable and minimal splitting tree here. +It remains to show that we can extend this to a stable and minimal splitting tree. 
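+As an aside, $k$-stability itself is straightforward to check by brute force, which can be useful when testing an implementation. The sketch below (an added illustration with assumed dictionary encodings of $\delta$ and $\lambda$; the actual algorithm never tests stability this way) spells out the definition.
+\starttyping
+from itertools import product
+
+def output(lam, delta, s, word):
+    # lambda(s, x): the output word produced by reading x from state s
+    out = []
+    for a in word:
+        out.append(lam[s, a])
+        s = delta[s, a]
+    return tuple(out)
+
+def is_k_stable(leaves, delta, lam, inputs, k):
+    # states in a common leaf must agree on every word of length <= k
+    words = [w for n in range(k + 1) for w in product(inputs, repeat=n)]
+    return all(output(lam, delta, s, w) == output(lam, delta, t, w)
+               for leaf in leaves for s in leaf for t in leaf for w in words)
+\stoptyping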
+\in{Algorithm}[alg:increase-k] will perform precisely one such step towards stability, while maintaining minimality.
+Termination follows for the same reason as for \in{Algorithm}[alg:tree].
+Correctness of this algorithm is shown by the following key lemma.
+We will denote the input tree by $T$ and the tree after performing \in{Algorithm}[alg:increase-k] by $T'$.
+Observe that $T$ is an initial segment of $T'$.
+
+\startlemma
+ [reference=lem:stability]
+\in{Algorithm}[alg:increase-k] ensures a $(k+1)$-stable minimal splitting tree.
+\stoplemma
+\startproof
+Let us first prove stability.
+Let $s$ and $t$ be in the same leaf of $T'$ and let $x \in I^{*}$ be such that $\lambda(s, x) \neq \lambda(t, x)$.
+We show that $|x| > k+1$.
+
+Suppose for the sake of contradiction that $|x| \leq k+1$.
+Let $u$ be the leaf containing $s$ and $t$ and write $x = a x'$.
+We see that $\delta(s, a)$ and $\delta(t, a)$ are separated, by $k$-stability of $T$.
+So the node $v = \lca(\delta(l(u), a))$ has children and an associated sequence $\sigma(v)$.
+There are two cases:
+\startitemize
+\item
+If $|\sigma(v)| < k$, then $a \sigma(v)$ separates $s$ and $t$ and is of length $\leq k$.
+This contradicts the $k$-stability of $T$.
+\item
+If $|\sigma(v)| = k$, then the loop in \in{Algorithm}[alg:increase-k] will consider this case and split.
+Note that this may not split $s$ and $t$ (it may occur that $a \sigma(v)$ splits different elements in $l(u)$).
+We can repeat the above argument inductively for the newly created leaf containing $s$ and $t$.
+By finiteness of $l(u)$, the induction will stop and, in the end, $s$ and $t$ are split.
+\stopitemize
+Both cases end in contradiction, so we conclude that $|x| > k+1$.
+
+Let us now prove minimality.
+It suffices to consider only newly split states in $T'$.
+Let $s$ and $t$ be two states with $|\sigma(\lca(s, t))| = k+1$.
+Let $x \in I^{*}$ be a sequence such that $\lambda(s, x) \neq \lambda(t, x)$.
+We need to show that $|x| \geq k+1$.
+Since $x \neq \epsilon$, we can write $x = a x'$ and consider the states $s' = \delta(s, a)$ and $t' = \delta(t, a)$, which are separated by $x'$.
+Two things can happen:
+\startitemize
+\item
+The states $s'$ and $t'$ are in the same leaf in $T$.
+Then, by $k$-stability of $T$, we get $\lambda(s', y) = \lambda(t', y)$ for all $y \in I^{\leq k}$.
+So $|x'| > k$.
+\item
+The states $s'$ and $t'$ are in different leaves in $T$; let $u = \lca(s', t')$.
+Then $a \sigma(u)$ separates $s$ and $t$.
+Since $s$ and $t$ are in the same leaf in $T$, we get $|a \sigma(u)| \geq k+1$ by $k$-stability.
+This means that $|\sigma(u)| \geq k$ and, by minimality of $T$, we get $|x'| \geq k$.
+\stopitemize
+In both cases we have shown that $|x| \geq k+1$, as required.
+\stopproof + +\todo{algorithms 2 and 3} + +%\begin{algorithm}[t] +% \DontPrintSemicolon +% \KwIn{A FSM $M$ with $n$ states} +% \KwResult{A stable, minimal splitting tree $T$} +% initialize $T$ to be a tree with a single node labeled $S$\; +% \Repeat{$P(T)$ is acceptable}{ +% find $a \in I, B \in P(T)$ such that we can split $B$ w.r.t.\@ output $\lambda(\cdot, a)$\; +% expand the $u \in T$ with $l(u)=B$ as described in (split-output)\; +% } +% \For{$k=1$ to $n-1$}{ +% perform Algorithm~\ref{alg:increase-k} or Algorithm~\ref{alg:increase-k-v3} on $T$ for +% $k$\; +% } +% \caption{Constructing a stable and minimal splitting tree} +% \label{alg:minimaltree} +%\end{algorithm} + +%\begin{algorithm}[t] +% \DontPrintSemicolon +% \KwIn{a $k$-stable and minimal splitting tree $T$} +% \KwResult{$T$ is a $(k+1)$-stable, minimal splitting tree} +% \ForAll{leaves $u \in T$ and all inputs $a$}{ +% locate $v=\lca(\delta(l(u), a))$\; +% \If{$v$ is an inner node and $|\sigma(v)| = k$}{\label{sigma-k} +% expand $u$ as described in (split-state) (which generates new leaves)\; +% } +% } +% \caption{A step towards the stability of a splitting tree} +% \label{alg:increase-k} +%\end{algorithm} + +\startexample +\in{Figure}[fig:splitting-tree-only] shows a stable and minimal splitting tree $T$ for the machine in \in{Figure}[fig:automaton]. +This tree is constructed by \in{Algorithm}[alg:minimaltree] as follows. +It executes the same as \in{Algorithm}[alg:tree] until we consider the node labelled $\{s_0, s_2\}$. +At this point $k = 1$. +We observe that the sequence of $\lca(\delta(\{s_0,s_2\}, a))$ has length $2$, which is too long, so we continue with the next input. +We find that we can indeed split w.r.t. the state after $b$, so the associated sequence is $ba$. +Continuing, we obtain the same partition as before, but with smaller witnesses. + +The internal data structure (a refinable partition) is shown in \in{Figure}[fig:internal-data-structure]: +the array with the permutation of the states is at the bottom, +and every block includes an indication of the slice containing its label and a pointer to its parent (as our final algorithm needs to find the parent block, but never the child blocks). 
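+The sketch below (an added illustration; all names are invented and it is not the implementation referred to in this chapter) shows one way to realize such a refinable partition: the permutation of the states in a single array, blocks as slices of that array with only a parent pointer, and a split operation that swaps states to one side of a pivot.
+\starttyping
+class RefinablePartition:
+    def __init__(self, states):
+        self.elements = list(states)             # permutation of the states
+        self.position = {s: i for i, s in enumerate(self.elements)}
+        self.blocks = [{"begin": 0, "end": len(self.elements), "parent": None}]
+        self.leaf_of = {s: 0 for s in states}    # constant-time leaf lookup
+
+    def split(self, b, movers):
+        # swap the states `movers` to the right end of block b and make
+        # both halves children of b; returns the two new block indices
+        blk = self.blocks[b]
+        pivot = blk["end"]
+        for s in movers:
+            pivot -= 1
+            i, j = self.position[s], pivot
+            other = self.elements[j]
+            self.elements[i], self.elements[j] = other, s
+            self.position[other], self.position[s] = i, j
+        left = {"begin": blk["begin"], "end": pivot, "parent": b}
+        right = {"begin": pivot, "end": blk["end"], "parent": b}
+        self.blocks += [left, right]
+        for k in range(blk["begin"], blk["end"]):
+            self.leaf_of[self.elements[k]] = len(self.blocks) - (1 if k >= pivot else 2)
+        return len(self.blocks) - 2, len(self.blocks) - 1
+\stoptyping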
+\stopexample + +\todo{Figure: splitting tree + internal representation} + +%\begin{figure} +% \centering +% \subfloat[\label{fig:splitting-tree-only}]{ +% \includegraphics[width=0.43\textwidth]{splitting-tree.pdf} +% } +% \subfloat[\label{fig:internal-data-structure}]{ +% \setcounter{maxdepth}{3} +% \begin{tikzpicture}[scale=1.0,>=stealth'] +% \gasblock{0}{6}{0}{2}{\makebox[0pt][r]{\tiny\textcolor{gray}{$\sigma(B_2) = \mbox{}$}} a} +% \gasblock{0}{3}{1}{4}{b} +% \gasblock{3}{6}{1}{8}{a \sigma(B_4)} +% \gasblock{0}{2}{2}{6}{b \sigma(B_2)} +% \gasblock{2}{3}{2}{3}{} +% \gasblock{3}{5}{2}{10}{a \sigma(B_6)} +% \gasblock{5}{6}{2}{7}{} +% \gasblock{0}{1}{3}{0}{} +% \gasblock{1}{2}{3}{5}{} +% \gasblock{3}{4}{3}{1}{} +% \gasblock{4}{5}{3}{9}{} +% \draw (0,0) node[shape=rectangle,minimum width=1cm,draw] (s2) {\footnotesize $s_2$}; +% \draw (1,0) node[shape=rectangle,minimum width=1cm,draw] (s0) {\footnotesize $s_0$}; +% \draw (2,0) node[shape=rectangle,minimum width=1cm,draw] (s4) {\footnotesize $s_4$}; +% \draw (3,0) node[shape=rectangle,minimum width=1cm,draw] (s5) {\footnotesize $s_5$}; +% \draw (4,0) node[shape=rectangle,minimum width=1cm,draw] (s1) {\footnotesize $s_1$}; +% \draw (5,0) node[shape=rectangle,minimum width=1cm,draw] (s3) {\footnotesize $s_3$}; +% \draw[->] +% (s2) edge (B0) +% (B0) edge (B6) +% (B6) edge (B4) +% (B4) edge (B2) +% (s0) edge (B5) +% (B5) edge (B6) +% (s4) edge (B3) +% (B3) edge (B4) +% (s5) edge (B1) +% (B1) edge (B10) +% (B10) edge (B8) +% (B8) edge (B2) +% (s1) edge (B9) +% (B9) edge (B10) +% (s3) edge (B7) +% (B7) edge (B8); +% \end{tikzpicture} +% } +% \caption{A complete and minimal splitting tree for the FSM in Fig.~\ref{fig:automaton} (a) and its +% internal refinable partition data structure (b)} +% \label{fig:splitting-tree} +%\end{figure} + + +\stopsection +\startsection + [title={Optimizing the Algorithm}] + +In this section, we present an improvement on \in{Algorithm}[alg:increase-k] that uses two ideas described by \citet[Hopcroft1971] in his seminal paper on minimizing finite automata: +\emph{using the inverse transition set}, and \emph{processing the smaller half}. +The algorithm that we present is a drop-in replacement, so that \in{Algorithm}[alg:minimaltree] stays the same except for some bookkeeping. +This way, we can establish correctness of the new algorithms more easily. +The variant presented in this section reduces the amount of redundant computations that were made in \in{Algorithm}[alg:increase-k]. + +Using Hopcroft's first idea, we turn our algorithm upside down: +instead of searching for the lca for each leaf, we search for the leaves $u$ for which $l(u) \subseteq \delta^{-1}(l(v), a)$, for each potential lca $v$ and input $a$. +To keep the order of splits as before, we define \emph{$k$-candidates}. + +\startdefinition[reference=def:k-candidate] +A \emph{$k$-candidate} is a node $v$ with $|\sigma(v)| = k$. +\stopdefinition + +A $k$-candidate $v$ and an input $a$ can be used to split a leaf $u$ if $v = \lca(\delta(l(u), a))$, because in this case there are at least two states $s, t$ in $l(u)$ such that $\delta(s, a)$ and $\delta(t, a)$ are in labels of different nodes in $C(v)$. +Refining $u$ this way is called \emph{splitting $u$ with respect to $(v, a)$}. 
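+Splitting with respect to $(v, a)$ presupposes that the sets $\delta^{-1}(l(w), a)$ can be enumerated cheaply. The sketch below (an added illustration; the dictionary encoding of $\delta$ is an assumption) precomputes the inverse transition sets once, in $\bigO(m)$, as mentioned in the proof of the complexity theorem later in this section.
+\starttyping
+from collections import defaultdict
+
+def inverse_transitions(states, inputs, delta):
+    # delta_inv[t, a] = { s | delta(s, a) = t }, built once in O(m)
+    delta_inv = defaultdict(set)
+    for s in states:
+        for a in inputs:
+            delta_inv[delta[s, a], a].add(s)
+    return delta_inv
+
+# For a candidate (v, a) one then enumerates delta_inv[t, a] for the
+# states t in the labels of (all but one of) the children of v.
+\stoptyping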
+The set $C(u)$ is constructed according to (split-state), where each child $w \in C(v)$ defines a child $u_w$ of $u$ with states
+\placeformula[eq:split-fw]
+\startformula\startalign
+\NC l(u_w) \NC = \{ s \in l(u) \,|\, \delta(s, a) \in l(w) \} \NR
+\NC \NC = l(u) \cap \delta^{-1}(l(w), a) \NR
+\stopalign\stopformula
+In order to perform the same splits in each layer as before, we maintain a list $L_{k}$ of $k$-candidates.
+We keep the list in order of the construction of nodes, because when we split w.r.t. a child of a node $u$ before we split w.r.t. $u$, the result is not well-defined.
+Indeed, the order on $L_{k}$ is the same as the order used by \in{Algorithm}[alg:minimaltree].
+So far, the improved algorithm would still have time complexity $\bigO(mn)$.
+
+To reduce the complexity we have to use Hopcroft's second idea of \emph{processing the smaller half}.
+The key idea is that, when we fix a $k$-candidate $v$, all leaves are split with respect to $(v, a)$ simultaneously.
+Instead of iterating over all leaves to refine them, we iterate over $s \in \delta^{-1}(l(w), a)$ for all $w$ in $C(v)$ and look up the leaf containing $s$ in order to move $s$ out of it.
+From Lemma 8 by \citet[Knuutila2001] it follows that we can skip one of the children of $v$.
+This lowers the time complexity to $\bigO(m \log n)$.
+In order to move $s$ out of its leaf, each leaf $u$ is associated with a set of temporary children $C'(u)$ that is initially empty, and will be finalized after iterating over all $s$ and $w$.
+
+\todo{algorithm 4?}
+
+%\begin{algorithm}[t]
+% \LinesNumbered
+% \DontPrintSemicolon
+% \SetKw{KwContinue}{continue}
+% \KwIn{a $k$-stable and minimal splitting tree $T$, and a list $L_k$}
+% \KwResult{$T$ is a $(k+1)$-stable and minimal splitting tree, and a list $L_{k+1}$}
+% $L_{k+1} \gets \emptyset$\;
+% \ForAll{$k$-candidates $v$ in $L_{k}$ in order}{
+% let $w'$ be a node in $C(v)$ such that $|l(w')| \geq |l(w)|$ for all nodes $w$ in $C(v)$\;
+% \ForAll{inputs $a$ in $I$}{
+% \ForAll{nodes $w$ in $C(v) \setminus w'$}{
+% \ForAll{states $s$ in $\delta^{-1}(l(w), a)$}{
+% locate leaf $u$ such that $s \in l(u)$\;
+% \If{$C'(u)$ does not contain node $u_{w}$}{
+% add a new node $u_{w}$ to $C'(u)$\;\label{line:add-temp-child}
+% }
+% move $s$ from $l(u)$ to $l(u_{w})$\;\label{line:move-to-temp-child}
+% }
+% }
+% \ForEach{leaf $u$ with $C'(u) \neq \emptyset$}{\label{line:finalize}
+% \eIf{$|l(u)| = 0$}{
+% \If{$|C'(u)| = 1$}{
+% recover $u$ by moving its elements back and clear $C'(u)$\;\label{line:recover}
+% \KwContinue with the next leaf\;
+% }
+% set $p=u$ and $C(u)=C'(u)$\;\label{line:setptou}
+% }{
+% construct a new node $p$ and set $C(p) = C'(u) \cup \{ u \}$\;
+% insert $p$ in the tree in the place where $u$ was\;
+% }
+% set $\sigma(p) = a \sigma(v)$\;\label{line:prepend}
+% append $p$ to $L_{k+1}$ and clear $C'(u)$\;\label{line:finalize-end}
+% }
+% }
+% }
+% \caption{A better step towards the stability of a splitting tree}
+% \label{alg:increase-k-v3}
+%\end{algorithm}
+
+In \in{Algorithm}[alg:increase-k-v3] we use the ideas described above.
+For each $k$-candidate $v$ and input $a$, we consider all children $w$ of $v$, except for the largest one (in case of multiple largest children, we skip one of these arbitrarily).
+For each state $s \in \delta^{-1}(l(w), a)$ we consider the leaf $u$ containing it.
+If this leaf does not yet have an associated temporary child for $w$, we create such a child (line~\ref{line:add-temp-child}); in either case, we move $s$ into that child (line~\ref{line:move-to-temp-child}).
+
+Once we have done the simultaneous splitting for the candidate $v$ and input $a$, we finalize the temporary children.
+This is done at lines~\ref{line:finalize}--\ref{line:finalize-end}.
+If there is only one temporary child with all the states, no split has been made and we recover this node (line~\ref{line:recover}).
+In the other case we make the temporary children permanent.
+
+The states remaining in $u$ are those for which $\delta(s, a)$ is in the child of $v$ that we have skipped; therefore we will call it the \emph{implicit child}.
+In order to keep the theoretical time bound, we should not touch these states.
+Therefore, we construct a new parent node $p$ that will \quotation{adopt} the children in $C'(u)$ together with $u$ (line~\ref{line:setptou}).
+
+We will now explain why considering all but the largest child of each node lowers the algorithm's time complexity.
+Let $T$ be a splitting tree in which we color all children of each node blue, except for the largest one.
+Then:
+
+\startlemma[reference=lem:even]
+A state $s$ is in at most $(\log_{2} n) - 1$ labels of blue nodes.
+\stoplemma
+\startproof
+Observe that every blue node $u$ has a sibling $u'$ such that $|l(u')| \geq |l(u)|$.
+So the parent $p(u)$ has at least $2|l(u)|$ states in its label, and the largest blue node has at most $n/2$ states.
+
+Suppose a state $s$ is contained in $m$ blue nodes.
+When we walk up the tree starting at the leaf containing $s$, we will visit these $m$ blue nodes.
+With each visit we can double the lower bound on the number of states.
+Hence $n/2 \geq 2^m$ and $m \leq (\log_{2} n) - 1$.
+\stopproof
+
+\startcorollary[reference=cor:even-better]
+A state $s$ is in at most $\log_{2} n$ sets $\delta^{-1}(l(u), a)$, where $u$ is a blue node and $a$ is an input in $I$.
+\stopcorollary
+
+If we now quantify over all transitions, we immediately get the following result.
+We note that the number of blue nodes is at most $n-1$, but since this fact is not used, we leave it to the reader.
+
+\startcorollary[reference=cor:count-all]
+Let $\mathcal{B}$ denote the set of blue nodes and define
+\startformula
+\mathcal{X} = \{ (b, a, s) \,|\, b \in \mathcal{B}, a \in I, s \in \delta^{-1}(l(b), a) \}.
+\stopformula
+Then $\mathcal{X}$ has at most $m \log_{2} n$ elements.
+\stopcorollary
+
+The important observation is that, when using \in{Algorithm}[alg:increase-k-v3], we iterate in total over every element in $\mathcal{X}$ at most once.
+
+\starttheorem[reference=th:complexity]
+\in{Algorithm}[alg:minimaltree] using \in{Algorithm}[alg:increase-k-v3] runs in $\bigO(m \log n)$ time.
+\stoptheorem
+\startproof
+We prove that the bookkeeping does not increase the time complexity by discussing the implementation.
+
+\startdescription{Inverse transition.}
+$\delta^{-1}$ can be constructed as a preprocessing step in $\bigO(m)$.
+\stopdescription
+\startdescription{State sorting.}
+As described in \in{Section}[sec:tree], we maintain a refinable partition data structure.
+Each time a new pair of a $k$-candidate $v$ and an input $a$ is considered, leaves are split by performing a bucket sort.
+
+First, buckets are created for each node $w \in C(v) \setminus w'$ and each leaf $u$ that contains one or more elements from $\delta^{-1}(l(w), a)$, where $w'$ is a largest child of $v$.
+The buckets are filled by iterating over the states in $\delta^{-1}(l(w), a)$ for all $w$.
+Then, a pivot is set for each leaf $u$ such that exactly the states that have been placed in a bucket can be moved right of the pivot (and the untouched states in $\delta^{-1}(l(w'), a)$ end up left of the pivot).
+For each leaf $u$, we iterate over the states in its buckets and the corresponding indices right of its pivot, and we swap the current state with the one that is at the current index.
+For each bucket a new leaf node is created.
+The refinable partition is updated such that the current state points to the most recently created leaf.
+
+This way, we ensure constant-time lookup of the leaf for a state, and we can update the array in constant time when we move elements out of a leaf.
+\stopdescription
+\startdescription{Largest child.}
+For finding the largest child, we maintain counts for the temporary children and keep track of the current biggest one.
+On finalizing the temporary children we store (a reference to) the biggest child in the node, so that we can skip this child later in the algorithm.
+\stopdescription
+\startdescription{Storing sequences.}
+The operation on line~\ref{line:prepend} is done in constant time by using a linked list.
+\stopdescription
+\stopproof
+
+
+\stopsection
+\startsection
+ [title={Application in Conformance Testing}]
+
+A splitting tree can be used to extract relevant information for two classical test generation methods:
+a \emph{characterization set} for the W-method and a \emph{separating family} for the HSI-method.
+For an introduction to and comparison of FSM-based test generation methods we refer to \citet[dorofeeva2010fsm].
+
+\startdefinition[reference=def:cset]
+A set $W \subset I^{\ast}$ is called a \emph{characterization set} if for every pair of inequivalent states $s, t$ there is a sequence $w \in W$ such that $\lambda(s, w) \neq \lambda(t, w)$.
+\stopdefinition
+
+\startlemma[reference=lem:cset]
+Let $T$ be a complete splitting tree, then $\{\sigma(u) \mid u \in T\}$ is a characterization set.
+\stoplemma
+\startproof
+Let $W = \{\sigma(u) \mid u \in T\}$.
+Let $s, t \in S$ be inequivalent states, then by completeness $s$ and $t$ are contained in different leaves of $T$.
+Hence $u = \lca(s, t)$ exists and $\sigma(u)$ separates $s$ and $t$.
+Furthermore, $\sigma(u) \in W$.
+This shows that $W$ is a characterization set.
+\stopproof
+
+\startlemma
+A characterization set with minimal length sequences can be constructed in time $\bigO(m \log n)$.
+\stoplemma
+\startproof
+By \in{Lemma}[lem:cset] the sequences associated with the inner nodes of a splitting tree form a characterization set.
+By \in{Theorem}[th:complexity], such a tree can be constructed in time $\bigO(m \log n)$.
+Traversing the tree to obtain the characterization set is linear in the number of nodes (and hence linear in the number of states).
+\stopproof
+
+\startdefinition[reference=def:separating_fam]
+A collection of sets $\{H_s\}_{s \in S}$ is called a \emph{separating family} if for every pair of inequivalent states $s, t$ there is a sequence $h$ such that $\lambda(s, h) \neq \lambda(t, h)$ and $h$ is a prefix of some $h_s \in H_s$ and some $h_t \in H_t$.
+\stopdefinition
+
+\startlemma[reference=lem:testing]
+Let $T$ be a complete splitting tree, then the sets $\{ \sigma(u) \mid s \in l(u), u \in T \}_{s \in S}$ form a separating family.
+\stoplemma
+\startproof
+Let $H_s = \{\sigma(u) \mid s \in l(u)\}$.
+Let $s, t \in S$ be inequivalent states, then by completeness $s$ and $t$ are contained in different leaves of $T$.
+Hence $u = \lca(s, t)$ exists.
+Since both $s$ and $t$ are contained in $l(u)$, the separating sequence $\sigma(u)$ is contained in both sets $H_s$ and $H_t$.
+Therefore, it is a (trivial) prefix of some word $h_s \in H_s$ and some $h_t \in H_t$.
+Hence $\{H_s\}_{s \in S}$ is a separating family.
+\stopproof
+
+\startlemma
+A separating family with minimal length sequences can be constructed in time $\bigO(m \log n + n^2)$.
+\stoplemma
+\startproof
+The separating family can be constructed from the splitting tree by collecting all sequences of all parents of a state (by \in{Lemma}[lem:testing]).
+Since we have to do this for every state, this takes $\bigO(n^2)$ time.
+\stopproof
+
+For test generation one moreover needs a transition cover.
+This can be constructed in linear time with a breadth-first search.
+We conclude that we can construct all necessary information for the W-method in time $\bigO(m \log n)$, as opposed to the $\bigO(mn)$ algorithm used by \citet[dorofeeva2010fsm].
+Furthermore, we conclude that we can construct all the necessary information for the HSI-method in time $\bigO(m \log n + n^2)$, improving on the reported bound $\bigO(mn^3)$ by \citet[Hierons2015].
+The original HSI-method was formulated differently and might generate smaller sets.
+We conjecture that our separating family has the same size if we furthermore remove redundant prefixes.
+This can be done in $\bigO(n^2)$ time using a trie data structure.
+
+
+\stopsection
+
+\todo{Continue at \tt 732}
 \referencesifcomponent
 \stopchapter
diff --git a/environment.tex b/environment.tex
index 0d62855..1a25cd1 100644
--- a/environment.tex
+++ b/environment.tex
@@ -12,6 +12,7 @@
 \environment notation
 \environment paper
 \environment pdf
+\environment tikz
 
 \mainlanguage[en]
 
@@ -56,9 +57,6 @@
 \define\referencesifcomponent{\doifnotmode{everything}{\section{References}\placelistofpublications}}
 
-\usemodule[tikz]
-\usetikzlibrary[automata, arrows, positioning, matrix, fit, decorations.pathreplacing]
-
 \usemodule[algorithmic]
 \setupalgorithmic[align=middle,numbercolor=darkgray] % center line numbers
 \setupalgorithmic[before={\startframedtext[width=0.85\textwidth,offset=none,frame=off]},after={\stopframedtext}] % To enable linenumbers in a float
diff --git a/environment/tikz.tex b/environment/tikz.tex
new file mode 100644
index 0000000..834a8fa
--- /dev/null
+++ b/environment/tikz.tex
@@ -0,0 +1,36 @@
+\startenvironment tikz
+
+\usemodule[tikz]
+\usetikzlibrary[automata, arrows, positioning, matrix, fit, decorations.pathreplacing]
+
+% defaults
+\tikzset{node distance=2cm} % 'shorten >=2pt' only for automata
+
+% observation table
+\tikzset{obs table/.style={matrix of math nodes, ampersand replacement=\&, column 1/.style={anchor=base west}}}
+
+\stopenvironment
+
+% This command is used to draw the internal data structure for the minimal separating sequences paper.
+% It is not used yet.
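+% Arguments of \gasblock (as used in the commented-out figure in
+% minimal-separating-sequences.tex):
+%   #1, #2  begin and end position of the block's slice in the state array
+%   #3      depth of the block in the splitting tree (0 is the root)
+%   #4      block number; the command names the drawn node B#4
+%   #5      textual label (e.g. the block's separating sequence); only used
+%           by the commented-out lines in the body below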
+\newcounter{maxdepth} +\newcommand{\gasblock}[5]{ +\fill (#1-0.495,{(\themaxdepth-#3)*0.75+0.5}) [rounded corners=0.2cm] -- (#1-0.495,{(\themaxdepth-#3)*0.75+0.655}) -- (#2-0.505,{(\themaxdepth-#3)*0.75+0.655}) [sharp corners] -- (#2-0.505,{(\themaxdepth-#3)*0.75+0.5}) [rounded corners=0.2cm] -- (#2-0.505,{(\themaxdepth-#3)*0.75+0.645}) -- (#1-0.495,{(\themaxdepth-#3)*0.75+0.645}) [sharp corners] -- cycle; +%\draw[rounded corners=0.225cm] (#1-0.45,{(\themaxdepth-#3)*0.75+0.5}) -- (#1-0.45,{(\themaxdepth-#3)*0.75+0.65}) -- (#2-0.55,{(\themaxdepth-#3)*0.75+0.65}) -- (#2-0.55,{(\themaxdepth-#3)*0.75+0.5}); +\draw ({(#1+#2)/2-0.5},{(\themaxdepth-#3)*0.75+0.65}) node[fill=white] (B#4) {\footnotesize $B_{#4}$}; +%\draw (#1-0.5,{(\themaxdepth-#3)*0.75+0.3}) node {\tiny $#1$}; +%\draw (#2-0.5,{(\themaxdepth-#3)*0.75+0.3}) node {\tiny $#2$}; +%\draw ({(#1+#2)/2-0.5},{(\themaxdepth-#3)*0.75+0.3}) node {\scriptsize \smash{$#5$}}; +%\begin{scope} +% \clip[overlay] (-0.55,{(\themaxdepth-#3)*0.75+0.6}) rectangle (#2-3.45,0); +% \draw (#1-0.5,{(\themaxdepth-#3)*0.75+0.3}) node{\tiny $\phantom{#1}$\makebox[0pt][l]{\textcolor{gray}{$\mbox{} = \smash{#4.\mathit{begin}}$}}}; +%\end{scope} +%\begin{scope} +% \clip[overlay] (#1+2.45,{(\themaxdepth-#3)*0.75+0.6}) rectangle (#2-0.45,0); +% \draw (#2-0.5,{(\themaxdepth-#3)*0.75+0.3}) node{\tiny\makebox[0pt][r]{\textcolor{gray}{$\smash{#4.\mathit{end}} = \mbox{}$}}$\phantom{#2}$}; +%\end{scope} +%% \begin{scope} +%% \clip[overlay] (#1*2-1.7,{(\themaxdepth-#3)*0.75+0.8}) rectangle (-0.5,0); +%% \draw[overlay] (-0.5,{(\themaxdepth-#3)*0.75+0.65}) node{\tiny\makebox[0pt][r]{\textcolor{gray}{$#4.\mathit{level} = #3$}~~\quad}}; +%% \end{scope} +} \ No newline at end of file