Archived
1
Fork 0

Aan intro gewerkt

This commit is contained in:
Joshua Moerman 2018-11-16 00:00:57 +01:00
parent 1df5bdf4f8
commit b7eb986943
7 changed files with 438 additions and 81 deletions

View file

@ -1,3 +1,38 @@
@inproceedings{BosJM17,
author = {Petra van den Bos and
Ramon Janssen and
Joshua Moerman},
editor = {Nina Yevtushenko and
Ana Rosa Cavalli and
H{\"{u}}sn{\"{u}} Yenig{\"{u}}n},
title = {$n$-Complete Test Suites for {IOCO}},
booktitle_= {Testing Software and Systems - 29th {IFIP} {WG} 6.1 International
Conference, {ICTSS} 2017, St. Petersburg, Russia, October 9-11, 2017,
Proceedings},
booktitle = {{ICTSS} 2017 Proceedings},
series = {Lecture Notes in Computer Science},
volume = {10533},
pages = {91--107},
publisher = {Springer},
year = {2017},
url = {https://doi.org/10.1007/978-3-319-67549-7\_6},
doi = {10.1007/978-3-319-67549-7\_6},
timestamp = {Mon, 25 Sep 2017 17:37:19 +0200},
biburl = {https://dblp.org/rec/bib/conf/pts/BosJM17},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{BosJM18,
author = {Petra van den Bos and
Ramon Janssen and
Joshua Moerman},
title = {$n$-Complete Test Suites for {IOCO}},
journal = {Software Quality Journal},
publisher = {Springer},
year = {2018},
note = {To appear}
}
@inproceedings{DBLP:conf/icfem/SmeenkMVJ15, @inproceedings{DBLP:conf/icfem/SmeenkMVJ15,
author = {Wouter Smeenk and author = {Wouter Smeenk and
Joshua Moerman and Joshua Moerman and
@ -7,9 +42,10 @@
Sylvain Conchon and Sylvain Conchon and
Fatiha Za{\"{\i}}di}, Fatiha Za{\"{\i}}di},
title = {Applying Automata Learning to Embedded Control Software}, title = {Applying Automata Learning to Embedded Control Software},
booktitle = {Formal Methods and Software Engineering - 17th International Conference booktitle_= {Formal Methods and Software Engineering - 17th International Conference
on Formal Engineering Methods, {ICFEM} 2015, Paris, France, November on Formal Engineering Methods, {ICFEM} 2015, Paris, France, November
3-5, 2015, Proceedings}, 3-5, 2015, Proceedings},
booktitle = {{ICFEM} 2015, Proceedings},
series = {Lecture Notes in Computer Science}, series = {Lecture Notes in Computer Science},
volume = {9407}, volume = {9407},
pages = {67--83}, pages = {67--83},
@ -31,9 +67,10 @@
Carlos Mart{\'{\i}}n{-}Vide and Carlos Mart{\'{\i}}n{-}Vide and
Bianca Truthe}, Bianca Truthe},
title = {Minimal Separating Sequences for All Pairs of States}, title = {Minimal Separating Sequences for All Pairs of States},
booktitle = {Language and Automata Theory and Applications - 10th International booktitle_= {Language and Automata Theory and Applications - 10th International
Conference, {LATA} 2016, Prague, Czech Republic, March 14-18, 2016, Conference, {LATA} 2016, Prague, Czech Republic, March 14-18, 2016,
Proceedings}, Proceedings},
booktitle = {{LATA} 2016, Proceedings},
series = {Lecture Notes in Computer Science}, series = {Lecture Notes in Computer Science},
volume = {9618}, volume = {9618},
pages = {181--193}, pages = {181--193},
@ -46,6 +83,23 @@
bibsource = {dblp computer science bibliography, https://dblp.org} bibsource = {dblp computer science bibliography, https://dblp.org}
} }
@article{SmetsersMJV16,
author = {Rick Smetsers and
Joshua Moerman and
Mark Janssen and
Sicco Verwer},
title = {Complementing Model Learning with Mutation-Based Fuzzing},
journal = {CoRR},
volume = {abs/1611.02429},
year = {2016},
url = {http://arxiv.org/abs/1611.02429},
archivePrefix = {arXiv},
eprint = {1611.02429},
timestamp = {Mon, 13 Aug 2018 16:46:49 +0200},
biburl = {https://dblp.org/rec/bib/journals/corr/SmetsersMJV16},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{MoermanS0KS17, @inproceedings{MoermanS0KS17,
author = {Joshua Moerman and author = {Joshua Moerman and
Matteo Sammartino and Matteo Sammartino and
@ -55,9 +109,10 @@
editor = {Giuseppe Castagna and editor = {Giuseppe Castagna and
Andrew D. Gordon}, Andrew D. Gordon},
title = {Learning nominal automata}, title = {Learning nominal automata},
booktitle = {Proceedings of the 44th {ACM} {SIGPLAN} Symposium on Principles of booktitle_= {Proceedings of the 44th {ACM} {SIGPLAN} Symposium on Principles of
Programming Languages, {POPL} 2017, Paris, France, January 18-20, Programming Languages, {POPL} 2017, Paris, France, January 18-20,
2017}, 2017},
booktitle = {{POPL} 2017, Proceedings},
pages = {613--625}, pages = {613--625},
publisher = {{ACM}}, publisher = {{ACM}},
year = {2017}, year = {2017},
@ -67,27 +122,23 @@
bibsource = {dblp computer science bibliography, https://dblp.org} bibsource = {dblp computer science bibliography, https://dblp.org}
} }
@inproceedings{BosJM17, @inproceedings{Moerman18,
author = {Petra van den Bos and author = {Joshua Moerman},
Ramon Janssen and title = {Learning Product Automata},
Joshua Moerman}, editor = {Olgierd Unold and
editor = {Nina Yevtushenko and Witold Dyrka and
Ana Rosa Cavalli and Wojciech Wieczorek},
H{\"{u}}sn{\"{u}} Yenig{\"{u}}n}, booktitle = {ICGI 2018, Proceedings},
title = {$n$-Complete Test Suites for {IOCO}}, publisher = {Proceedings of Machine Learning Research},
booktitle = {Testing Software and Systems - 29th {IFIP} {WG} 6.1 International year = {2018}
Conference, {ICTSS} 2017, St. Petersburg, Russia, October 9-11, 2017, }
Proceedings},
series = {Lecture Notes in Computer Science}, @misc{MoermanR19,
volume = {10533}, author = {Joshua Moerman and
pages = {91--107}, Jurriaan Rot},
publisher = {Springer}, title = {Separated Nominal Automata},
year = {2017}, note = {Under submission},
url = {https://doi.org/10.1007/978-3-319-67549-7\_6}, year = {2018}
doi = {10.1007/978-3-319-67549-7\_6},
timestamp = {Mon, 25 Sep 2017 17:37:19 +0200},
biburl = {https://dblp.org/rec/bib/conf/pts/BosJM17},
bibsource = {dblp computer science bibliography, https://dblp.org}
} }
@inproceedings{VenhoekMR18, @inproceedings{VenhoekMR18,
@ -97,8 +148,9 @@
editor = {Bernd Fischer and editor = {Bernd Fischer and
Tarmo Uustalu}, Tarmo Uustalu},
title = {Fast Computations on Ordered Nominal Sets}, title = {Fast Computations on Ordered Nominal Sets},
booktitle = {Theoretical Aspects of Computing - {ICTAC} 2018 - 15th International booktitle_= {Theoretical Aspects of Computing - {ICTAC} 2018 - 15th International
Colloquium, Stellenbosch, South Africa, October 16-19, 2018, Proceedings}, Colloquium, Stellenbosch, South Africa, October 16-19, 2018, Proceedings},
booktitle = {{ICTAC} 2018, Proceedings},
series = {Lecture Notes in Computer Science}, series = {Lecture Notes in Computer Science},
volume = {11187}, volume = {11187},
pages = {493--512}, pages = {493--512},

View file

@ -18,6 +18,24 @@
bibsource = {dblp computer science bibliography, https://dblp.org} bibsource = {dblp computer science bibliography, https://dblp.org}
} }
@inproceedings{AartsRP13,
author = {Fides Aarts and
Joeri de Ruiter and
Erik Poll},
title = {Formal Models of Bank Cards for Free},
booktitle = {Sixth {IEEE} International Conference on Software Testing, Verification
and Validation, {ICST} 2013 Workshops Proceedings, Luxembourg, Luxembourg,
March 18-22, 2013},
pages = {461--468},
publisher = {{IEEE} Computer Society},
year = {2013},
url = {https://doi.org/10.1109/ICSTW.2013.60},
doi = {10.1109/ICSTW.2013.60},
timestamp = {Wed, 24 May 2017 08:30:36 +0200},
biburl = {https://dblp.org/rec/bib/conf/icst/AartsRP13},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{Aarts14, @phdthesis{Aarts14,
title = {Tomte: bridging the gap between active learning and real-world systems}, title = {Tomte: bridging the gap between active learning and real-world systems},
author = {Fides Dorothea Aarts}, author = {Fides Dorothea Aarts},
@ -540,6 +558,24 @@
bibsource = {dblp computer science bibliography, https://dblp.org} bibsource = {dblp computer science bibliography, https://dblp.org}
} }
@inproceedings{ChaluparPPR14,
author = {Georg Chalupar and
Stefan Peherstorfer and
Erik Poll and
Joeri de Ruiter},
editor = {Sergey Bratus and
Felix "FX" Lindner},
title = {Automated Reverse Engineering using {Lego}®},
booktitle = {8th {USENIX} Workshop on Offensive Technologies, {WOOT} '14, San Diego,
CA, USA, August 19, 2014.},
publisher = {{USENIX} Association},
year = {2014},
url = {https://www.usenix.org/conference/woot14/workshop-program/presentation/chalupar},
timestamp = {Mon, 04 Dec 2017 17:32:55 +0100},
biburl = {https://dblp.org/rec/bib/conf/woot/ChaluparPPR14},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{ChanVO89, @inproceedings{ChanVO89,
author = {Wendy Y. L. Chan and author = {Wendy Y. L. Chan and
Son T. Vuong and Son T. Vuong and
@ -840,6 +876,53 @@
bibsource = {dblp computer science bibliography, https://dblp.org} bibsource = {dblp computer science bibliography, https://dblp.org}
} }
@inproceedings{Fiterau-BrosteanH17,
author = {Paul Fiterau{-}Brostean and
Falk Howar},
editor = {Laure Petrucci and
Cristina Seceleanu and
Ana Cavalcanti},
title = {Learning-Based Testing the Sliding Window Behavior of {TCP} Implementations},
booktitle = {Critical Systems: Formal Methods and Automated Verification - Joint
22nd International Workshop on Formal Methods for Industrial Critical
Systems - and - 17th International Workshop on Automated Verification
of Critical Systems, FMICS-AVoCS 2017, Turin, Italy, September 18-20,
2017, Proceedings},
series = {Lecture Notes in Computer Science},
volume = {10471},
pages = {185--200},
publisher = {Springer},
year = {2017},
url = {https://doi.org/10.1007/978-3-319-67113-0\_12},
doi = {10.1007/978-3-319-67113-0\_12},
timestamp = {Tue, 05 Sep 2017 14:29:34 +0200},
biburl = {https://dblp.org/rec/bib/conf/fmics/Fiterau-Brostean17},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{Fiterau-BrosteanLPRVV17,
author = {Paul Fiterau{-}Brostean and
Toon Lenaerts and
Erik Poll and
Joeri de Ruiter and
Frits W. Vaandrager and
Patrick Verleg},
editor = {Hakan Erdogmus and
Klaus Havelund},
title = {Model learning and model checking of {SSH} implementations},
booktitle = {Proceedings of the 24th {ACM} {SIGSOFT} International {SPIN} Symposium
on Model Checking of Software, Santa Barbara, CA, USA, July 10-14,
2017},
pages = {142--151},
publisher = {{ACM}},
year = {2017},
url = {https://doi.org/10.1145/3092282.3092289},
doi = {10.1145/3092282.3092289},
timestamp = {Tue, 06 Nov 2018 11:06:50 +0100},
biburl = {https://dblp.org/rec/bib/conf/spin/Fiterau-Brostean17},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{Fiterau-Brostean18, @phdthesis{Fiterau-Brostean18,
title = {Active Model Learning for the Analysis of Network Protocols}, title = {Active Model Learning for the Analysis of Network Protocols},
author = {Paul Fiterau{-}Brostean}, author = {Paul Fiterau{-}Brostean},
@ -1674,6 +1757,21 @@
bibsource = {dblp computer science bibliography, https://dblp.org} bibsource = {dblp computer science bibliography, https://dblp.org}
} }
@article{PeledVY02,
author = {Doron A. Peled and
Moshe Y. Vardi and
Mihalis Yannakakis},
title = {Black Box Checking},
journal = {Journal of Automata, Languages and Combinatorics},
volume = {7},
number = {2},
pages = {225--246},
year = {2002},
timestamp = {Fri, 12 Feb 2016 07:36:50 +0100},
biburl = {https://dblp.org/rec/bib/journals/jalc/PeledVY02},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Petrenko97, @article{Petrenko97,
author = {Alexandre Petrenko}, author = {Alexandre Petrenko},
title = {Technical Correspondence Comments on "A Reduced Test Suite for Protocol title = {Technical Correspondence Comments on "A Reduced Test Suite for Protocol
@ -1822,6 +1920,23 @@
url = {http://hdl.handle.net/1887/35814} url = {http://hdl.handle.net/1887/35814}
} }
@inproceedings{RuiterP15,
author = {Joeri de Ruiter and
Erik Poll},
editor = {Jaeyeon Jung and
Thorsten Holz},
title = {Protocol State Fuzzing of {TLS} Implementations},
booktitle = {24th {USENIX} Security Symposium, {USENIX} Security 15, Washington,
D.C., USA, August 12-14, 2015.},
pages = {193--206},
publisher = {{USENIX} Association},
year = {2015},
url = {https://www.usenix.org/conference/usenixsecurity15/technical-sessions/presentation/de-ruiter},
timestamp = {Fri, 19 Aug 2016 15:38:04 +0200},
biburl = {https://dblp.org/rec/bib/conf/uss/RuiterP15},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{Rutten98, @inproceedings{Rutten98,
author = {Jan J. M. M. Rutten}, author = {Jan J. M. M. Rutten},
editor = {Davide Sangiorgi and editor = {Davide Sangiorgi and
@ -1875,6 +1990,28 @@
bibsource = {dblp computer science bibliography, https://dblp.org} bibsource = {dblp computer science bibliography, https://dblp.org}
} }
@inproceedings{SchutsHV16,
author = {Mathijs Schuts and
Jozef Hooman and
Frits W. Vaandrager},
editor = {Erika {\'{A}}brah{\'{a}}m and
Marieke Huisman},
title = {Refactoring of Legacy Software Using Model Learning and Equivalence
Checking: {A}n Industrial Experience Report},
booktitle = {Integrated Formal Methods - 12th International Conference, {IFM} 2016,
Reykjavik, Iceland, June 1-5, 2016, Proceedings},
series = {Lecture Notes in Computer Science},
volume = {9681},
pages = {311--325},
publisher = {Springer},
year = {2016},
url = {https://doi.org/10.1007/978-3-319-33693-0\_20},
doi = {10.1007/978-3-319-33693-0\_20},
timestamp = {Wed, 24 May 2017 08:28:10 +0200},
biburl = {https://dblp.org/rec/bib/conf/ifm/SchutsHV16},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{Segoufin06, @inproceedings{Segoufin06,
author = {Luc Segoufin}, author = {Luc Segoufin},
editor = {Zolt{\'{a}}n {\'{E}}sik}, editor = {Zolt{\'{a}}n {\'{E}}sik},

View file

@ -6,6 +6,15 @@
\startfrontmatter \startfrontmatter
\start
\switchtobodyfont[20pt]
\midaligned{\framed[width=12cm,align=center,offset=2cm,frame=off]{Nominal Techniques \& Testing Techniques for Automata}}
\switchtobodyfont[12pt]
\midaligned{Joshua Moerman}
\midaligned{draft 16 November 2018}
\stop
\page[yes]
\setupwhitespace[none] \setupwhitespace[none]
\completecontent \completecontent
\completelistoffigures \completelistoffigures
@ -32,7 +41,7 @@
\startbackmatter \startbackmatter
\switchtobodyfont[9pt] \switchtobodyfont[8pt]
\title{References} \title{References}
\placelistofpublications \placelistofpublications
\stopbackmatter \stopbackmatter

View file

@ -18,7 +18,6 @@ The goal of this technique can be manifold:
It can be used to reverse-engineer a system, to find bugs in it, to verify properties of the system, or to understand the system in one way or another. It can be used to reverse-engineer a system, to find bugs in it, to verify properties of the system, or to understand the system in one way or another.
It is \emph{not} just random testing: the information learned during the interaction with the system is actively used to guide following interactions. It is \emph{not} just random testing: the information learned during the interaction with the system is actively used to guide following interactions.
Additionally, the information learned can be inspected and analysed. Additionally, the information learned can be inspected and analysed.
\todo{We open the box?}
This thesis is about model learning and related techniques. This thesis is about model learning and related techniques.
In the first part, I present results concerning \emph{black box testing} of automata. In the first part, I present results concerning \emph{black box testing} of automata.
@ -73,7 +72,6 @@ This type of learning is hence called \emph{exact learning}.
It should be clear why this is called \emph{query learning} or \emph{active learning}. It should be clear why this is called \emph{query learning} or \emph{active learning}.
The learning algorithm initiates interaction with the teacher by posing queries, it may construct its own data points and ask for their corresponding label. The learning algorithm initiates interaction with the teacher by posing queries, it may construct its own data points and ask for their corresponding label.
\todo{Nog afzetten tegen passive learning?}
Another paradigm which is relevant for our type of applications is \emph{PAC-learning with membership queries}. Another paradigm which is relevant for our type of applications is \emph{PAC-learning with membership queries}.
Here, the algorithm can again use MQs as before, but the EQs are replace by random sampling. Here, the algorithm can again use MQs as before, but the EQs are replace by random sampling.
@ -127,8 +125,8 @@ We deviate even further from the PAC-model as we sometimes change our distributi
Yet, as applications show, this is a useful way of learning software behaviour. Yet, as applications show, this is a useful way of learning software behaviour.
\stopsubsection \stopsection
\startsubsection \startsection
[title={Research challenges}] [title={Research challenges}]
Model learning is far from a Model learning is far from a
@ -139,6 +137,7 @@ The restrictions include the following.
\item The system behaves deterministically. \item The system behaves deterministically.
\item One can reliably reset the system. \item One can reliably reset the system.
\item The system can be modelled with a finite state space. \item The system can be modelled with a finite state space.
This also means that the model cannot cope with time or data.
\item The set of input actions is finite. \item The set of input actions is finite.
\item One knows when the target is reached. \item One knows when the target is reached.
\stopitemize \stopitemize
@ -158,32 +157,58 @@ Not only the alphabet is infinite in these cases, the state-space is as well, si
In the second part of this thesis, we will see how nominal techniques can be used to tackle this challenge. In the second part of this thesis, we will see how nominal techniques can be used to tackle this challenge.
Being able to learn automata over an infinite alphabet is not new. Being able to learn automata over an infinite alphabet is not new.
It has been tackled in the theses of \cite[Aarts14, Fiterau-Brostean18, Cassel15]. It has been tackled in the theses of \citet[Aarts14], \citet[Cassel15], and \citet[Fiterau-Brostean18].
\todo{Dus...} In the first, the problem is solved by considering abstractions, which reduces the alphabet to a finite one.
These abstractions are automatically refined when a counterexample is presented to the algorithms.
\citet[Fiterau-Brostean18] extends this approach to cope with \quotation{fresh values}, crucial for protocols such as TCP.
In the thesis by \citet[Cassel15], another approach is taken.
The queries are changed to tree queries.
The approach in my thesis will be based on symmetries, which gives yet another insight into the problem of learning such automata.
\stopdescription \stopdescription
\stopsubsection \stopsection
\startsubsection \startsection
[title={A few applications of learning}] [title={Applications of Model Learning}]
Since this thesis only contains one \quote{real-world} application on learning in \in{Chapter}[chap:applying-automata-learning], I think it is good to mention a few others. Since this thesis only contains one real-world application on learning in \in{Chapter}[chap:applying-automata-learning], I think it is good to mention a few others.
Although we remain in the context of learning software behaviour, the applications are quite different. Although we remain in the context of learning software behaviour, the applications are quite different.
This is by no means a complete list.
Finite state machine conformance testing is a core topic in testing literature that is relevant for communication protocols and other reactive systems. \description{Bug finding in protocols.}
The combination of conformance testing and automata learning is applied in practice and it proves to be an effective way of finding bugs \cite[Joeri], replacing legacy software \cite[Philips], or inferring models to be used in model checking \cite[MeinkeSindhu]. A prominent example is by \citet[Fiterau-Brostean16].
They learn models of TCP implementations -- both clients and server sides.
Interestingly, they found bugs in the (closed source) Windows implementation.
Later, \citet[Fiterau-BrosteanH17] also found a bug in the sliding window of the Linux implementation.
Other protocols have been learned as well, such as the MQTT protocol by \citet[TapplerAB17], TLS by \citet[RuiterP15], and SSH by \citet[Fiterau-BrosteanLPRVV17].
Many of these applications reveal bugs by learning a model and consequently apply model checking.
The combination of learning and model checking was first described by \citet[PeledVY02].
Several model learning applications: \description{Bug finding in smart cards.}
learning embedded controller software \cite[DBLP:conf/icfem/SmeenkMVJ15], learning the TCP protocol \cite[Fiterau-Brostean16] and learning the MQTT protocol \cite[TapplerAB17]. \citet[AartsRP13] learn the software on smart cards of several Dutch and German banks.
These cards use the EMV protocol, which is run on the card itself.
So this is an example of a real black box system, where no other monitoring is possible and no code is available.
No vulnerabilities were found, although each card had a slightly different protocol.
Card readers have been learned by \citet[ChaluparPPR14].
They built a Lego machine which could automatically press buttons and the researchers found a security flaw.
\description{Regression testing.}
\citet[HungarNS03] describe the potential of automata learning in regression testing.
The aim is not to find bugs, but to monitor the development process of a system.
By considering the differences between models at different stages, one can generate regressions tests.
\description{Refactoring legacy software.}
Model learning can also be used in order to verify refactored software.
\citet[SchutsHV16] have applied this at a project within Philips.
They learn both an old version and a new version of the same component.
By comparing the learned models, some differences could be seen.
This gave developers opportunities to solve problems before replacing the old component by the new one.
\stopsubsection
\stopsection \stopsection
\startsection \startsection
[title={Testing Techniques}] [title={Testing Techniques}]
\todo{Even lang maken als Nominal Techniques.}
An important step in automata learning is equivalence checking. An important step in automata learning is equivalence checking.
Normally, this is abstracted away and done by an oracle, but we intend to implement such an oracle ourselves for our applications. Normally, this is abstracted away and done by an oracle, but we intend to implement such an oracle ourselves for our applications.
Concretely, the problem we need to solve is that of \emph{conformance checking}\footnote{It is also known as \emph{machine verification} or \emph{fault detection}.} and this was first described by \citet[Moore56]. Concretely, the problem we need to solve is that of \emph{conformance checking}\footnote{It is also known as \emph{machine verification} or \emph{fault detection}.} and this was first described by \citet[Moore56].
@ -202,6 +227,9 @@ In particular, the set of experiments is polynomial in size.
These techniques will be discussed in detail in \in{Chapter}[chap:test-methods]. These techniques will be discussed in detail in \in{Chapter}[chap:test-methods].
More background and other related problems, as well as their complexity results, are well exposed in a survey of \citet[LeeY94]. More background and other related problems, as well as their complexity results, are well exposed in a survey of \citet[LeeY94].
\todo{I want to give an example (by giving a state machine) of why black box testing is hard.}
\todo{I want to move some related work from the second chapter to here}.
\stopsection \stopsection
\startsection \startsection
@ -279,6 +307,8 @@ They provide an equivalence between register automata and nominal automata.
Additionally, they generalise the work on nominal sets to other symmetries. Additionally, they generalise the work on nominal sets to other symmetries.
The symmetries we encounter in this thesis are the following, but other symmetries can be found in the literature. The symmetries we encounter in this thesis are the following, but other symmetries can be found in the literature.
The symmetry directly corresponds to the data values (and operations) used in an automaton.
The data values are often called \emph{atoms}.
\startitemize[after] \startitemize[after]
\item \item
The \quotation{equality symmetry}. The \quotation{equality symmetry}.
@ -298,33 +328,155 @@ Typically, this means we use the rational numbers, $\Q$, as data values and symm
So what exactly is a nominal set? So what exactly is a nominal set?
I will not define it here and leave the formalities to the corresponding chapters. I will not define it here and leave the formalities to the corresponding chapters.
It suffices, for now, to think of nominal sets with abstract sets (often infinite) on which a group of symmetries acts. It suffices, for now, to think of nominal sets as abstract sets (often infinite) on which a group of symmetries acts.
The group of symmetries is not just any group, we fix a group of bijections on some fixed data domain. This action makes it possible to interpret the symmetries of the data values in the abstract set.
For automata, this allows us to talk about symmetries on the state space, the set of transitions, and the alphabet.
In order to implement such sets algorithmically, we impose two finiteness requirements In order to implement these sets algorithmically, we impose two finiteness requirements.
\startitemize Both properties can be expressed using only the group action.
\startitemize[after]
\item \item
Finite support. Each element is \emph{finitely supported}.
A way to think of this requirement is that each element is \quotation{constructed} out of finitely many data values.
\item \item
Orbit-finite. The set is \emph{orbit-finite}.
This means that we can choose finitely many elements such that any other element is a permuted version of one of those elements.
\stopitemize \stopitemize
If we wish to model the automaton from \in{Figure}[fig:login-system] as a nominal automaton, then we can simply define the state space as
\startformula
Q = \{ q_0 \} \cup \{ q_{1, a} \mid a \in \atoms \} \cup \{ q_{2, a} \mid a \in \atoms \},
\stopformula
where $\atoms$ is the set of atoms.
In this example, $\atoms$ is the set of all possible passwords.
The set $Q$ is infinite, but satisfies the two finiteness requirements.
The upshot of doing this is that the set $Q$ (and transition structure) corresponds directly to the semantics of the automaton.
We do not have to \emph{encode} how values relate or how they interact.
Instead, the set (and transition structure) defines all we need to know.
Algorithms, such as reachability, minimisation, and learning, can be run on such automata, despite the sets being infinite.
These algorithms can be implemented rather easily, by using a library which implements nominal sets.
One has to be careful as not all results from automata theory transfer to nominal automata.
A notable example is the powerset construction, which converts a non-deterministic automaton into a deterministic one.
The problem here is that the powerset of a set is generally \emph{not} orbit-finite and so the finiteness requirement is not met.
Consequently, languages accepted by nominal DFAs are \emph{not} closed under Kleene star, or even concatenation.
\stopsubsection \stopsubsection
\startsubsection \stopsection
\startsection
[title={Contributions}] [title={Contributions}]
The following chapters are split into two parts. The following chapters are split into two parts.
\in{Part}[part:testing] contains material about testing techniques, while \in{Part}[part:nominal] is about nominal techniques. \in{Part}[part:testing] contains material about testing techniques, while \in{Part}[part:nominal] is about nominal techniques.
Each chapter could be read in isolation. Each chapter could be read in isolation.
However, the chapters do get more technical and mathematical -- espacially in \in{Part}[part:nominal]. However, the chapters do get more technical and mathematical -- especially in \in{Part}[part:nominal].
\todo{Should I be more clear about my contribution?}
\description{\in{Chapter}[chap:test-methods].} \startdescription[title={\in{Chapter}[chap:test-methods]: FSM-based test methods.}]
Bla This chapter introduces test generation methods which can be used for learning.
The methods are presented in a uniform way, which allows to give a single proof of completeness for all these methods.
Moreover, the uniform presentation gives room to develop new test generation methods.
One particular contribution (besides the theoretical framework) is the new \emph{hybrid ADS} test generation method.
\stopdescription
\startdescription[title={\in{Chapter}[chap:applying-automata-learning]: Applying automata learning to embedded control software.}]
In this chapter we will apply model learning to an industrial case study.
It is a unique benchmark as it is much bigger than any of the applications seen before (3410 states and 77 inputs).
This makes it very challenging to learn a model and the main obstacle is finding counterexamples.
Here, my main contribution is the new test generation method from \in{Chapter}[chap:test-methods].
This is based on the following publication:
\cite[entry][DBLP:conf/icfem/SmeenkMVJ15].
\stopdescription
\startdescription[title={\in{Chapter}[chap:minimal-separating-sequences]: Minimal separating sequences for all pairs of states.}]
Continuing on test generation methods, this chapter presents an efficient algorithm to construct separating sequences.
Not only is the algorithm efficient -- it runs in $\bigO(n \log n)$ time -- it also constructs minimal length sequences.
The algorithm is based on a minimisation algorithm by \citet[Hopcroft71], but extending it to construct witnesses is non-trivial.
This is based on the following publication:
\cite[entry][DBLP:conf/lata/SmetsersMJ16].
\stopdescription
\startdescription[title={\in{Chapter}[chap:learning-nominal-automata]: Learning Nominal Automata.}]
In this chapter, we show how to learn automata over infinite alphabets.
We do this by translating the \LStar{} algorithm directly to a nominal version, \nLStar{}.
The correctness proofs mimic the original proofs by \citet[Angluin87].
Since our new algorithm is close to the original, we are able to translate variants of the \LStar{} algorithm as well.
In particular, we provide a learning algorithm for nominal non-deterministic automata.
This is based on the following publication:
\cite[entry][MoermanS0KS17].
\stopdescription
\startdescription[title={\in{Chapter}[chap:ordered-nominal-sets]: Fast computations on ordered nominal sets.}]
In this chapter, we provide a library to compute with nominal sets.
We restrict our attention to nominal sets over the total order symmetry.
This symmetry allows for a rather easy characterisation of orbits, and hence an easy implementation.
We experimentally show that it is competitive with existing tools, which are based on SMT solvers.
This is based on the following publication:
\cite[entry][VenhoekMR18].
\stopdescription
\startdescription[title={\in{Chapter}[chap:separated-nominal-automata]: Separated Nominal Automata.}]
We investigate how to reduce the size of certain nominal automata.
This is based on the observation that some languages (with outputs) are not just invariant under symmetries, but invariant under arbitrary \emph{transformations}, or \emph{renamings}.
We define a new type of automaton, the \emph{separated nominal automaton}, and show that they exactly accept those languages which are closed under renamings.
All of this is shown by using a theoretical framework: we establish a strong relationship between nominal sets on one hand, and nominal renaming sets on the other.
This is based on a paper under submission:
\cite[entry][MoermanR19].
\stopdescription
Besides these chapters, of which many are based on publications, the author has published the following papers.
These are not included in this thesis, but a short summary of those papers is presented below.
\startdescription[title={Complementing Model Learning with Mutation-Based Fuzzing.}]
Our group at the Radboud University participated in the RERS challenge.
This is a challenge where reactive software is provided and researchers have to asses validity of certain properties (given as LTL specifications).
We approached this with model learning:
Instead of analysing the source code, we simply learned the external behaviour, and then used model checking on the learned model.
This has worked remarkably well, as the models of the external behaviour are not too big.
Our results were presented at the RERS workshop (ISOLA 2016).
The report can be found on ar$\Chi$iv:
\cite[entry][SmetsersMJV16].
\stopdescription
\startdescription[title={$n$-Complete test suites for IOCO.}]
In this paper, we investigate complete test suites for labelled transition systems (LTSs), instead of deterministic Mealy machines.
This is a much harder problem than conformance testing of deterministic systems.
The system may adversarially avoid certain states the tester wishes to test.
We provide a test suite which is $n$-complete (provided the implementation is a suspension automaton).
My personal contribution here is the proof of completeness, which resembles the proof presented in \in{Chapter}[chap:test-methods] closely.
The conference paper was presented at ICTSS:
\cite[entry][BosJM17].
An extended version will appear soon:
\cite[entry][BosJM18].
\stopdescription
\startdescription[title={Learning Product Automata.}]
In this article we consider Moore machines with multiple outputs.
These machines can be decomposed by projecting on each output,
resulting in smaller components that can be learned with fewer queries.
We give experimental evidence that this is a useful technique which can reduce the number of queries substantially.
This is all motivated by the idea that compositional methods are widely used throughout engineering and that we should use this in model learning.
This was presented by Vaandrager at ICGI:
\cite[entry][Moerman18].
\stopdescription
\stopsubsection
\stopsection \stopsection
\referencesifcomponent \referencesifcomponent
\stopchapter \stopchapter

View file

@ -714,11 +714,13 @@ The output of these programs was manually checked to see if the minimisation was
The results (shown in \in{Table}[minimize_results]) for random automata show a clear advantage for \ONS{}, which is capable of running all supplied testcases in less than one second. The results (shown in \in{Table}[minimize_results]) for random automata show a clear advantage for \ONS{}, which is capable of running all supplied testcases in less than one second.
This in contrast to both \LOIS{} and \NLambda{}, which take more than $2$ hours on the largest random automata. This in contrast to both \LOIS{} and \NLambda{}, which take more than $2$ hours on the largest random automata.
\placetable[here][minimize_results] \startplacetable
{Running times for \in{Algorithm}[alg:moore] implemented in the three libraries. [title={Running times for \in{Algorithm}[alg:moore] implemented in the three libraries.
$N(S)$ is the size of the input and $N(S^{\text{min}})$ the size of the minimal automaton. $N(S)$ is the size of the input and $N(S^{\text{min}})$ the size of the minimal automaton.
For \ONS{}, the time used to generate the automaton is reported separately (in grey). For \ONS{}, the time used to generate the automaton is reported separately (in grey).
Timeouts are indicated by $\infty$.} Timeouts are indicated by $\infty$.},
list={Running times for minimisation.},
reference=minimize_results]
\starttabulate[|l|c|c|cg{.}|cg{.}|cg{.}|cg{.}|][distance=none] \starttabulate[|l|c|c|cg{.}|cg{.}|cg{.}|cg{.}|][distance=none]
\NC Type \NC $\Nsize(S)$ \NC $\Nsize(S^{\text{min}})$ \VL \ONS{} (s) \NC \color[darkgray]{Gen (s)} \VL \NLambda{} (s) \VL \LOIS{} (s) \NC\NR \NC Type \NC $\Nsize(S)$ \NC $\Nsize(S^{\text{min}})$ \VL \ONS{} (s) \NC \color[darkgray]{Gen (s)} \VL \NLambda{} (s) \VL \LOIS{} (s) \NC\NR
\HL \HL
@ -742,6 +744,7 @@ Timeouts are indicated by $\infty$.}
\NC $\Lmax$ \NC 5 \NC 3 \VL 0.00 \NC \color[darkgray]{0.00} \VL 2.06 \VL 0.03 \NC\NR \NC $\Lmax$ \NC 5 \NC 3 \VL 0.00 \NC \color[darkgray]{0.00} \VL 2.06 \VL 0.03 \NC\NR
\NC $\Lint$ \NC 5 \NC 5 \VL 0.00 \NC \color[darkgray]{0.00} \VL 1.55 \VL 0.03 \NC\NR \NC $\Lint$ \NC 5 \NC 5 \VL 0.00 \NC \color[darkgray]{0.00} \VL 1.55 \VL 0.03 \NC\NR
\stoptabulate \stoptabulate
\stopplacetable
The results for structured automata show a clear effect of the extra structure. The results for structured automata show a clear effect of the extra structure.
Both \NLambda{} and \LOIS{} remain capable of minimising the automata in reasonable amounts of time for larger sizes. Both \NLambda{} and \LOIS{} remain capable of minimising the automata in reasonable amounts of time for larger sizes.
@ -813,10 +816,12 @@ For languages which are equivariant for the equality symmetry, the \NLambda{} im
This is expected as the automata themselves have fewer orbits. This is expected as the automata themselves have fewer orbits.
It is interesting to see that these languages can be learned more efficiently by choosing the right symmetry. It is interesting to see that these languages can be learned more efficiently by choosing the right symmetry.
\placetable[here][learning_results] \startplacetable
{Running times and number of membership queries for the \nLStar{} algorithm. [title={Running times and number of membership queries for the \nLStar{} algorithm.
For \NLambda{} we used two version: \NLambda{}$^{\mathit{ord}}$ uses the total order symmetry \NLambda{}$^{\mathit{eq}}$ uses the equality symmetry. For \NLambda{} we used two version: \NLambda{}$^{\mathit{ord}}$ uses the total order symmetry \NLambda{}$^{\mathit{eq}}$ uses the equality symmetry.
Timeouts are indicated by $\infty$.} Timeouts are indicated by $\infty$.},
list={Running times for automata learning.},
reference=learning_results]
\starttabulate[|l|c|c|cg(.)|r|cg(.)|r|cg(.)|r|][distance=none] \starttabulate[|l|c|c|cg(.)|r|cg(.)|r|cg(.)|r|][distance=none]
\NC \NC \NC \VL \NC \ONS{} \VL \NC \NLambda{}$^{\mathit{ord}}$ \VL \NC \NLambda{}$^{\mathit{eq}}$ \NC\NR \NC \NC \NC \VL \NC \ONS{} \VL \NC \NLambda{}$^{\mathit{ord}}$ \VL \NC \NLambda{}$^{\mathit{eq}}$ \NC\NR
\NC Model \NC $\Nsize(S)$ \NC $\dim(S)$ \VL time (s) \NC MQs \VL time (s) \NC MQs \VL time (s) \NC MQs \NC\NR \NC Model \NC $\Nsize(S)$ \NC $\dim(S)$ \VL time (s) \NC MQs \VL time (s) \NC MQs \VL time (s) \NC MQs \NC\NR
@ -838,6 +843,7 @@ Timeouts are indicated by $\infty$.}
\NC $\Lmax$ \NC 3 \NC 1 \VL 0.01 \NC 54 \VL 3.58 \NC 54 \VL \NC \NC\NR \NC $\Lmax$ \NC 3 \NC 1 \VL 0.01 \NC 54 \VL 3.58 \NC 54 \VL \NC \NC\NR
\NC $\Lint$ \NC 5 \NC 2 \VL 0.59 \NC 478 \VL 83 \NC 478 \VL \NC \NC\NR \NC $\Lint$ \NC 5 \NC 2 \VL 0.59 \NC 478 \VL 83 \NC 478 \VL \NC \NC\NR
\stoptabulate \stoptabulate
\stopplacetable
\stopsubsubject \stopsubsubject

View file

@ -12,11 +12,11 @@ A key aspect of test generation methods is the size and completeness of the test
On one hand, we want to cover as much as the specification as possible, hopefully ensuring that we find mistakes in any faulty implementation. On one hand, we want to cover as much as the specification as possible, hopefully ensuring that we find mistakes in any faulty implementation.
On the other hand: testing takes time, so we want to minimise the size of a test suite. On the other hand: testing takes time, so we want to minimise the size of a test suite.
\todo{Theoretische bijdrage scherper maken}
The test methods described here are well-known in the literature of FSM-based testing. The test methods described here are well-known in the literature of FSM-based testing.
They all share similar concepts, such as \emph{access sequences} and \emph{state identifiers}. They all share similar concepts, such as \emph{access sequences} and \emph{state identifiers}.
In this chapter we will define these concepts, relate them with one another and show how to build test suites from these concepts. In this chapter we will define these concepts, relate them with one another and show how to build test suites from these concepts.
From this theoretical discussion we derive a new algorithm: the \emph{hybrid ADS methods}, which is applied to an industrial case study in \in{Chapter}[chap:applying-automata-learning]. From this theoretical discussion we derive a new algorithm: the \emph{hybrid ADS methods}, which is applied to an industrial case study in \in{Chapter}[chap:applying-automata-learning].
\todo{Theoretische bijdrage scherper maken}
This chapter starts with the basics: Mealy machines, sequences and what it means to test a black box system. This chapter starts with the basics: Mealy machines, sequences and what it means to test a black box system.
Then, starting from \in{Section}[sec:separating-sequences] we define several concepts, such as state identifiers, in order to distinguish one state from another. Then, starting from \in{Section}[sec:separating-sequences] we define several concepts, such as state identifiers, in order to distinguish one state from another.
@ -104,15 +104,15 @@ Tests, or experiments, are generated from the specification and applied to the i
If the output is different from the specified output, then we know the implementation is flawed. If the output is different from the specified output, then we know the implementation is flawed.
The goals is to test as little as possible, while covering as much as possible. The goals is to test as little as possible, while covering as much as possible.
We assume the following %We assume the following
\todo{Herhaling van paragraaf hiervoor} %\todo{Herhaling van paragraaf hiervoor}
\startitemize[after] %\startitemize[after]
\item %\item
The system can be modelled as Mealy machine. %The system can be modelled as Mealy machine.
In particular, this means it is \emph{deterministic} and \emph{complete}. %In particular, this means it is \emph{deterministic} and \emph{complete}.
\item %\item
We are able to reset the system, i.e., bring it back to the initial state. %We are able to reset the system, i.e., bring it back to the initial state.
\stopitemize %\stopitemize
A test suite is nothing more than a set of sequences. A test suite is nothing more than a set of sequences.
We do not have to encode outputs in the test suite, as those follow from the deterministic specification. We do not have to encode outputs in the test suite, as those follow from the deterministic specification.
@ -197,7 +197,7 @@ This is crucial for black box testing, as we do not know the implementation, so
Before we construct test suites, we discuss several types of useful sequences. Before we construct test suites, we discuss several types of useful sequences.
All the following notions are standard in the literature, and the corresponding references will be given in \in{Section}[sec:methods], where we discuss the test generation methods using these notions. All the following notions are standard in the literature, and the corresponding references will be given in \in{Section}[sec:methods], where we discuss the test generation methods using these notions.
We fix a Mealy machine $M$ for the remainder of this chapter. We fix a Mealy machine $M$ for the remainder of this chapter.
\todo{Checken of dit echt het geval is!} %\todo{Checken of dit echt het geval is!}
For convenience we assume $M$ to be minimal, so that distinct states are, in fact, inequivalent. For convenience we assume $M$ to be minimal, so that distinct states are, in fact, inequivalent.
All definitions can be generalised to non-minimal $M$, by replacing the word \quote{distinct} (or \quote{other}) with \quote{inequivalent}. All definitions can be generalised to non-minimal $M$, by replacing the word \quote{distinct} (or \quote{other}) with \quote{inequivalent}.
@ -256,7 +256,7 @@ We return to this notion in more detail in \in{Example}[ex:uio-counterexample].
We may obtain a characterisation set by taking the union of state identifiers for each state. We may obtain a characterisation set by taking the union of state identifiers for each state.
For every machine we can construct a set of harmonised state identifiers as will be shown in \in{Chapter}[chap:separating-sequences] and hence every machine has a characterisation set. For every machine we can construct a set of harmonised state identifiers as will be shown in \in{Chapter}[chap:separating-sequences] and hence every machine has a characterisation set.
\todo{Iets versimpelen} %\todo{Iets versimpelen}
\startexample \startexample
As mentioned before, state $s_2$ from \in{Figure}[fig:running-example] has a state identifier $\{aa, ac, b\}$. As mentioned before, state $s_2$ from \in{Figure}[fig:running-example] has a state identifier $\{aa, ac, b\}$.
@ -399,7 +399,7 @@ On families we define
\item union: $\Fam{X} \cup \Fam{Y}$ is defined point-wise: \item union: $\Fam{X} \cup \Fam{Y}$ is defined point-wise:
$(\Fam{X} \cup \Fam{Y})_s = X_s \cup Y_s$, $(\Fam{X} \cup \Fam{Y})_s = X_s \cup Y_s$,
\item concatenation: \item concatenation:
\todo{Associativiteit?} %\todo{Associativiteit?}
$X \odot \Fam{Y} = \{ xy \mid x \in X, y \in Y_{\delta(s_0, x)} \}$, and $X \odot \Fam{Y} = \{ xy \mid x \in X, y \in Y_{\delta(s_0, x)} \}$, and
\item refinement: $\Fam{X} ; \Fam{Y}$ defined by \item refinement: $\Fam{X} ; \Fam{Y}$ defined by
\footnote{We use the convention that $\cap$ binds stronger than $\cup$. \footnote{We use the convention that $\cap$ binds stronger than $\cup$.
@ -451,8 +451,8 @@ We fix a state cover $P$ throughout this section and take the transition cover $
reference=sec:w] reference=sec:w]
After the work of \citet[Moore56], it was unclear whether a test suite of polynomial size could exist. After the work of \citet[Moore56], it was unclear whether a test suite of polynomial size could exist.
\todo{Melden dat Moore een exp. suite gaf, en dat een poly suite open is.} He presented a finite test suite which was complete, however it was exponential in size.
Both \citet[Chow78, Vasilevskii73] independently prove that such a test suite exists. Both \citet[Chow78, Vasilevskii73] independently prove that test suites of polynomial size exist.
\footnote{More precisely: the size of $T_{\text{W}}$ is polynomial in the size of the specification for each fixed $k$.} \footnote{More precisely: the size of $T_{\text{W}}$ is polynomial in the size of the specification for each fixed $k$.}
The W method is a very structured test suite construction. The W method is a very structured test suite construction.
It is called the W method as the characterisation set is often called $W$. It is called the W method as the characterisation set is often called $W$.
@ -606,11 +606,12 @@ With the same UIOs as above, the resulting UIOv test suite for the specification
[title={Example}, [title={Example},
reference=sec:all-example] reference=sec:all-example]
\todo{Beetje raar om Example Example te hebben.} %\todo{Beetje raar om Example Example te hebben.}
Let us compute all the previous test suites on the specification in \in{Figure}[fig:running-example]. Let us compute all the previous test suites on the specification in \in{Figure}[fig:running-example].
We will be testing without extra states, i.e., we construct 5-complete test suites. We will be testing without extra states, i.e., we construct 5-complete test suites.
We start by defining the state and transition cover. We start by defining the state and transition cover.
For this, we simply take all shortest sequences from the initial state to the other states: For this, we simply take all shortest sequences from the initial state to the other states:
\todo{Insert a picture so that one doesn't have to go back.}
\startformula\startalign \startformula\startalign
\NC P \NC = \{ \epsilon, a, aa, b, ba \} \NR \NC P \NC = \{ \epsilon, a, aa, b, ba \} \NR
\NC Q = P \cdot I \NC = \{ a, b, c, aa, ab, ac, aaa, aab, aac, ba, bb, bc, baa, bab, bac \} \NR \NC Q = P \cdot I \NC = \{ a, b, c, aa, ab, ac, aaa, aab, aac, ba, bb, bc, baa, bab, bac \} \NR

View file

@ -6,7 +6,7 @@
\setupexternalfigures[directory={images,../images}] \setupexternalfigures[directory={images,../images}]
% \definemode[afloop][yes] % \definemode[afloop][yes]
\definemode[draft][yes] % \definemode[draft][yes]
\environment bib \environment bib
\environment font \environment font
@ -49,7 +49,7 @@ This chapter is based on the following publication:
\define\colon{{\,{:}\,\,}} \define\colon{{\,{:}\,\,}}
\define[1]\defn{\emph{#1}} \define[1]\defn{\emph{#1}}
\define[1]\todo{\color[red]{\ss \bf #1}} \define[1]\todo{\color[red]{\ss todo: \bf #1}}
\define[1]\label{\todo{\tt \\label #1}} \define[1]\label{\todo{\tt \\label #1}}
\define\referencesifcomponent{\doifnotmode{everything}{\startsubject[title={References}]\switchtobodyfont[9pt]\placelistofpublications\stopsubject}} \define\referencesifcomponent{\doifnotmode{everything}{\startsubject[title={References}]\switchtobodyfont[9pt]\placelistofpublications\stopsubject}}