From b7eb986943d23ddff6b2712580180ebcdd63168a Mon Sep 17 00:00:00 2001 From: Joshua Moerman Date: Fri, 16 Nov 2018 00:00:57 +0100 Subject: [PATCH] Aan intro gewerkt --- biblio-eigen.bib | 102 +++++++++++---- biblio.bib | 137 ++++++++++++++++++++ content.tex | 11 +- content/introduction.tex | 208 ++++++++++++++++++++++++++----- content/ordered-nominal-sets.tex | 24 ++-- content/test-methods.tex | 33 ++--- environment.tex | 4 +- 7 files changed, 438 insertions(+), 81 deletions(-) diff --git a/biblio-eigen.bib b/biblio-eigen.bib index 6556914..58b0384 100644 --- a/biblio-eigen.bib +++ b/biblio-eigen.bib @@ -1,3 +1,38 @@ +@inproceedings{BosJM17, + author = {Petra van den Bos and + Ramon Janssen and + Joshua Moerman}, + editor = {Nina Yevtushenko and + Ana Rosa Cavalli and + H{\"{u}}sn{\"{u}} Yenig{\"{u}}n}, + title = {$n$-Complete Test Suites for {IOCO}}, + booktitle_= {Testing Software and Systems - 29th {IFIP} {WG} 6.1 International + Conference, {ICTSS} 2017, St. Petersburg, Russia, October 9-11, 2017, + Proceedings}, + booktitle = {{ICTSS} 2017 Proceedings}, + series = {Lecture Notes in Computer Science}, + volume = {10533}, + pages = {91--107}, + publisher = {Springer}, + year = {2017}, + url = {https://doi.org/10.1007/978-3-319-67549-7\_6}, + doi = {10.1007/978-3-319-67549-7\_6}, + timestamp = {Mon, 25 Sep 2017 17:37:19 +0200}, + biburl = {https://dblp.org/rec/bib/conf/pts/BosJM17}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{BosJM18, + author = {Petra van den Bos and + Ramon Janssen and + Joshua Moerman}, + title = {$n$-Complete Test Suites for {IOCO}}, + journal = {Software Quality Journal}, + publisher = {Springer}, + year = {2018}, + note = {To appear} +} + @inproceedings{DBLP:conf/icfem/SmeenkMVJ15, author = {Wouter Smeenk and Joshua Moerman and @@ -7,9 +42,10 @@ Sylvain Conchon and Fatiha Za{\"{\i}}di}, title = {Applying Automata Learning to Embedded Control Software}, - booktitle = {Formal Methods and Software Engineering - 17th International Conference + booktitle_= {Formal Methods and Software Engineering - 17th International Conference on Formal Engineering Methods, {ICFEM} 2015, Paris, France, November 3-5, 2015, Proceedings}, + booktitle = {{ICFEM} 2015, Proceedings}, series = {Lecture Notes in Computer Science}, volume = {9407}, pages = {67--83}, @@ -31,9 +67,10 @@ Carlos Mart{\'{\i}}n{-}Vide and Bianca Truthe}, title = {Minimal Separating Sequences for All Pairs of States}, - booktitle = {Language and Automata Theory and Applications - 10th International + booktitle_= {Language and Automata Theory and Applications - 10th International Conference, {LATA} 2016, Prague, Czech Republic, March 14-18, 2016, Proceedings}, + booktitle = {{LATA} 2016, Proceedings}, series = {Lecture Notes in Computer Science}, volume = {9618}, pages = {181--193}, @@ -46,6 +83,23 @@ bibsource = {dblp computer science bibliography, https://dblp.org} } +@article{SmetsersMJV16, + author = {Rick Smetsers and + Joshua Moerman and + Mark Janssen and + Sicco Verwer}, + title = {Complementing Model Learning with Mutation-Based Fuzzing}, + journal = {CoRR}, + volume = {abs/1611.02429}, + year = {2016}, + url = {http://arxiv.org/abs/1611.02429}, + archivePrefix = {arXiv}, + eprint = {1611.02429}, + timestamp = {Mon, 13 Aug 2018 16:46:49 +0200}, + biburl = {https://dblp.org/rec/bib/journals/corr/SmetsersMJV16}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + @inproceedings{MoermanS0KS17, author = {Joshua Moerman and Matteo Sammartino and @@ -55,9 +109,10 @@ editor = {Giuseppe Castagna and Andrew D. Gordon}, title = {Learning nominal automata}, - booktitle = {Proceedings of the 44th {ACM} {SIGPLAN} Symposium on Principles of + booktitle_= {Proceedings of the 44th {ACM} {SIGPLAN} Symposium on Principles of Programming Languages, {POPL} 2017, Paris, France, January 18-20, 2017}, + booktitle = {{POPL} 2017, Proceedings}, pages = {613--625}, publisher = {{ACM}}, year = {2017}, @@ -67,27 +122,23 @@ bibsource = {dblp computer science bibliography, https://dblp.org} } -@inproceedings{BosJM17, - author = {Petra van den Bos and - Ramon Janssen and - Joshua Moerman}, - editor = {Nina Yevtushenko and - Ana Rosa Cavalli and - H{\"{u}}sn{\"{u}} Yenig{\"{u}}n}, - title = {$n$-Complete Test Suites for {IOCO}}, - booktitle = {Testing Software and Systems - 29th {IFIP} {WG} 6.1 International - Conference, {ICTSS} 2017, St. Petersburg, Russia, October 9-11, 2017, - Proceedings}, - series = {Lecture Notes in Computer Science}, - volume = {10533}, - pages = {91--107}, - publisher = {Springer}, - year = {2017}, - url = {https://doi.org/10.1007/978-3-319-67549-7\_6}, - doi = {10.1007/978-3-319-67549-7\_6}, - timestamp = {Mon, 25 Sep 2017 17:37:19 +0200}, - biburl = {https://dblp.org/rec/bib/conf/pts/BosJM17}, - bibsource = {dblp computer science bibliography, https://dblp.org} +@inproceedings{Moerman18, + author = {Joshua Moerman}, + title = {Learning Product Automata}, + editor = {Olgierd Unold and + Witold Dyrka and + Wojciech Wieczorek}, + booktitle = {ICGI 2018, Proceedings}, + publisher = {Proceedings of Machine Learning Research}, + year = {2018} +} + +@misc{MoermanR19, + author = {Joshua Moerman and + Jurriaan Rot}, + title = {Separated Nominal Automata}, + note = {Under submission}, + year = {2018} } @inproceedings{VenhoekMR18, @@ -97,8 +148,9 @@ editor = {Bernd Fischer and Tarmo Uustalu}, title = {Fast Computations on Ordered Nominal Sets}, - booktitle = {Theoretical Aspects of Computing - {ICTAC} 2018 - 15th International + booktitle_= {Theoretical Aspects of Computing - {ICTAC} 2018 - 15th International Colloquium, Stellenbosch, South Africa, October 16-19, 2018, Proceedings}, + booktitle = {{ICTAC} 2018, Proceedings}, series = {Lecture Notes in Computer Science}, volume = {11187}, pages = {493--512}, diff --git a/biblio.bib b/biblio.bib index 69f329a..fb9c5bf 100644 --- a/biblio.bib +++ b/biblio.bib @@ -18,6 +18,24 @@ bibsource = {dblp computer science bibliography, https://dblp.org} } +@inproceedings{AartsRP13, + author = {Fides Aarts and + Joeri de Ruiter and + Erik Poll}, + title = {Formal Models of Bank Cards for Free}, + booktitle = {Sixth {IEEE} International Conference on Software Testing, Verification + and Validation, {ICST} 2013 Workshops Proceedings, Luxembourg, Luxembourg, + March 18-22, 2013}, + pages = {461--468}, + publisher = {{IEEE} Computer Society}, + year = {2013}, + url = {https://doi.org/10.1109/ICSTW.2013.60}, + doi = {10.1109/ICSTW.2013.60}, + timestamp = {Wed, 24 May 2017 08:30:36 +0200}, + biburl = {https://dblp.org/rec/bib/conf/icst/AartsRP13}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + @phdthesis{Aarts14, title = {Tomte: bridging the gap between active learning and real-world systems}, author = {Fides Dorothea Aarts}, @@ -540,6 +558,24 @@ bibsource = {dblp computer science bibliography, https://dblp.org} } +@inproceedings{ChaluparPPR14, + author = {Georg Chalupar and + Stefan Peherstorfer and + Erik Poll and + Joeri de Ruiter}, + editor = {Sergey Bratus and + Felix "FX" Lindner}, + title = {Automated Reverse Engineering using {Lego}®}, + booktitle = {8th {USENIX} Workshop on Offensive Technologies, {WOOT} '14, San Diego, + CA, USA, August 19, 2014.}, + publisher = {{USENIX} Association}, + year = {2014}, + url = {https://www.usenix.org/conference/woot14/workshop-program/presentation/chalupar}, + timestamp = {Mon, 04 Dec 2017 17:32:55 +0100}, + biburl = {https://dblp.org/rec/bib/conf/woot/ChaluparPPR14}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + @inproceedings{ChanVO89, author = {Wendy Y. L. Chan and Son T. Vuong and @@ -840,6 +876,53 @@ bibsource = {dblp computer science bibliography, https://dblp.org} } +@inproceedings{Fiterau-BrosteanH17, + author = {Paul Fiterau{-}Brostean and + Falk Howar}, + editor = {Laure Petrucci and + Cristina Seceleanu and + Ana Cavalcanti}, + title = {Learning-Based Testing the Sliding Window Behavior of {TCP} Implementations}, + booktitle = {Critical Systems: Formal Methods and Automated Verification - Joint + 22nd International Workshop on Formal Methods for Industrial Critical + Systems - and - 17th International Workshop on Automated Verification + of Critical Systems, FMICS-AVoCS 2017, Turin, Italy, September 18-20, + 2017, Proceedings}, + series = {Lecture Notes in Computer Science}, + volume = {10471}, + pages = {185--200}, + publisher = {Springer}, + year = {2017}, + url = {https://doi.org/10.1007/978-3-319-67113-0\_12}, + doi = {10.1007/978-3-319-67113-0\_12}, + timestamp = {Tue, 05 Sep 2017 14:29:34 +0200}, + biburl = {https://dblp.org/rec/bib/conf/fmics/Fiterau-Brostean17}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{Fiterau-BrosteanLPRVV17, + author = {Paul Fiterau{-}Brostean and + Toon Lenaerts and + Erik Poll and + Joeri de Ruiter and + Frits W. Vaandrager and + Patrick Verleg}, + editor = {Hakan Erdogmus and + Klaus Havelund}, + title = {Model learning and model checking of {SSH} implementations}, + booktitle = {Proceedings of the 24th {ACM} {SIGSOFT} International {SPIN} Symposium + on Model Checking of Software, Santa Barbara, CA, USA, July 10-14, + 2017}, + pages = {142--151}, + publisher = {{ACM}}, + year = {2017}, + url = {https://doi.org/10.1145/3092282.3092289}, + doi = {10.1145/3092282.3092289}, + timestamp = {Tue, 06 Nov 2018 11:06:50 +0100}, + biburl = {https://dblp.org/rec/bib/conf/spin/Fiterau-Brostean17}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + @phdthesis{Fiterau-Brostean18, title = {Active Model Learning for the Analysis of Network Protocols}, author = {Paul Fiterau{-}Brostean}, @@ -1674,6 +1757,21 @@ bibsource = {dblp computer science bibliography, https://dblp.org} } +@article{PeledVY02, + author = {Doron A. Peled and + Moshe Y. Vardi and + Mihalis Yannakakis}, + title = {Black Box Checking}, + journal = {Journal of Automata, Languages and Combinatorics}, + volume = {7}, + number = {2}, + pages = {225--246}, + year = {2002}, + timestamp = {Fri, 12 Feb 2016 07:36:50 +0100}, + biburl = {https://dblp.org/rec/bib/journals/jalc/PeledVY02}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + @article{Petrenko97, author = {Alexandre Petrenko}, title = {Technical Correspondence Comments on "A Reduced Test Suite for Protocol @@ -1822,6 +1920,23 @@ url = {http://hdl.handle.net/1887/35814} } +@inproceedings{RuiterP15, + author = {Joeri de Ruiter and + Erik Poll}, + editor = {Jaeyeon Jung and + Thorsten Holz}, + title = {Protocol State Fuzzing of {TLS} Implementations}, + booktitle = {24th {USENIX} Security Symposium, {USENIX} Security 15, Washington, + D.C., USA, August 12-14, 2015.}, + pages = {193--206}, + publisher = {{USENIX} Association}, + year = {2015}, + url = {https://www.usenix.org/conference/usenixsecurity15/technical-sessions/presentation/de-ruiter}, + timestamp = {Fri, 19 Aug 2016 15:38:04 +0200}, + biburl = {https://dblp.org/rec/bib/conf/uss/RuiterP15}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + @inproceedings{Rutten98, author = {Jan J. M. M. Rutten}, editor = {Davide Sangiorgi and @@ -1875,6 +1990,28 @@ bibsource = {dblp computer science bibliography, https://dblp.org} } +@inproceedings{SchutsHV16, + author = {Mathijs Schuts and + Jozef Hooman and + Frits W. Vaandrager}, + editor = {Erika {\'{A}}brah{\'{a}}m and + Marieke Huisman}, + title = {Refactoring of Legacy Software Using Model Learning and Equivalence + Checking: {A}n Industrial Experience Report}, + booktitle = {Integrated Formal Methods - 12th International Conference, {IFM} 2016, + Reykjavik, Iceland, June 1-5, 2016, Proceedings}, + series = {Lecture Notes in Computer Science}, + volume = {9681}, + pages = {311--325}, + publisher = {Springer}, + year = {2016}, + url = {https://doi.org/10.1007/978-3-319-33693-0\_20}, + doi = {10.1007/978-3-319-33693-0\_20}, + timestamp = {Wed, 24 May 2017 08:28:10 +0200}, + biburl = {https://dblp.org/rec/bib/conf/ifm/SchutsHV16}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + @inproceedings{Segoufin06, author = {Luc Segoufin}, editor = {Zolt{\'{a}}n {\'{E}}sik}, diff --git a/content.tex b/content.tex index 2fbbdc4..f551be7 100644 --- a/content.tex +++ b/content.tex @@ -6,6 +6,15 @@ \startfrontmatter +\start +\switchtobodyfont[20pt] +\midaligned{\framed[width=12cm,align=center,offset=2cm,frame=off]{Nominal Techniques \& Testing Techniques for Automata}} +\switchtobodyfont[12pt] +\midaligned{Joshua Moerman} +\midaligned{draft 16 November 2018} +\stop +\page[yes] + \setupwhitespace[none] \completecontent \completelistoffigures @@ -32,7 +41,7 @@ \startbackmatter -\switchtobodyfont[9pt] +\switchtobodyfont[8pt] \title{References} \placelistofpublications \stopbackmatter diff --git a/content/introduction.tex b/content/introduction.tex index 3035460..13a5fb2 100644 --- a/content/introduction.tex +++ b/content/introduction.tex @@ -18,7 +18,6 @@ The goal of this technique can be manifold: It can be used to reverse-engineer a system, to find bugs in it, to verify properties of the system, or to understand the system in one way or another. It is \emph{not} just random testing: the information learned during the interaction with the system is actively used to guide following interactions. Additionally, the information learned can be inspected and analysed. -\todo{We open the box?} This thesis is about model learning and related techniques. In the first part, I present results concerning \emph{black box testing} of automata. @@ -73,7 +72,6 @@ This type of learning is hence called \emph{exact learning}. It should be clear why this is called \emph{query learning} or \emph{active learning}. The learning algorithm initiates interaction with the teacher by posing queries, it may construct its own data points and ask for their corresponding label. -\todo{Nog afzetten tegen passive learning?} Another paradigm which is relevant for our type of applications is \emph{PAC-learning with membership queries}. Here, the algorithm can again use MQs as before, but the EQs are replace by random sampling. @@ -127,8 +125,8 @@ We deviate even further from the PAC-model as we sometimes change our distributi Yet, as applications show, this is a useful way of learning software behaviour. -\stopsubsection -\startsubsection +\stopsection +\startsection [title={Research challenges}] Model learning is far from a @@ -139,6 +137,7 @@ The restrictions include the following. \item The system behaves deterministically. \item One can reliably reset the system. \item The system can be modelled with a finite state space. +This also means that the model cannot cope with time or data. \item The set of input actions is finite. \item One knows when the target is reached. \stopitemize @@ -158,32 +157,58 @@ Not only the alphabet is infinite in these cases, the state-space is as well, si In the second part of this thesis, we will see how nominal techniques can be used to tackle this challenge. Being able to learn automata over an infinite alphabet is not new. -It has been tackled in the theses of \cite[Aarts14, Fiterau-Brostean18, Cassel15]. -\todo{Dus...} +It has been tackled in the theses of \citet[Aarts14], \citet[Cassel15], and \citet[Fiterau-Brostean18]. +In the first, the problem is solved by considering abstractions, which reduces the alphabet to a finite one. +These abstractions are automatically refined when a counterexample is presented to the algorithms. +\citet[Fiterau-Brostean18] extends this approach to cope with \quotation{fresh values}, crucial for protocols such as TCP. +In the thesis by \citet[Cassel15], another approach is taken. +The queries are changed to tree queries. +The approach in my thesis will be based on symmetries, which gives yet another insight into the problem of learning such automata. \stopdescription -\stopsubsection -\startsubsection - [title={A few applications of learning}] +\stopsection +\startsection + [title={Applications of Model Learning}] -Since this thesis only contains one \quote{real-world} application on learning in \in{Chapter}[chap:applying-automata-learning], I think it is good to mention a few others. +Since this thesis only contains one real-world application on learning in \in{Chapter}[chap:applying-automata-learning], I think it is good to mention a few others. Although we remain in the context of learning software behaviour, the applications are quite different. +This is by no means a complete list. -Finite state machine conformance testing is a core topic in testing literature that is relevant for communication protocols and other reactive systems. -The combination of conformance testing and automata learning is applied in practice and it proves to be an effective way of finding bugs \cite[Joeri], replacing legacy software \cite[Philips], or inferring models to be used in model checking \cite[MeinkeSindhu]. +\description{Bug finding in protocols.} +A prominent example is by \citet[Fiterau-Brostean16]. +They learn models of TCP implementations -- both clients and server sides. +Interestingly, they found bugs in the (closed source) Windows implementation. +Later, \citet[Fiterau-BrosteanH17] also found a bug in the sliding window of the Linux implementation. +Other protocols have been learned as well, such as the MQTT protocol by \citet[TapplerAB17], TLS by \citet[RuiterP15], and SSH by \citet[Fiterau-BrosteanLPRVV17]. +Many of these applications reveal bugs by learning a model and consequently apply model checking. +The combination of learning and model checking was first described by \citet[PeledVY02]. -Several model learning applications: -learning embedded controller software \cite[DBLP:conf/icfem/SmeenkMVJ15], learning the TCP protocol \cite[Fiterau-Brostean16] and learning the MQTT protocol \cite[TapplerAB17]. +\description{Bug finding in smart cards.} +\citet[AartsRP13] learn the software on smart cards of several Dutch and German banks. +These cards use the EMV protocol, which is run on the card itself. +So this is an example of a real black box system, where no other monitoring is possible and no code is available. +No vulnerabilities were found, although each card had a slightly different protocol. +Card readers have been learned by \citet[ChaluparPPR14]. +They built a Lego machine which could automatically press buttons and the researchers found a security flaw. + +\description{Regression testing.} +\citet[HungarNS03] describe the potential of automata learning in regression testing. +The aim is not to find bugs, but to monitor the development process of a system. +By considering the differences between models at different stages, one can generate regressions tests. + +\description{Refactoring legacy software.} +Model learning can also be used in order to verify refactored software. +\citet[SchutsHV16] have applied this at a project within Philips. +They learn both an old version and a new version of the same component. +By comparing the learned models, some differences could be seen. +This gave developers opportunities to solve problems before replacing the old component by the new one. -\stopsubsection \stopsection \startsection [title={Testing Techniques}] -\todo{Even lang maken als Nominal Techniques.} - An important step in automata learning is equivalence checking. Normally, this is abstracted away and done by an oracle, but we intend to implement such an oracle ourselves for our applications. Concretely, the problem we need to solve is that of \emph{conformance checking}\footnote{It is also known as \emph{machine verification} or \emph{fault detection}.} and this was first described by \citet[Moore56]. @@ -202,6 +227,9 @@ In particular, the set of experiments is polynomial in size. These techniques will be discussed in detail in \in{Chapter}[chap:test-methods]. More background and other related problems, as well as their complexity results, are well exposed in a survey of \citet[LeeY94]. +\todo{I want to give an example (by giving a state machine) of why black box testing is hard.} +\todo{I want to move some related work from the second chapter to here}. + \stopsection \startsection @@ -279,6 +307,8 @@ They provide an equivalence between register automata and nominal automata. Additionally, they generalise the work on nominal sets to other symmetries. The symmetries we encounter in this thesis are the following, but other symmetries can be found in the literature. +The symmetry directly corresponds to the data values (and operations) used in an automaton. +The data values are often called \emph{atoms}. \startitemize[after] \item The \quotation{equality symmetry}. @@ -298,33 +328,155 @@ Typically, this means we use the rational numbers, $\Q$, as data values and symm So what exactly is a nominal set? I will not define it here and leave the formalities to the corresponding chapters. -It suffices, for now, to think of nominal sets with abstract sets (often infinite) on which a group of symmetries acts. -The group of symmetries is not just any group, we fix a group of bijections on some fixed data domain. +It suffices, for now, to think of nominal sets as abstract sets (often infinite) on which a group of symmetries acts. +This action makes it possible to interpret the symmetries of the data values in the abstract set. +For automata, this allows us to talk about symmetries on the state space, the set of transitions, and the alphabet. -In order to implement such sets algorithmically, we impose two finiteness requirements -\startitemize +In order to implement these sets algorithmically, we impose two finiteness requirements. +Both properties can be expressed using only the group action. +\startitemize[after] \item -Finite support. +Each element is \emph{finitely supported}. +A way to think of this requirement is that each element is \quotation{constructed} out of finitely many data values. \item -Orbit-finite. +The set is \emph{orbit-finite}. +This means that we can choose finitely many elements such that any other element is a permuted version of one of those elements. \stopitemize +If we wish to model the automaton from \in{Figure}[fig:login-system] as a nominal automaton, then we can simply define the state space as +\startformula +Q = \{ q_0 \} \cup \{ q_{1, a} \mid a \in \atoms \} \cup \{ q_{2, a} \mid a \in \atoms \}, +\stopformula +where $\atoms$ is the set of atoms. +In this example, $\atoms$ is the set of all possible passwords. +The set $Q$ is infinite, but satisfies the two finiteness requirements. + +The upshot of doing this is that the set $Q$ (and transition structure) corresponds directly to the semantics of the automaton. +We do not have to \emph{encode} how values relate or how they interact. +Instead, the set (and transition structure) defines all we need to know. +Algorithms, such as reachability, minimisation, and learning, can be run on such automata, despite the sets being infinite. +These algorithms can be implemented rather easily, by using a library which implements nominal sets. + +One has to be careful as not all results from automata theory transfer to nominal automata. +A notable example is the powerset construction, which converts a non-deterministic automaton into a deterministic one. +The problem here is that the powerset of a set is generally \emph{not} orbit-finite and so the finiteness requirement is not met. +Consequently, languages accepted by nominal DFAs are \emph{not} closed under Kleene star, or even concatenation. \stopsubsection -\startsubsection +\stopsection +\startsection [title={Contributions}] The following chapters are split into two parts. \in{Part}[part:testing] contains material about testing techniques, while \in{Part}[part:nominal] is about nominal techniques. Each chapter could be read in isolation. -However, the chapters do get more technical and mathematical -- espacially in \in{Part}[part:nominal]. +However, the chapters do get more technical and mathematical -- especially in \in{Part}[part:nominal]. +\todo{Should I be more clear about my contribution?} -\description{\in{Chapter}[chap:test-methods].} -Bla +\startdescription[title={\in{Chapter}[chap:test-methods]: FSM-based test methods.}] +This chapter introduces test generation methods which can be used for learning. +The methods are presented in a uniform way, which allows to give a single proof of completeness for all these methods. +Moreover, the uniform presentation gives room to develop new test generation methods. +One particular contribution (besides the theoretical framework) is the new \emph{hybrid ADS} test generation method. +\stopdescription +\startdescription[title={\in{Chapter}[chap:applying-automata-learning]: Applying automata learning to embedded control software.}] +In this chapter we will apply model learning to an industrial case study. +It is a unique benchmark as it is much bigger than any of the applications seen before (3410 states and 77 inputs). +This makes it very challenging to learn a model and the main obstacle is finding counterexamples. +Here, my main contribution is the new test generation method from \in{Chapter}[chap:test-methods]. + +This is based on the following publication: + +\cite[entry][DBLP:conf/icfem/SmeenkMVJ15]. +\stopdescription + +\startdescription[title={\in{Chapter}[chap:minimal-separating-sequences]: Minimal separating sequences for all pairs of states.}] +Continuing on test generation methods, this chapter presents an efficient algorithm to construct separating sequences. +Not only is the algorithm efficient -- it runs in $\bigO(n \log n)$ time -- it also constructs minimal length sequences. +The algorithm is based on a minimisation algorithm by \citet[Hopcroft71], but extending it to construct witnesses is non-trivial. + +This is based on the following publication: + +\cite[entry][DBLP:conf/lata/SmetsersMJ16]. +\stopdescription + +\startdescription[title={\in{Chapter}[chap:learning-nominal-automata]: Learning Nominal Automata.}] +In this chapter, we show how to learn automata over infinite alphabets. +We do this by translating the \LStar{} algorithm directly to a nominal version, \nLStar{}. +The correctness proofs mimic the original proofs by \citet[Angluin87]. +Since our new algorithm is close to the original, we are able to translate variants of the \LStar{} algorithm as well. +In particular, we provide a learning algorithm for nominal non-deterministic automata. + +This is based on the following publication: + +\cite[entry][MoermanS0KS17]. +\stopdescription + +\startdescription[title={\in{Chapter}[chap:ordered-nominal-sets]: Fast computations on ordered nominal sets.}] +In this chapter, we provide a library to compute with nominal sets. +We restrict our attention to nominal sets over the total order symmetry. +This symmetry allows for a rather easy characterisation of orbits, and hence an easy implementation. +We experimentally show that it is competitive with existing tools, which are based on SMT solvers. + +This is based on the following publication: + +\cite[entry][VenhoekMR18]. +\stopdescription + +\startdescription[title={\in{Chapter}[chap:separated-nominal-automata]: Separated Nominal Automata.}] +We investigate how to reduce the size of certain nominal automata. +This is based on the observation that some languages (with outputs) are not just invariant under symmetries, but invariant under arbitrary \emph{transformations}, or \emph{renamings}. +We define a new type of automaton, the \emph{separated nominal automaton}, and show that they exactly accept those languages which are closed under renamings. +All of this is shown by using a theoretical framework: we establish a strong relationship between nominal sets on one hand, and nominal renaming sets on the other. + +This is based on a paper under submission: + +\cite[entry][MoermanR19]. +\stopdescription + +Besides these chapters, of which many are based on publications, the author has published the following papers. +These are not included in this thesis, but a short summary of those papers is presented below. + +\startdescription[title={Complementing Model Learning with Mutation-Based Fuzzing.}] +Our group at the Radboud University participated in the RERS challenge. +This is a challenge where reactive software is provided and researchers have to asses validity of certain properties (given as LTL specifications). +We approached this with model learning: +Instead of analysing the source code, we simply learned the external behaviour, and then used model checking on the learned model. +This has worked remarkably well, as the models of the external behaviour are not too big. +Our results were presented at the RERS workshop (ISOLA 2016). +The report can be found on ar$\Chi$iv: + +\cite[entry][SmetsersMJV16]. +\stopdescription + +\startdescription[title={$n$-Complete test suites for IOCO.}] +In this paper, we investigate complete test suites for labelled transition systems (LTSs), instead of deterministic Mealy machines. +This is a much harder problem than conformance testing of deterministic systems. +The system may adversarially avoid certain states the tester wishes to test. +We provide a test suite which is $n$-complete (provided the implementation is a suspension automaton). +My personal contribution here is the proof of completeness, which resembles the proof presented in \in{Chapter}[chap:test-methods] closely. +The conference paper was presented at ICTSS: + +\cite[entry][BosJM17]. + +An extended version will appear soon: + +\cite[entry][BosJM18]. +\stopdescription + +\startdescription[title={Learning Product Automata.}] +In this article we consider Moore machines with multiple outputs. +These machines can be decomposed by projecting on each output, +resulting in smaller components that can be learned with fewer queries. +We give experimental evidence that this is a useful technique which can reduce the number of queries substantially. +This is all motivated by the idea that compositional methods are widely used throughout engineering and that we should use this in model learning. +This was presented by Vaandrager at ICGI: + +\cite[entry][Moerman18]. +\stopdescription -\stopsubsection \stopsection \referencesifcomponent \stopchapter diff --git a/content/ordered-nominal-sets.tex b/content/ordered-nominal-sets.tex index a7586dd..bb96dd9 100644 --- a/content/ordered-nominal-sets.tex +++ b/content/ordered-nominal-sets.tex @@ -714,11 +714,13 @@ The output of these programs was manually checked to see if the minimisation was The results (shown in \in{Table}[minimize_results]) for random automata show a clear advantage for \ONS{}, which is capable of running all supplied testcases in less than one second. This in contrast to both \LOIS{} and \NLambda{}, which take more than $2$ hours on the largest random automata. -\placetable[here][minimize_results] -{Running times for \in{Algorithm}[alg:moore] implemented in the three libraries. -$N(S)$ is the size of the input and $N(S^{\text{min}})$ the size of the minimal automaton. -For \ONS{}, the time used to generate the automaton is reported separately (in grey). -Timeouts are indicated by $\infty$.} +\startplacetable + [title={Running times for \in{Algorithm}[alg:moore] implemented in the three libraries. + $N(S)$ is the size of the input and $N(S^{\text{min}})$ the size of the minimal automaton. + For \ONS{}, the time used to generate the automaton is reported separately (in grey). + Timeouts are indicated by $\infty$.}, + list={Running times for minimisation.}, + reference=minimize_results] \starttabulate[|l|c|c|cg{.}|cg{.}|cg{.}|cg{.}|][distance=none] \NC Type \NC $\Nsize(S)$ \NC $\Nsize(S^{\text{min}})$ \VL \ONS{} (s) \NC \color[darkgray]{Gen (s)} \VL \NLambda{} (s) \VL \LOIS{} (s) \NC\NR \HL @@ -742,6 +744,7 @@ Timeouts are indicated by $\infty$.} \NC $\Lmax$ \NC 5 \NC 3 \VL 0.00 \NC \color[darkgray]{0.00} \VL 2.06 \VL 0.03 \NC\NR \NC $\Lint$ \NC 5 \NC 5 \VL 0.00 \NC \color[darkgray]{0.00} \VL 1.55 \VL 0.03 \NC\NR \stoptabulate +\stopplacetable The results for structured automata show a clear effect of the extra structure. Both \NLambda{} and \LOIS{} remain capable of minimising the automata in reasonable amounts of time for larger sizes. @@ -813,10 +816,12 @@ For languages which are equivariant for the equality symmetry, the \NLambda{} im This is expected as the automata themselves have fewer orbits. It is interesting to see that these languages can be learned more efficiently by choosing the right symmetry. -\placetable[here][learning_results] -{Running times and number of membership queries for the \nLStar{} algorithm. -For \NLambda{} we used two version: \NLambda{}$^{\mathit{ord}}$ uses the total order symmetry \NLambda{}$^{\mathit{eq}}$ uses the equality symmetry. -Timeouts are indicated by $\infty$.} +\startplacetable + [title={Running times and number of membership queries for the \nLStar{} algorithm. + For \NLambda{} we used two version: \NLambda{}$^{\mathit{ord}}$ uses the total order symmetry \NLambda{}$^{\mathit{eq}}$ uses the equality symmetry. + Timeouts are indicated by $\infty$.}, + list={Running times for automata learning.}, + reference=learning_results] \starttabulate[|l|c|c|cg(.)|r|cg(.)|r|cg(.)|r|][distance=none] \NC \NC \NC \VL \NC \ONS{} \VL \NC \NLambda{}$^{\mathit{ord}}$ \VL \NC \NLambda{}$^{\mathit{eq}}$ \NC\NR \NC Model \NC $\Nsize(S)$ \NC $\dim(S)$ \VL time (s) \NC MQs \VL time (s) \NC MQs \VL time (s) \NC MQs \NC\NR @@ -838,6 +843,7 @@ Timeouts are indicated by $\infty$.} \NC $\Lmax$ \NC 3 \NC 1 \VL 0.01 \NC 54 \VL 3.58 \NC 54 \VL \NC \NC\NR \NC $\Lint$ \NC 5 \NC 2 \VL 0.59 \NC 478 \VL 83 \NC 478 \VL \NC \NC\NR \stoptabulate +\stopplacetable \stopsubsubject diff --git a/content/test-methods.tex b/content/test-methods.tex index 74cee95..b3cadf8 100644 --- a/content/test-methods.tex +++ b/content/test-methods.tex @@ -12,11 +12,11 @@ A key aspect of test generation methods is the size and completeness of the test On one hand, we want to cover as much as the specification as possible, hopefully ensuring that we find mistakes in any faulty implementation. On the other hand: testing takes time, so we want to minimise the size of a test suite. -\todo{Theoretische bijdrage scherper maken} The test methods described here are well-known in the literature of FSM-based testing. They all share similar concepts, such as \emph{access sequences} and \emph{state identifiers}. In this chapter we will define these concepts, relate them with one another and show how to build test suites from these concepts. From this theoretical discussion we derive a new algorithm: the \emph{hybrid ADS methods}, which is applied to an industrial case study in \in{Chapter}[chap:applying-automata-learning]. +\todo{Theoretische bijdrage scherper maken} This chapter starts with the basics: Mealy machines, sequences and what it means to test a black box system. Then, starting from \in{Section}[sec:separating-sequences] we define several concepts, such as state identifiers, in order to distinguish one state from another. @@ -104,15 +104,15 @@ Tests, or experiments, are generated from the specification and applied to the i If the output is different from the specified output, then we know the implementation is flawed. The goals is to test as little as possible, while covering as much as possible. -We assume the following -\todo{Herhaling van paragraaf hiervoor} -\startitemize[after] -\item -The system can be modelled as Mealy machine. -In particular, this means it is \emph{deterministic} and \emph{complete}. -\item -We are able to reset the system, i.e., bring it back to the initial state. -\stopitemize +%We assume the following +%\todo{Herhaling van paragraaf hiervoor} +%\startitemize[after] +%\item +%The system can be modelled as Mealy machine. +%In particular, this means it is \emph{deterministic} and \emph{complete}. +%\item +%We are able to reset the system, i.e., bring it back to the initial state. +%\stopitemize A test suite is nothing more than a set of sequences. We do not have to encode outputs in the test suite, as those follow from the deterministic specification. @@ -197,7 +197,7 @@ This is crucial for black box testing, as we do not know the implementation, so Before we construct test suites, we discuss several types of useful sequences. All the following notions are standard in the literature, and the corresponding references will be given in \in{Section}[sec:methods], where we discuss the test generation methods using these notions. We fix a Mealy machine $M$ for the remainder of this chapter. -\todo{Checken of dit echt het geval is!} +%\todo{Checken of dit echt het geval is!} For convenience we assume $M$ to be minimal, so that distinct states are, in fact, inequivalent. All definitions can be generalised to non-minimal $M$, by replacing the word \quote{distinct} (or \quote{other}) with \quote{inequivalent}. @@ -256,7 +256,7 @@ We return to this notion in more detail in \in{Example}[ex:uio-counterexample]. We may obtain a characterisation set by taking the union of state identifiers for each state. For every machine we can construct a set of harmonised state identifiers as will be shown in \in{Chapter}[chap:separating-sequences] and hence every machine has a characterisation set. -\todo{Iets versimpelen} +%\todo{Iets versimpelen} \startexample As mentioned before, state $s_2$ from \in{Figure}[fig:running-example] has a state identifier $\{aa, ac, b\}$. @@ -399,7 +399,7 @@ On families we define \item union: $\Fam{X} \cup \Fam{Y}$ is defined point-wise: $(\Fam{X} \cup \Fam{Y})_s = X_s \cup Y_s$, \item concatenation: -\todo{Associativiteit?} +%\todo{Associativiteit?} $X \odot \Fam{Y} = \{ xy \mid x \in X, y \in Y_{\delta(s_0, x)} \}$, and \item refinement: $\Fam{X} ; \Fam{Y}$ defined by \footnote{We use the convention that $\cap$ binds stronger than $\cup$. @@ -451,8 +451,8 @@ We fix a state cover $P$ throughout this section and take the transition cover $ reference=sec:w] After the work of \citet[Moore56], it was unclear whether a test suite of polynomial size could exist. -\todo{Melden dat Moore een exp. suite gaf, en dat een poly suite open is.} -Both \citet[Chow78, Vasilevskii73] independently prove that such a test suite exists. +He presented a finite test suite which was complete, however it was exponential in size. +Both \citet[Chow78, Vasilevskii73] independently prove that test suites of polynomial size exist. \footnote{More precisely: the size of $T_{\text{W}}$ is polynomial in the size of the specification for each fixed $k$.} The W method is a very structured test suite construction. It is called the W method as the characterisation set is often called $W$. @@ -606,11 +606,12 @@ With the same UIOs as above, the resulting UIOv test suite for the specification [title={Example}, reference=sec:all-example] -\todo{Beetje raar om Example Example te hebben.} +%\todo{Beetje raar om Example Example te hebben.} Let us compute all the previous test suites on the specification in \in{Figure}[fig:running-example]. We will be testing without extra states, i.e., we construct 5-complete test suites. We start by defining the state and transition cover. For this, we simply take all shortest sequences from the initial state to the other states: +\todo{Insert a picture so that one doesn't have to go back.} \startformula\startalign \NC P \NC = \{ \epsilon, a, aa, b, ba \} \NR \NC Q = P \cdot I \NC = \{ a, b, c, aa, ab, ac, aaa, aab, aac, ba, bb, bc, baa, bab, bac \} \NR diff --git a/environment.tex b/environment.tex index c216a5a..bfafc10 100644 --- a/environment.tex +++ b/environment.tex @@ -6,7 +6,7 @@ \setupexternalfigures[directory={images,../images}] % \definemode[afloop][yes] -\definemode[draft][yes] +% \definemode[draft][yes] \environment bib \environment font @@ -49,7 +49,7 @@ This chapter is based on the following publication: \define\colon{{\,{:}\,\,}} \define[1]\defn{\emph{#1}} -\define[1]\todo{\color[red]{\ss \bf #1}} +\define[1]\todo{\color[red]{\ss todo: \bf #1}} \define[1]\label{\todo{\tt \\label #1}} \define\referencesifcomponent{\doifnotmode{everything}{\startsubject[title={References}]\switchtobodyfont[9pt]\placelistofpublications\stopsubject}}