
Refs shortened

Joshua Moerman 2018-11-13 12:39:15 +01:00
parent 52277d73bd
commit b4f7370429
10 changed files with 233 additions and 233 deletions


@ -1,4 +1,4 @@
@inproceedings{DBLP:conf/concur/AartsV10,
@inproceedings{AartsV10,
author = {Fides Aarts and
Frits W. Vaandrager},
editor = {Paul Gastin and
@ -29,7 +29,7 @@
url = {http://hdl.handle.net/2066/130428}
}
@article{DBLP:journals/ml/AartsKTVV14,
@article{AartsKTVV14,
author = {Fides Aarts and
Harco Kuppens and
Jan Tretmans and
@ -48,7 +48,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ictac/AartsFKV15,
@inproceedings{AartsFKV15,
author = {Fides Aarts and
Paul Fiterau{-}Brostean and
Harco Kuppens and
@ -71,7 +71,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/fmsd/AartsJUV15,
@article{AartsJUV15,
author = {Fides Aarts and
Bengt Jonsson and
Johan Uijen and
@ -90,7 +90,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/iandc/Angluin87,
@article{Angluin87,
author = {Dana Angluin},
title = {Learning Regular Sets from Queries and Counterexamples},
journal = {Inf. Comput.},
@ -105,7 +105,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/colt/AngluinC97,
@inproceedings{AngluinC97,
author = {Dana Angluin and
Mikl{\'{o}}s Cs{\"{u}}r{\"{o}}s},
editor = {Yoav Freund and
@ -123,7 +123,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/popl/DAntoniV14,
@inproceedings{DAntoniV14,
author = {Loris D'Antoni and
Margus Veanes},
editor = {Suresh Jagannathan and
@ -142,7 +142,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cav/DAntoniV17,
@inproceedings{DAntoniV17,
author = {Loris D'Antoni and
Margus Veanes},
editor = {Rupak Majumdar and
@ -162,7 +162,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icwsm/BastianHJ09,
@inproceedings{BastianHJ09,
author = {Mathieu Bastian and
Sebastien Heymann and
Mathieu Jacomy},
@ -183,7 +183,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/qest/BehrmannDLHPYH06,
@inproceedings{BehrmannDLHPYH06,
author = {Gerd Behrmann and
Alexandre David and
Kim Guldstrand Larsen and
@ -204,7 +204,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fase/BergGJLRS05,
@inproceedings{BergGJLRS05,
author = {Therese Berg and
Olga Grinchtein and
Bengt Jonsson and
@ -229,7 +229,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fase/BergJR06,
@inproceedings{BergJR06,
author = {Therese Berg and
Bengt Jonsson and
Harald Raffelt},
@ -252,7 +252,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fase/BergJR08,
@inproceedings{BergJR08,
author = {Therese Berg and
Bengt Jonsson and
Harald Raffelt},
@ -275,7 +275,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tosem/Bernhard94,
@article{Bernhard94,
author = {Philip J. Bernhard},
title = {A Reduced Test Suite for Protocol Conformance Testing},
journal = {{ACM} Trans. Softw. Eng. Methodol.},
@ -290,7 +290,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icalp/BojanczykL12,
@inproceedings{BojanczykL12,
author = {Miko{\l}aj Boja{\'{n}}czyk and
Slawomir Lasota},
editor = {Artur Czumaj and
@ -312,7 +312,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/popl/BojanczykBKL12,
@inproceedings{BojanczykBKL12,
author = {Miko{\l}aj Boja{\'{n}}czyk and
Laurent Braud and
Bartek Klin and
@ -333,7 +333,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/BojanczykKL14,
@article{BojanczykKL14,
author = {Miko{\l}aj Boja{\'{n}}czyk and
Bartek Klin and
Slawomir Lasota},
@ -360,7 +360,7 @@
url = {http://www.lsv.fr/Publis/RAPPORTS_LSV/PDF/rr-lsv-2008-28.pdf}
}
@inproceedings{DBLP:conf/ijcai/BolligHKL09,
@inproceedings{BolligHKL09,
author = {Benedikt Bollig and
Peter Habermehl and
Carsten Kern and
@ -378,7 +378,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dlt/BolligHLM13,
@inproceedings{BolligHLM13,
author = {Benedikt Bollig and
Peter Habermehl and
Martin Leucker and
@ -400,7 +400,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/popl/BonchiP13,
@inproceedings{BonchiP13,
author = {Filippo Bonchi and
Damien Pous},
editor = {Roberto Giacobazzi and
@ -418,7 +418,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cacm/BonchiP15,
@article{BonchiP15,
author = {Filippo Bonchi and
Damien Pous},
title = {Hacking nondeterminism with induction and coinduction},
@ -456,7 +456,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/popl/BotincanB13,
@inproceedings{BotincanB13,
author = {Matko Botincan and
Domagoj Babic},
editor = {Roberto Giacobazzi and
@ -491,7 +491,7 @@
issn = {0021-8693}
}
@article{DBLP:journals/jlp/CasselHJMS15,
@article{CasselHJMS15,
author = {Sofia Cassel and
Falk Howar and
Bengt Jonsson and
@ -510,7 +510,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/fac/CasselHJS16,
@article{CasselHJS16,
author = {Sofia Cassel and
Falk Howar and
Bengt Jonsson and
@ -528,7 +528,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sigcomm/ChanVO89,
@inproceedings{ChanVO89,
author = {Wendy Y. L. Chan and
Son T. Vuong and
M. Robert Ito},
@ -547,7 +547,7 @@
comment = {DBLP data was wrong}
}
@inproceedings{DBLP:conf/ccs/ChocSS10,
@inproceedings{ChocSS10,
author = {Chia Yuan Cho and
Domagoj Babic and
Eui Chul Richard Shin and
@ -569,7 +569,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tse/Chow78,
@article{Chow78,
author = {Tsun S. Chow},
title = {Testing Software Design Modeled by Finite-State Machines},
journal = {{IEEE} Trans. Software Eng.},
@ -601,7 +601,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/iandc/CianciaM10,
@article{CianciaM10,
author = {Vincenzo Ciancia and
Ugo Montanari},
title = {Symmetries, local names and dynamic (de)-allocation of names},
@ -617,7 +617,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fase/DavidMY02,
@inproceedings{DavidMY02,
author = {Alexandre David and
M. Oliver M{\"{o}}ller and
Wang Yi},
@ -640,7 +640,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tocl/DemriL09,
@article{DemriL09,
author = {St{\'{e}}phane Demri and
Ranko Lazic},
title = {{LTL} with the freeze quantifier and register automata},
@ -656,7 +656,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/fuin/DenisLT02,
@article{DenisLT02,
author = {Fran{\c{c}}ois Denis and
Aur{\'{e}}lien Lemay and
Alain Terlutte},
@ -714,7 +714,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/infsof/DorofeevaEMCY10,
@article{DorofeevaEMCY10,
author = {Rita Dorofeeva and
Khaled El{-}Fakih and
St{\'{e}}phane Maag and
@ -733,7 +733,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/infsof/EndoS13,
@article{EndoS13,
author = {Andr{\'{e}} Takeshi Endo and
Adenilso da Silva Sim{\~{a}}o},
title = {Evaluating test suite characteristics, cost, and effectiveness of
@ -750,7 +750,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/re/EshuisJW02,
@article{EshuisJW02,
author = {Rik Eshuis and
David N. Jansen and
Roel Wieringa},
@ -785,7 +785,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fmics/Fiterau-BrosteanJV14,
@inproceedings{Fiterau-BrosteanJV14,
author = {Paul Fiterau{-}Brostean and
Ramon Janssen and
Frits W. Vaandrager},
@ -807,7 +807,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cav/Fiterau-Brostean16,
@inproceedings{Fiterau-Brostean16,
author = {Paul Fiterau{-}Brostean and
Ramon Janssen and
Frits W. Vaandrager},
@ -828,7 +828,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tse/FujiwaraBKAG91,
@article{FujiwaraBKAG91,
author = {Susumu Fujiwara and
Gregor von Bochmann and
Ferhat Khendek and
@ -863,7 +863,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/tacas/GaravelLMS11,
@inproceedings{GaravelLMS11,
author = {Hubert Garavel and
Fr{\'{e}}d{\'{e}}ric Lang and
Radu Mateescu and
@ -896,7 +896,7 @@
isbn = {978-0070232433}
}
@inproceedings{DBLP:conf/mompes/GraafD07,
@inproceedings{GraafD07,
author = {Bas Graaf and
Arie van Deursen},
editor = {Jo{\~{a}}o M. Fernandes and
@ -917,7 +917,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/acta/Gries73,
@article{Gries73,
author = {David Gries},
title = {Describing an Algorithm by {Hopcroft}},
journal = {Acta Inf.},
@ -929,7 +929,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/GrigoreT16,
@article{GrigoreT16,
author = {Radu Grigore and
Nikos Tzevelekos},
title = {History-Register Automata},
@ -944,7 +944,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pts/GrozLPS08,
@inproceedings{GrozLPS08,
author = {Roland Groz and
Keqin Li and
Alexandre Petrenko and
@ -985,7 +985,7 @@
note = {Waiting for doi?}
}
@inproceedings{DBLP:conf/fmco/HansenKLMPS10,
@inproceedings{HansenKLMPS10,
author = {Helle Hvid Hansen and
Jeroen Ketema and
Bas Luttik and
@ -1011,7 +1011,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cj/HieronsT15,
@article{HieronsT15,
author = {Robert M. Hierons and
Uraz Cengiz T{\"{u}}rker},
title = {Incomplete Distinguishing Sequences for Finite State Machines},
@ -1053,7 +1053,7 @@
isbn = {978-0-12-417750-5}
}
@inproceedings{DBLP:conf/vmcai/HowarSM11,
@inproceedings{HowarSM11,
author = {Falk Howar and
Bernhard Steffen and
Maik Merten},
@ -1074,7 +1074,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/vmcai/HowarSJC12,
@inproceedings{HowarSJC12,
author = {Falk Howar and
Bernhard Steffen and
Bengt Jonsson and
@ -1097,7 +1097,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cav/HungarNS03,
@inproceedings{HungarNS03,
author = {Hardi Hungar and
Oliver Niese and
Bernhard Steffen},
@ -1118,7 +1118,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nfm/IsbernerHS13,
@inproceedings{IsbernerHS13,
author = {Malte Isberner and
Falk Howar and
Bernhard Steffen},
@ -1140,7 +1140,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/IsbernerHS14,
@article{IsbernerHS14,
author = {Malte Isberner and
Falk Howar and
Bernhard Steffen},
@ -1166,7 +1166,7 @@
url = {https://eldorado.tu-dortmund.de/bitstream/2003/34282/1/Dissertation.pdf}
}
@inproceedings{DBLP:conf/birthday/JacobsS14a,
@inproceedings{JacobsS14a,
author = {Bart Jacobs and
Alexandra Silva},
editor = {Franck van Breugel and
@ -1188,7 +1188,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tcs/KaminskiF94,
@article{KaminskiF94,
author = {Michael Kaminski and
Nissim Francez},
title = {Finite-Memory Automata},
@ -1217,7 +1217,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:journals/corr/KlinS16,
@inproceedings{KlinS16,
author = {Bartek Klin and
Michal Szynwelski},
editor = {Robert Atkey and
@ -1236,7 +1236,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tcs/Knuutila01,
@article{Knuutila01,
author = {Timo Knuutila},
title = {Re-describing an algorithm by Hopcroft},
journal = {Theor. Comput. Sci.},
@ -1251,7 +1251,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cade/KopczynskiT16,
@inproceedings{KopczynskiT16,
author = {Eryk Kopczynski and
Szymon Toru{\'{n}}czyk},
editor = {Tim King and
@ -1271,7 +1271,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/popl/KopczynskiT17,
@inproceedings{KopczynskiT17,
author = {Eryk Kopczynski and
Szymon Toru{\'{n}}czyk},
editor = {Giuseppe Castagna and
@ -1289,7 +1289,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icalp/KozenMP015,
@inproceedings{KozenMP015,
author = {Dexter Kozen and
Konstantinos Mamouras and
Daniela Petrisan and
@ -1313,7 +1313,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fmco/KrennSA09,
@inproceedings{KrennSA09,
author = {Willibald Krenn and
Rupert Schlick and
Bernhard K. Aichernig},
@ -1357,7 +1357,7 @@
series = {CTIT Proceedings Series WP09-12}
}
@article{DBLP:journals/tc/LeeY94,
@article{LeeY94,
author = {David Lee and
Mihalis Yannakakis},
title = {Testing Finite-State Machines: State Identification and Verification},
@ -1373,7 +1373,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fmco/Leucker06,
@inproceedings{Leucker06,
author = {Martin Leucker},
editor = {Frank S. de Boer and
Marcello M. Bonsangue and
@ -1395,7 +1395,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/forte/LiGS06,
@inproceedings{LiGS06,
author = {Keqin Li and
Roland Groz and
Muzammil Shahbaz},
@ -1436,7 +1436,7 @@
isbn = {978-0-387-34883-4}
}
@article{DBLP:journals/iandc/MalerP95,
@article{MalerP95,
author = {Oded Maler and
Amir Pnueli},
title = {On the Learnability of Infinitary Regular Sets},
@ -1477,7 +1477,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{DBLP:phd/hal/Mens17,
@phdthesis{Mens17,
author = {Irini-Eleftheria Mens},
title = {Learning regular languages over large alphabets},
title:fr = {Apprentissage de langages r{\'{e}}guliers sur des alphabets de grandes tailles}
@ -1489,7 +1489,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/tacas/MertenSHM11,
@inproceedings{MertenSHM11,
author = {Maik Merten and
Bernhard Steffen and
Falk Howar and
@ -1513,7 +1513,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/tacas/MertenHSCJ12,
@inproceedings{MertenHSCJ12,
author = {Maik Merten and
Falk Howar and
Bernhard Steffen and
@ -1553,7 +1553,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tcs/MontanariS14,
@article{MontanariS14,
author = {Ugo Montanari and
Matteo Sammartino},
title = {A network-conscious {\(\pi\)}-calculus and its coalgebraic semantics},
@ -1577,7 +1577,7 @@
publisher = {Princeton University Press}
}
@inproceedings{DBLP:conf/tacas/MouraB08,
@inproceedings{MouraB08,
author = {Leonardo Mendon{\c{c}}a de Moura and
Nikolaj Bj{\o}rner},
editor = {C. R. Ramakrishnan and
@ -1639,7 +1639,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{DBLP:phd/de/Niese2003,
@phdthesis{Niese2003,
author = {Oliver Niese},
title = {An integrated approach to testing complex systems},
school = {Technical University of Dortmund, Germany},
@ -1651,7 +1651,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tosem/Petrenko97,
@article{Petrenko97,
author = {Alexandre Petrenko},
title = {Technical Correspondence Comments on "A Reduced Test Suite for Protocol
Conformance Testing"},
@ -1667,7 +1667,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/hase/Petrenko0GHO14,
@inproceedings{Petrenko0GHO14,
author = {Alexandre Petrenko and
Keqin Li and
Roland Groz and
@ -1736,7 +1736,7 @@
school = {Master's thesis, Eindhoven University of Technology}
}
@article{DBLP:journals/sttt/RaffeltMSM09,
@article{RaffeltMSM09,
author = {Harald Raffelt and
Maik Merten and
Bernhard Steffen and
@ -1754,7 +1754,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/sttt/RaffeltSBM09,
@article{RaffeltSBM09,
author = {Harald Raffelt and
Bernhard Steffen and
Therese Berg and
@ -1772,7 +1772,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/iandc/RivestS93,
@article{RivestS93,
author = {Ronald L. Rivest and
Robert E. Schapire},
title = {Inference of Finite Automata Using Homing Sequences},
@ -1818,7 +1818,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cn/SabnaniD88,
@article{SabnaniD88,
author = {Krishan K. Sabnani and
Anton T. Dahbura},
title = {A Protocol Test Generation Procedure},
@ -1833,7 +1833,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/alt/Sakamoto97,
@inproceedings{Sakamoto97,
author = {Hiroshi Sakamoto},
editor = {Ming Li and
Akira Maruoka},
@ -1871,7 +1871,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{DBLP:books/daglib/SelicGW94,
@book{SelicGW94,
author = {Bran Selic and
Garth Gullekson and
Paul T. Ward},
@ -1955,7 +1955,7 @@
issn = {1476-2986}
}
@article{DBLP:journals/entcs/Shinwell06,
@article{Shinwell06,
author = {Mark R. Shinwell},
title = {Fresh {O'Caml}: Nominal Abstract Syntax for the Masses},
journal = {Electr. Notes Theor. Comput. Sci.},
@ -1970,7 +1970,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icgi/SmetsersVVV14,
@inproceedings{SmetsersVVV14,
author = {Rick Smetsers and
Michele Volpato and
Frits W. Vaandrager and
@ -1993,7 +1993,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sfm/SteffenHM11,
@inproceedings{SteffenHM11,
author = {Bernhard Steffen and
Falk Howar and
Maik Merten},
@ -2016,7 +2016,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icst/TapplerAB17,
@inproceedings{TapplerAB17,
author = {Martin Tappler and
Bernhard K. Aichernig and
Roderick Bloem},
@ -2033,7 +2033,7 @@
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/fmsd/TurkerY14,
@article{TurkerY14,
author = {Uraz Cengiz T{\"{u}}rker and
H{\"{u}}sn{\"{u}} Yenig{\"{u}}n},
title = {Hardness and inapproximability of minimizing adaptive distinguishing sequences},
@ -2125,4 +2125,4 @@
publisher = {Allerton Press Inc. USA},
comment = {More recent: On Test Derivation from Partial Specifications, 2000.
{\&} Testing from partial deterministic FSM specifications, 2005.}
}
}


@ -34,7 +34,7 @@ To the best of our knowledge, this is the first paper in which active automata l
Once they have high-level models of the behaviour of software components, software engineers can construct better software in less time.
A key problem in practice, however, is the construction of models for existing software components, for which no or only limited documentation is available.
The construction of models from observations of component behaviour can be performed using regular inference -- also known as automata learning (see \citenp[DBLP:journals/iandc/Angluin87, Higuera10, DBLP:conf/sfm/SteffenHM11]).
The construction of models from observations of component behaviour can be performed using regular inference -- also known as automata learning (see \citenp[Angluin87, Higuera10, SteffenHM11]).
The most efficient such techniques use the set-up of \emph{active learning,} illustrated in \in{Figure}[fig:learning], in which a \quotation{learner} has the task to learn a model of a system by actively asking questions to a \quotation{teacher}.
\startplacefigure
@ -54,10 +54,10 @@ Based on such a counterexample, the learner may then construct an improved hypot
Hence, the task of the learner is to collect data by interacting with the teacher and to formulate hypotheses, and the task of the MBT tool is to establish the validity of these hypotheses.
It is important to note that it may occur that an SUT passes the test for an hypothesis, even though this hypothesis is not valid.
Triggered by various theoretical and practical results, see for instance the work by \citet[DBLP:conf/fase/BergGJLRS05, DBLP:conf/fmco/Leucker06, DBLP:journals/sttt/RaffeltSBM09, Aarts14, DBLP:journals/jlp/CasselHJMS15, DBLP:conf/tacas/MertenHSCJ12, DBLP:conf/vmcai/HowarSJC12], there is a fast-growing interest in automata learning technology.
In recent years, automata learning has been applied successfully, e.g., to regression testing of telecommunication systems \citep[DBLP:conf/cav/HungarNS03], checking conformance of communication protocols to a reference implementation \citep[DBLP:journals/ml/AartsKTVV14], finding bugs in Windows and Linux implementations of TCP \citep[DBLP:conf/fmics/Fiterau-BrosteanJV14], analysis of botnet command and control protocols \citep[DBLP:conf/ccs/ChocSS10], and integration testing \citep[DBLP:conf/forte/LiGS06, DBLP:conf/pts/GrozLPS08].
Triggered by various theoretical and practical results, see for instance the work by \citet[BergGJLRS05, Leucker06, RaffeltSBM09, Aarts14, CasselHJMS15, MertenHSCJ12, HowarSJC12], there is a fast-growing interest in automata learning technology.
In recent years, automata learning has been applied successfully, e.g., to regression testing of telecommunication systems \citep[HungarNS03], checking conformance of communication protocols to a reference implementation \citep[AartsKTVV14], finding bugs in Windows and Linux implementations of TCP \citep[Fiterau-BrosteanJV14], analysis of botnet command and control protocols \citep[ChocSS10], and integration testing \citep[LiGS06, GrozLPS08].
In this chapter, we explore whether LearnLib by \citet[DBLP:journals/sttt/RaffeltSBM09], a state-of-the-art automata learning tool, is able to learn a model of the Engine Status Manager (ESM), a piece of control software that is used in many printers and copiers of Oc\'{e}.
In this chapter, we explore whether LearnLib by \citet[RaffeltSBM09], a state-of-the-art automata learning tool, is able to learn a model of the Engine Status Manager (ESM), a piece of control software that is used in many printers and copiers of Oc\'{e}.
Software components like the ESM can be found in many embedded systems in one form or another.
Being able to retrieve models of such components automatically is potentially very useful.
For instance, if the software is fixed or enriched with new functionality, one may use a learned model for regression testing.
@ -68,20 +68,20 @@ The ESM software is actually well documented, and an extensive test suite exists
The ESM, which has been implemented using Rational Rose Real-Time (RRRT), is stable and has been in use for 10 years.
Due to these characteristics, the ESM is an excellent benchmark for assessing the performance of automata learning tools in this area.
The ESM has also been studied in other research projects:
\citet[Ploeger05] modelled the ESM and other related managers and verified properties based on the official specifications of the ESM, and \citet[DBLP:conf/mompes/GraafD07]
\citet[Ploeger05] modelled the ESM and other related managers and verified properties based on the official specifications of the ESM, and \citet[GraafD07]
have checked the consistency of the behavioural specifications defined in the ESM against the RRRT definitions.
Learning a model of the ESM turned out to be more complicated than expected.
The top level UML/RRRT statechart from which the software is generated only has 16 states.
However, each of these states contains nested states, and in total there are 70 states that do not have further nested states.
Moreover, the \cpp{} code contained in the actions of the transitions also creates some complexity, and this explains why the minimal Mealy machine that models the ESM has 3.410 states.
LearnLib has been used to learn models with tens of thousands of states by \citet[DBLP:journals/sttt/RaffeltMSM09], and therefore we expected that it would be easy to learn a model for the ESM.
LearnLib has been used to learn models with tens of thousands of states by \citet[RaffeltMSM09], and therefore we expected that it would be easy to learn a model for the ESM.
However, finding counterexamples for incorrect hypotheses turned out to be challenging due to the large number of 77 inputs.
The test algorithms implemented in LearnLib, such as random testing, the W-method by \citet[DBLP:journals/tse/Chow78, Vasilevskii73] and the Wp-method by \citet[DBLP:journals/tse/FujiwaraBKAG91], failed to deliver counterexamples within an acceptable time.
Automata learning techniques have been successfully applied to case studies in which the total number of input symbols is much larger, but in these cases it was possible to reduce the number of inputs to a small number (less than 10) using abstraction techniques \citep[DBLP:journals/fmsd/AartsJUV15, DBLP:conf/vmcai/HowarSM11].
The test algorithms implemented in LearnLib, such as random testing, the W-method by \citet[Chow78, Vasilevskii73] and the Wp-method by \citet[FujiwaraBKAG91], failed to deliver counterexamples within an acceptable time.
Automata learning techniques have been successfully applied to case studies in which the total number of input symbols is much larger, but in these cases it was possible to reduce the number of inputs to a small number (less than 10) using abstraction techniques \citep[AartsJUV15, HowarSM11].
In the case of ESM, use of abstraction techniques only allowed us to reduce the original 156 concrete actions to 77 abstract actions.
We therefore implemented an extension of an algorithm of \citet[DBLP:journals/tc/LeeY94] for computing adaptive distinguishing sequences.
We therefore implemented an extension of an algorithm of \citet[LeeY94] for computing adaptive distinguishing sequences.
Even when an adaptive distinguishing sequence does not exist, Lee \& Yannakakis' algorithm produces an adaptive sequence that \quotation{almost} identifies states.
In combination with a standard algorithm for computing separating sequences for pairs of states, we managed to verify states with on average 3 test queries and to learn a model of the ESM with 77 inputs and 3.410 states.
We also constructed a model directly from the ESM software and established equivalence with the learned model.
@ -90,7 +90,7 @@ Preliminary evidence suggests that our adaptation of Lee \& Yannakakis' algorith
During recent years most researchers working on active automata learning focused their efforts on efficient algorithms and tools for the construction of hypothesis models.
Our work shows that if we want to further scale automata learning to industrial applications, we also need better algorithms for finding counterexamples for incorrect hypotheses.
Following \citet[DBLP:conf/fase/BergGJLRS05], our work shows that the context of automata learning provides both new challenges and new opportunities for the application of testing algorithms.
Following \citet[BergGJLRS05], our work shows that the context of automata learning provides both new challenges and new opportunities for the application of testing algorithms.
All the models for the ESM case study together with the learning and testing statistics are available at \href{http://www.mbsd.cs.ru.nl/publications/papers/fvaan/ESM/}, as a benchmark for both the automata learning and testing communities.
It is now also included in the automata wiki at \href{http://automata.cs.ru.nl/}.
@ -186,8 +186,8 @@ Messages can be sent to these ports using the action code of the transition.
The transitions between the states, actions and guards are defined in \cpp{} code.
From the state diagram, \cpp{} source files are generated.
The RRRT language and semantics is based on UML \citep[OMGUML2] and ROOM \citep[DBLP:books/daglib/SelicGW94].
One important concept used in RRRT is the run-to-completion execution model \citep[DBLP:journals/re/EshuisJW02].
The RRRT language and semantics is based on UML \citep[OMGUML2] and ROOM \citep[SelicGW94].
One important concept used in RRRT is the run-to-completion execution model \citep[EshuisJW02].
This means that when a received message is processed, the execution cannot be interrupted by other arriving messages.
These messages are placed in a queue to be processed later.
@ -234,7 +234,7 @@ The possibility to transition from any state to the sleep or defect state also c
[title={Learning the ESM},
reference=sec:learning]
In order to learn a model of the ESM, we connected it to LearnLib by \citet[DBLP:conf/tacas/MertenSHM11], a state-of-the-art tool for learning Mealy machines developed at the University of Dortmund.
In order to learn a model of the ESM, we connected it to LearnLib by \citet[MertenSHM11], a state-of-the-art tool for learning Mealy machines developed at the University of Dortmund.
A \emph{Mealy machine} is a tuple $M = (I, O, Q, q_0, \delta, \lambda)$, where
\startitemize
\item $I$ is a finite set of input symbols,
@ -275,7 +275,7 @@ Currently, LearnLib cannot handle inputs with parameters; therefore, we introduc
Based on domain knowledge and discussions with the Oc\'{e} engineers, we could group some of these inputs together and reduce the total number of inputs.
When learning the ESM using one function, 83 concrete inputs are grouped into four abstract inputs.
When using two functions, 126 concrete inputs can be grouped.
When an abstract input needs to be sent to the ESM, one concrete input of the represented group is randomly selected, as in the approach of \citet[DBLP:journals/fmsd/AartsJUV15].
When an abstract input needs to be sent to the ESM, one concrete input of the represented group is randomly selected, as in the approach of \citet[AartsJUV15].
This is a valid abstraction because all the inputs in the group have exactly the same behaviour in any state of the ESM.
This has been verified by doing code inspection.
No other abstractions were found during the research.
@ -316,7 +316,7 @@ An average test query takes around 1 ms, so it would take about 7 years to execu
\startdescription{Augmented DS-method\footnote{This was later called the \emph{hybrid ADS-method}.}.}
In order to reduce the number of tests, \citet[DBLP:journals/tse/Chow78] and
In order to reduce the number of tests, \citet[Chow78] and
\citet[Vasilevskii73] pioneered the so called W-method.
In their framework a test query consists of a prefix $p$ bringing the SUT to a specific state, a (random) middle part $m$ and a suffix $s$ assuring that the SUT is in the appropriate state.
This results in a test suite of the form $P I^{\leq k} W$, where $P$ is a set of (shortest) access sequences, $I^{\leq k}$ the set of all sequences of length at most $k$, and $W$ is a characterisation set.
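To make the shape of this suite concrete, here is a minimal Python sketch (an illustration, not code from the thesis); it assumes words are represented as tuples, and that the access sequences $P$, the input alphabet $I$ and the characterisation set $W$ are already given, with $k$ bounding how many extra states are checked. All names are placeholders.

from itertools import product

def w_method_suite(access_seqs, inputs, suffixes, k):
    # Sketch of the W-method test suite P . I^{<=k} . W:
    #   access_seqs -- the set P of (shortest) access sequences, as tuples
    #   inputs      -- the input alphabet I
    #   suffixes    -- the characterisation set W, as tuples
    #   k           -- extra depth beyond the states of the hypothesis
    middles = [()]                      # I^{<=k}, starting from the empty word
    for length in range(1, k + 1):
        middles.extend(product(inputs, repeat=length))
    return {p + m + w for p in access_seqs for m in middles for w in suffixes}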
@ -326,14 +326,14 @@ By increasing $k$ we can check additional states.
We tried using the W-method as implemented by LearnLib to find counterexamples.
The generated test suite, however, was still too big in our learning context.
\citet[DBLP:journals/tse/FujiwaraBKAG91] observed that it is possible to let the set $W$ depend on the state the SUT is supposed to be in.
\citet[FujiwaraBKAG91] observed that it is possible to let the set $W$ depend on the state the SUT is supposed to be in.
This allows us to only take a subset of $W$ which is relevant for a specific state.
This slightly reduces the test suite without losing the power of the full test suite.
This method is known as the Wp-method.
More importantly, this observation allows for generalisations where we can carefully pick the suffixes.
In the presence of an (adaptive) distinguishing sequence one can take $W$ to be a single suffix, greatly reducing the test suite.
\citet[DBLP:journals/tc/LeeY94] describe an algorithm (which we will refer to as the LY algorithm) to efficiently construct this sequence, if it exists.
\citet[LeeY94] describe an algorithm (which we will refer to as the LY algorithm) to efficiently construct this sequence, if it exists.
In our case, unfortunately, most hypotheses did not enjoy existence of an adaptive distinguishing sequence.
In these cases the incomplete result from the LY algorithm still contained a lot of information which we augmented by pairwise separating sequences.
@ -412,12 +412,12 @@ As mentioned already, the ESM has been implemented using Rational Rose RealTime
Thus a statechart representation of the ESM is available.
However, we have not been able to find a tool that translates RRRT models to Mealy machines, allowing us to compare the RRRT-based model of the ESM with the learned model.
We considered several formalisms and tools that were proposed in the literature to flatten statecharts to state machines.
The first one was a tool for hierarchical timed automata (HTA) by \citet[DBLP:conf/fase/DavidMY02].
The first one was a tool for hierarchical timed automata (HTA) by \citet[DavidMY02].
However, we found it hard to translate the output of this tool, a network of Uppaal timed automata, to a Mealy machine that could be compared to the learned model.
The second tool that we considered has been developed by \citet[DBLP:conf/fmco/HansenKLMPS10].
The second tool that we considered has been developed by \citet[HansenKLMPS10].
This tool misses some essential features, for example the ability to assign new values to state variables on transitions.
Finally, we considered a formalism called object-oriented action systems
(OOAS) by \citet[DBLP:conf/fmco/KrennSA09], but no tools to use this could be found.
(OOAS) by \citet[KrennSA09], but no tools to use this could be found.
In the end we decided to implement the required model transformations ourselves.
\in{Figure}[fig:formats] displays the different formats for representing models that we used and the transformations between those formats.
@ -428,10 +428,10 @@ In the end we decided to implement the required model transformations ourselves.
\externalfigure[formats.pdf][width=0.7\textwidth]
\stopplacefigure
We used the bisimulation checker of CADP by \citet[DBLP:conf/tacas/GaravelLMS11] to check the equivalence of labelled transition system models in {\tt .aut} format.
We used the bisimulation checker of CADP by \citet[GaravelLMS11] to check the equivalence of labelled transition system models in {\tt .aut} format.
The Mealy machine models learned by LearnLib are represented as {\tt .dot} files.
A small script converts these Mealy machines to labelled transition systems in {\tt .aut} format.
We used the Uppaal tool by \citet[DBLP:conf/qest/BehrmannDLHPYH06] as an editor for defining extended finite state machines (EFSM), represented as {\tt .xml} files.
We used the Uppaal tool by \citet[BehrmannDLHPYH06] as an editor for defining extended finite state machines (EFSM), represented as {\tt .xml} files.
A script developed in the ITALIA project (\href{http://www.italia.cs.ru.nl/}) converts these EFSM models to LOTOS, and then CADP takes care of the conversion from LOTOS to the {\tt .aut} format.
The Uppaal syntax is not sufficiently expressive to directly encode the RRRT definition of the ESM, since this definition makes heavy use of UML \citep[OMGUML2] concepts such as state hierarchy and transitions from composite states, concepts which are not present in Uppaal.
@ -558,7 +558,7 @@ This function takes a variable name and evaluates to the value of this variable
\startsubsection
[title={Results}]
\in{Figure}[fig:esra-model] shows a visualisation of the learned model that was generated using Gephi \citep[DBLP:conf/icwsm/BastianHJ09].
\in{Figure}[fig:esra-model] shows a visualisation of the learned model that was generated using Gephi \citep[BastianHJ09].
States are coloured according to the strongly connected components.
The number of transitions between two states is represented by the thickness of the edge.
The large number of states (3.410) and transitions (262.570) makes it hard to visualise this model.
@ -578,7 +578,7 @@ This phase is performed only once and thus only transitions from the initialisat
During the construction of the RRRT-based model, the ESM code was thoroughly inspected.
This resulted in the discovery of missing behaviour in one transition of the ESM code.
An Oc\'{e} software engineer confirmed that this behaviour is a (minor) bug, which will be fixed.
We have verified the equivalence of the learned model and the RRRT-based model by using CADP \citep[DBLP:conf/tacas/GaravelLMS11].
We have verified the equivalence of the learned model and the RRRT-based model by using CADP \citep[GaravelLMS11].
\stopsubsection
@ -587,7 +587,7 @@ We have verified the equivalence of the learned model and the RRRT-based model b
[title={Conclusions and Future Work},
reference=sec:future]
Using an extension of the algorithm by \citet[DBLP:journals/tc/LeeY94] for adaptive distinguishing sequences, we succeeded in learning a Mealy machine model of a piece of widely used industrial control software.
Using an extension of the algorithm by \citet[LeeY94] for adaptive distinguishing sequences, we succeeded in learning a Mealy machine model of a piece of widely used industrial control software.
Our extension of Lee \& Yannakakis' algorithm is rather obvious, but nevertheless appears to be new.
Preliminary evidence suggests that it outperforms existing conformance testing algorithms.
We are currently performing experiments in which we compare the new algorithm with other test algorithms on a number of realistic benchmarks.
@ -600,7 +600,7 @@ We expect that the combination of LearnLib with the extended Lee \& Yannakakis a
In the specific case study described in this article, we know that our learning algorithm has succeeded in learning the correct model, since we established equivalence with a reference model that was constructed independently from the RRRT model of the ESM software.
In the absence of a reference model, we can never guarantee that the actual system behaviour conforms to a learned model.
In order to deal with this problem, it is important to define metrics that quantify the difference (or distance) between a hypothesis and a correct model of the SUT and to develop test generation algorithms that guarantee an upper bound on this difference.
Preliminary work in this area is reported by \citet[DBLP:conf/icgi/SmetsersVVV14].
Preliminary work in this area is reported by \citet[SmetsersVVV14].
\stopsection
@ -612,4 +612,4 @@ Fides Aarts and Harco Kuppens helped us with the use of LearnLib and CADP, and J
\stopsubject
\referencesifcomponent
\stopchapter
\stopcomponent
\stopcomponent


@ -50,7 +50,7 @@ In such cases, a learning algorithm can interact with the software.
So it makes sense to study a learning paradigm which allows for \emph{queries}, and not just a data set of samples.
\footnote{Instead of query learning, people also use the term \emph{active learning}.}
A typical query learning framework was established by \citet[DBLP:journals/iandc/Angluin87].
A typical query learning framework was established by \citet[Angluin87].
In her framework, the learning algorithm may pose two types of queries to a \emph{teacher} (or \emph{oracle}):
\description{Membership queries (MQs)}
@ -66,7 +66,7 @@ If, however, the hypothesis is incorrect, the teacher replies with \kw{no}, toge
With these queries, the learner algorithm is supposed to converge to a correct model.
This type of learning is hence called \emph{exact learning}.
\citet[DBLP:journals/iandc/Angluin87] showed that one can do this efficiently for deterministic finite automata (DFAs), that is, when $\lang$ is in the class of regular languages.
\citet[Angluin87] showed that one can do this efficiently for deterministic finite automata (DFAs), that is, when $\lang$ is in the class of regular languages.
Another paradigm which is relevant for our type of applications is \emph{PAC-learning with membership queries}.
Here, the algorithm can again use MQs as before, but the EQs are replaced by random sampling.
@ -133,7 +133,7 @@ Finite state machine conformance testing is a core topic in testing literature t
The combination of conformance testing and automata learning is applied in practice and it proves to be an effective way of finding bugs \cite[Joeri], replacing legacy software \cite[Philips], or inferring models to be used in model checking \cite[MeinkeSindhu].
Several model learning applications:
learning embedded controller software \cite[DBLP:conf/icfem/SmeenkMVJ15], learning the TCP protocol \cite[DBLP:conf/cav/Fiterau-Brostean16] and learning the MQTT protocol \cite[DBLP:conf/icst/TapplerAB17].
learning embedded controller software \cite[DBLP:conf/icfem/SmeenkMVJ15], learning the TCP protocol \cite[Fiterau-Brostean16] and learning the MQTT protocol \cite[TapplerAB17].
\stopsubsection
@ -156,10 +156,10 @@ In this thesis we often assume a \emph{bound on the number of states} of the sys
Under these conditions, \citet[Moore56] already solved the problem.
Unfortunately, his experiment is exponential in size, or in his own words: \quotation{fantastically large.}
Years later, \citet[DBLP:journals/tse/Chow78, Vasilevskii73] independently designed efficient experiments.
Years later, \citet[Chow78, Vasilevskii73] independently designed efficient experiments.
In particular, the set of experiments is polynomial in size.
These techniques will be discussed in detail in \in{Chapter}[chap:test-methods].
More background and other related problems, as well as their complexity results, are well exposed in a survey of \citet[DBLP:journals/tc/LeeY94].
More background and other related problems, as well as their complexity results, are well exposed in a survey of \citet[LeeY94].
\stopsection
@ -226,7 +226,7 @@ So we see that we cannot simply collapse the whole alphabet into one equivalence
For this reason, we keep the alphabet infinite and explicitly mention its symmetries.
Before formalising symmetries, let me remark that the use of symmetries in automata theory is not new.
One of the first to use symmetries was \citet[DBLP:conf/alt/Sakamoto97].
One of the first to use symmetries was \citet[Sakamoto97].
He devised an \LStar{} learning algorithm for register automata, much like the one presented in \in{Chapter}[chap:learning-nominal-automata].
The symmetries are crucial to reduce the problem to a finite alphabet and use the regular \LStar{} algorithm.
(\in{Chapter}[chap:learning-nominal-automata] shows how to do it with more general symmetries.)
@ -235,7 +235,7 @@ Their approach was based on the same symmetries and they developed a theory of \
Named sets are equivalent to nominal sets.
However, nominal sets are defined in a more elementary way.
The nominal sets we will soon see are introduced by \citet[GabbayP02] to solve certain problems in name binding in abstract syntaxes.
Although this is not really related to automata theory, it was picked up by \citet[DBLP:journals/corr/BojanczykKL14].
Although this is not really related to automata theory, it was picked up by \citet[BojanczykKL14].
They provide an equivalence between register automata and nominal automata.
Additionally, they generalise the work on nominal sets to other symmetries.
@ -272,7 +272,7 @@ However, in order to be general enough for the next chapters, we introduce group
Let $X$ be a set.
A (left)
\footnote{Many authors use left actions.
However, we note that \citet[DBLP:journals/corr/BojanczykKL14] use a right action.
However, we note that \citet[BojanczykKL14] use a right action.
For them to have a well-defined group action, their group multiplication has to be defined as $g \cdot f = f \circ g$ (i.e., reverse function composition).}
\emph{$G$-action} is a function ${\cdot} \colon G \times X \to X$ satisfying:
\startformula\startalign[n=3]
@ -449,7 +449,7 @@ G_{< \omega} = \{ \pi \in \Perm \mid \pi(x) \neq x \text{ for finitely many } x
\stopformula
This is the subgroup of $\Pm$ of \emph{finite} permutations.
The set $\{ X \subset \atoms \mid X \text{ is not finite nor co-finite} \}$ has infinitely many orbits when considered as a $G_{< \omega}$-set, but only one orbit as a $\Pm$-set.
This poses the question which group we should consider (for example, \citet[DBLP:journals/corr/BojanczykKL14] use the whole group $\Pm$).
This poses the question which group we should consider (for example, \citet[BojanczykKL14] use the whole group $\Pm$).
For nominal sets, there is no difference: nominal $G_{< \omega}$-sets and nominal $\Pm$-sets are equivalent, as shown by \citet[Pitts13].
It is only for non-nominal sets that we can distinguish them.
We will mostly work with the set of all permutations $\Pm$.
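As a small aside (an illustrative sketch, not taken from the thesis): an element of $G_{< \omega}$ is determined by the finitely many atoms it moves, so it can be represented by a finite table, and it acts on words of atoms pointwise. The names below are placeholders.

def apply_perm(pi, atom):
    # pi lists only the atoms that are moved; every other atom is fixed.
    return pi.get(atom, atom)

def act(pi, word):
    # Pointwise action of the permutation on a word (tuple) of atoms.
    return tuple(apply_perm(pi, a) for a in word)

swap_ab = {'a': 'b', 'b': 'a'}      # the transposition (a b), a finite permutation
act(swap_ab, ('a', 'c', 'b'))       # yields ('b', 'c', 'a')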
@ -471,4 +471,4 @@ For each of them, one can verify the requirements.
\stopsection
\referencesifcomponent
\stopchapter
\stopcomponent
\stopcomponent


@ -35,15 +35,15 @@ In this context, it has become popular to infer or learn a model from a given sy
The learned model can then be used to ensure the system is complying to desired properties or to detect bugs and design possible fixes.
Automata learning, or regular inference, is a widely used technique for creating an automaton model from observations.
The original algorithm by \cite[authoryears][DBLP:journals/iandc/Angluin87] works for deterministic finite automata, but has since been extended to other types of automata, including Mealy machines and I/O automata (see \citenp[DBLP:phd/de/Niese2003], \sectionmark 8.5, and \citenp[DBLP:conf/concur/AartsV10]), and even a special class of context-free grammars (see \citenp[Isberner15], \sectionmark 6).
The original algorithm by \cite[authoryears][Angluin87] works for deterministic finite automata, but has since been extended to other types of automata, including Mealy machines and I/O automata (see \citenp[Niese2003], \sectionmark 8.5, and \citenp[AartsV10]), and even a special class of context-free grammars (see \citenp[Isberner15], \sectionmark 6).
Angluin's algorithm is sometimes referred to as {\em active learning}, because it is based on direct interaction of the learner with an oracle (\quotation{the Teacher}) that can answer different types of queries.
This is in contrast with \emph{passive} learning, where a fixed set of positive and negative examples is given and no interaction with the system is possible.
In this chapter, staying in the realm of active learning, we will extend Angluin's algorithm to a richer class of automata.
We are motivated by situations in which a program model, besides control flow, needs to represent basic data flow, where data items are compared for equality (or for other theories such as total ordering).
In these situations, values for individual symbols are typically drawn from an infinite domain and automata over \emph{infinite alphabets} become natural models, as witnessed by a recent trend \cite[DBLP:conf/dlt/BolligHLM13, DBLP:conf/ictac/AartsFKV15,DBLP:conf/popl/DAntoniV14, DBLP:journals/corr/BojanczykKL14, DBLP:journals/fac/CasselHJS16].
In these situations, values for individual symbols are typically drawn from an infinite domain and automata over \emph{infinite alphabets} become natural models, as witnessed by a recent trend \cite[BolligHLM13, AartsFKV15,DAntoniV14, BojanczykKL14, CasselHJS16].
One of the foundational approaches to formal language theory for infinite alphabets uses the notion of nominal sets \cite[DBLP:journals/corr/BojanczykKL14].
One of the foundational approaches to formal language theory for infinite alphabets uses the notion of nominal sets \cite[BojanczykKL14].
The theory of nominal sets originates from the work of Fraenkel in 1922, and it was originally used to prove the independence of the axiom of choice and other axioms.
They have been rediscovered in Computer Science by Gabbay and Pitts (see \citenp[Pitts13] for historical notes), as an elegant formalism for modelling name binding, and since then they form the basis of many research projects in the semantics and concurrency community.
In a nutshell, nominal sets are infinite sets equipped with symmetries which make them finitely representable and tractable for algorithms.
@ -54,14 +54,14 @@ Our main contributions are the following.
\startitemize[after]
\item A generalisation of Angluin's original algorithm to nominal automata.
The generalisation follows a generic pattern for transporting computation models from finite sets to nominal sets, which leads to simple correctness proofs and opens the door to further generalisations.
The use of nominal sets with different symmetries also creates potential for generalisation, e.g., to languages with time features \cite[DBLP:conf/icalp/BojanczykL12] or data dependencies represented as graphs \cite[DBLP:journals/tcs/MontanariS14].
The use of nominal sets with different symmetries also creates potential for generalisation, e.g., to languages with time features \cite[BojanczykL12] or data dependencies represented as graphs \cite[MontanariS14].
\item An extension of the algorithm to nominal non-deterministic automata (nominal NFAs).
To the best of our knowledge, this is the first learning algorithm for non-deterministic automata over infinite alphabets.
It is important to note that, in the nominal setting, NFAs are strictly more expressive than DFAs.
We learn a subclass of the languages accepted by nominal NFAs, which includes all the languages accepted by nominal DFAs.
The main advantage of learning NFAs directly is that they can provide exponentially smaller automata when compared to their deterministic counterpart.
This can be seen both as a generalisation and as an optimisation of the algorithm.
\item An implementation using a recently developed Haskell library tailored to nominal computation -- NLambda, or \NLambda{}, by \cite[authoryears][DBLP:journals/corr/KlinS16].
\item An implementation using a recently developed Haskell library tailored to nominal computation -- NLambda, or \NLambda{}, by \cite[authoryears][KlinS16].
Our implementation is the first non-trivial application of a novel programming paradigm of functional programming over infinite structures, which allows the programmer to rely on convenient intuitions of searching through infinite sets in finite time.
\stopitemize
@ -139,7 +139,7 @@ Each time the algorithm constructs an automaton, it poses an equivalence query t
It terminates when the answer is {\bf yes}; otherwise it extends the table with the counterexample provided.
\startplacealgorithm
[title={The \LStar{} learning algorithm from \cite[authoryears][DBLP:journals/iandc/Angluin87].},
[title={The \LStar{} learning algorithm from \cite[authoryears][Angluin87].},
location=top,
reference=alg:alg]
\startalgorithmic
@ -428,12 +428,12 @@ This automaton is infinite, but it can be finitely presented in a variety of way
\stoptikzpicture}}
One can formalise the quantifier notation above (or indeed the \quotation{dots} notation above that) in several ways.
A popular solution is to consider finite {\em register automata} \cite[DBLP:journals/tcs/KaminskiF94, DBLP:journals/tocl/DemriL09], i.e., finite automata equipped with a finite number of registers where alphabet letters can be stored and later compared for equality.
A popular solution is to consider finite {\em register automata} \cite[KaminskiF94, DemriL09], i.e., finite automata equipped with a finite number of registers where alphabet letters can be stored and later compared for equality.
Our language $\lang_1$ is recognised by a simple automaton with four states and one register.
The problem of learning register automata has been successfully attacked before by, for instance,
\citet[DBLP:conf/vmcai/HowarSJC12].
\citet[HowarSJC12].
In this chapter, however, we consider nominal automata by \citet[DBLP:journals/corr/BojanczykKL14] instead.
In this chapter, however, we consider nominal automata by \citet[BojanczykKL14] instead.
These automata ostensibly have infinitely many states, but the set of states can be finitely presented in a way open to effective manipulation.
More specifically, in a nominal automaton the set of states is subject to an action of permutations of a set $\EAlph$ of {\em atoms}, and it is finite up to that action.
For example, the set of states of $\autom_5$ is:
@ -447,7 +447,7 @@ Moreover, it is required that in a nominal automaton the transition relation is
The automaton $\autom_5$ has this property:
For example, it has a transition $q_a \tot{a} q_3$, and for any $\pi \colon \EAlph \to \EAlph$ there is also a transition $\pi(q_a) = q_{\pi(a)} \tot{\pi(a)} q_3 = \pi(q_3)$.
Nominal automata with finitely many orbits of states are equi-expressive with finite register automata \cite[DBLP:journals/corr/BojanczykKL14], but they have an important theoretical advantage:
Nominal automata with finitely many orbits of states are equi-expressive with finite register automata \cite[BojanczykKL14], but they have an important theoretical advantage:
They are a direct reformulation of the classical notion of finite automaton, where one replaces finite sets with orbit-finite sets and functions (or relations) with equivariant ones.
A research programme advocated by Boja\'{n}czyk, et al. is to transport various computation models, algorithms and theorems along this correspondence.
This can often be done with remarkable accuracy, and our results are a witness to this.
@ -592,7 +592,7 @@ This is again incorrect, but one additional step will give the correct hypothesi
[title={Generalisation to Non-Deterministic Automata}]
Since our extension of Angluin's \LStar{} algorithm stays close to her original development, exploring extensions of other variations of \LStar{} to the nominal setting can be done in a systematic way.
We will show how to extend the algorithm \NLStar{} for learning NFAs by \cite[authoryears][DBLP:conf/ijcai/BolligHKL09].
We will show how to extend the algorithm \NLStar{} for learning NFAs by \cite[authoryears][BolligHKL09].
This has practical implications: It is well-known that NFAs are exponentially more succinct than DFAs.
This is true also in the nominal setting. However, there are challenges in the extension that require particular care.
\startitemize
@ -614,7 +614,7 @@ This means that in some cases fewer rows will be added for closedness.
reference=sec:prelim]
We recall the notions of nominal sets, nominal automata and nominal regular languages.
We refer to \citet[DBLP:journals/corr/BojanczykKL14] for a detailed account.
We refer to \citet[BojanczykKL14] for a detailed account.
Let $\EAlph$ be a countable set and let $\Perm(\EAlph)$ be the set of \emph{permutations on $\EAlph$}, i.e., the bijective functions $\pi \colon \EAlph \to \EAlph$.
Permutations form a group where the identity permutation $\id$ is the unit element, inverse is functional inverse and multiplication is function composition.
@ -684,7 +684,7 @@ for all $v \in A^{\ast}$.
This relation is equivariant and the set of equivalence classes $[w]_\lang$ is a nominal set.
\starttheorem
(Myhill-Nerode theorem for nominal sets by \citenp[DBLP:journals/corr/BojanczykKL14])\\
(Myhill-Nerode theorem for nominal sets by \citenp[BojanczykKL14])\\
Let $\lang$ be a nominal language.
The following conditions are equivalent:
\startitemize[m]
@ -708,7 +708,7 @@ This is the key idea underpinning our implementation.
[title={Different Atom Symmetries},
reference=sec:other-symms]
An important advantage of nominal set theory as considered by \cite[authoryears][DBLP:journals/corr/BojanczykKL14] is that it retains most of its properties when the structure of atoms $\EAlph$ is replaced with an arbitrary infinite relational structure subject to a few model-theoretic assumptions.
An important advantage of nominal set theory as considered by \cite[authoryears][BojanczykKL14] is that it retains most of its properties when the structure of atoms $\EAlph$ is replaced with an arbitrary infinite relational structure subject to a few model-theoretic assumptions.
An example alternative structure of atoms is the total order of rational numbers $(\Q, <)$, with the group of monotone bijections of $\Q$ taking the role of the group of all permutations.
The theory of nominal automata remains similar, and an example nominal language over the atoms $(\Q,<)$ is:
\startformula
@ -751,7 +751,7 @@ The table is called \defn{closed} if for each $t \in S \Lext A$ there is a $s \i
The table is called \defn{consistent} if for each pair $s_1, s_2 \in S$ with $s_1 \sim s_2$ we have $s_1 a \sim s_2 a$ for all $a \in A$.
\stopdefinition
The above definitions agree with the abstract definitions given by \cite[authoryears][JacobsS14a] and we may use some of their results implicitly.
The intuition behind the definitions is as follows.
Closedness assures us that for each state we have a successor state for each input.
Consistency assures us that each state has at most one successor for each input.
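
To make this intuition concrete, the following Haskell sketch shows the classical observation table over a finite alphabet together with the closedness and consistency checks; the names \type{row}, \type{closed} and \type{consistent} are ours, and the sketch does not reflect the nominal implementation discussed below.

\starttyping
-- Words over a finite alphabet of characters.
type W = String

-- An observation table: prefixes S, suffixes E, the alphabet, and the
-- membership oracle (only ever queried on the words the table needs).
data Table = Table { rowsS :: [W], cols :: [W], alph :: [Char]
                   , member :: W -> Bool }

row :: Table -> W -> [Bool]
row t s = [ member t (s ++ e) | e <- cols t ]

-- Closed: every one-letter extension of a row of S equals some row of S,
-- i.e., every reachable state already has a representative.
closed :: Table -> Bool
closed t = and [ row t (s ++ [a]) `elem` map (row t) (rowsS t)
               | s <- rowsS t, a <- alph t ]

-- Consistent: rows that are equal remain equal after appending a letter,
-- i.e., a state has at most one successor per input.
consistent :: Table -> Bool
consistent t = and [ row t (s1 ++ [a]) == row t (s2 ++ [a])
                   | s1 <- rowsS t, s2 <- rowsS t
                   , row t s1 == row t s2, a <- alph t ]

-- Example: the language of words with an even number of 'a's.
main :: IO ()
main = do
  let evenAs w = even (length (filter (== 'a') w))
      t = Table { rowsS = ["", "a"], cols = [""], alph = "ab", member = evenAs }
  print (closed t, consistent t)   -- (True,True)
\stoptyping
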
The pseudocode for the nominal version is the same as listed in \in{Algorithm}[alg:alg], modulo the changes displayed in (\in[eq:changes]).
However, we have to take care to ensure that all manipulations and tests on the (possibly) infinite sets $S, E$ and $A$ terminate in finite time.
We refer to \cite[authoryears][BojanczykKL14, Pitts13] for the full details on how to represent these structures and provide a brief sketch here.
The sets $S, E, A$ and $S \Lext A$ can be represented by choosing a representative for each orbit.
The function $T$ in turn can be represented by cells $T_{i,j} \colon \orb(s_i) \times \orb(e_j) \to 2$ for each representative $s_i$ and $e_j$.
Note, however, that the product of two orbits may consist of several orbits, so that $T_{i,j}$ is not a single boolean value.
Moreover, the function $row$ is equivariant, so all structure ($Q_0$, $F$ and $\delta$) is equivariant as well.
The representation given above is not the only way to represent nominal sets.
For example, first-order definable sets can be used as well \cite[KlinS16].
From now on we assume to have set theoretic primitives so that each line in \in{Algorithm}[alg:alg] is well defined.
The automaton constructed from a closed and consistent table is minimal.
\stoplemma
\startproof
Follows from the categorical perspective by \cite[authoryears][JacobsS14a].
\stopproof
We note that the constructed automaton is consistent with the table (we use that the set $S$ is prefix-closed and $E$ is suffix-closed \cite[Angluin87]).
The following lemma shows that there are no strictly \quotation{smaller} automata consistent with the table.
So the automaton is not just minimal, it is minimal w.r.t. the table.
From the proof of \in{Theorem}[thm:termination] we observe moreover that the way we handle counterexamples is not crucial.
Any other method which ensures a nonequivalent hypothesis will work.
In particular our algorithm is easily adapted to include optimisations such as the ones by \cite[authoryears][RivestS93, MalerP95], where counterexamples are added as columns.
\todo{Is consistency needed?}
reference=sec:nondet]
In this section, we introduce a variant of \nLStar{}, which we call \nNLStar{}, where the learnt automaton is non-deterministic.
It will be based on the \NLStar{} algorithm by \cite[authoryears][BolligHKL09], an Angluin-style algorithm for learning NFAs.
The algorithm is shown in \in{Algorithm}[alg:nfa-alg].
We first illustrate \NLStar{}, then we discuss its extension to nominal automata.
\NLStar{} crucially relies on the use of \emph{residual finite-state automata} (RFSA) \cite[DenisLT02], which are NFAs admitting \emph{unique minimal canonical representatives}.
The states of this automaton correspond to Myhill-Nerode right-congruence classes, but the automaton can be exponentially smaller than the corresponding minimal DFA:
\emph{composed} states, which are language-equivalent to a set of other states, can be dropped.
\startplacealgorithm
[title={Algorithm for learning NFAs by \citet[BolligHKL09].},
reference=alg:nfa-alg]
\startalgorithmic
\startlinenumbering
If $(S,E)$ is not RFSA-consistent, then there is a suffix which does not preserve the containment of two existing rows, so those rows are actually incomparable.
A new column is added to distinguish those rows (\inline[line:nfa-cons-witness]).
Notice that counterexamples supplied by the teacher are added \emph{to columns} (\inline[line:nfa-addcol-ex]).
Indeed, it is shown by \cite[authoryears][BolligHKL09] that treating the counterexamples as in the original \LStar{}, namely adding them to rows, does not lead to a terminating algorithm.
\startdefinition[reference=def:nfa-conj]
Given a RFSA-closed and RFSA-consistent table $(S, E)$, the conjecture automaton is $N(S, E) = (Q, Q_0, F, \delta)$, where:
\stopitemize
\stopdefinition
As observed by \cite[authoryears][BolligHKL09], $N(S, E)$ is not necessarily a RFSA, but it is a canonical RFSA if it is consistent with $(S, E)$.
If the algorithm terminates, then $N(S, E)$ must be consistent with $(S, E)$, which ensures correctness.
The termination argument is more involved than that of \LStar{}, but still it relies on the minimal DFA.
If the table $(S, E)$ is RFSA-closed, RFSA-consistent and $N(S, E)$ is consistent with $(S, E)$, then $N(S, E)$ is a canonical nominal RFSA.
\stoptheorem
This is proved by \cite[authoryears][BolligHKL09] for ordinary RFSAs, using the standard theory of regular languages.
The nominal proof is exactly the same, using derivatives of nominal regular languages and nominal RFSAs as defined in \in{Section}[subsec:nrfsa].
\startlemma[reference=lem:rows-min-dfa]
\startproof
If the algorithm terminates, then it must return the canonical nominal RFSA for $\lang$ by \in{Theorem}[th:nlstar-rfsa].
We prove that a table can be made RFSA-closed and RFSA-consistent in finite time.
This is similar to the proof of \in{Theorem}[thm:termination] and is inspired by the proof of Theorem 2 of \cite[authoryears][BolligHKL09].
If the table is not RFSA-closed, we find a row $s \in S \Lext A$ such that $row(s) \in PR(S, E) \setminus PR^{\top}(S, E)$.
The algorithm then adds $\orb(s)$ to $S$.
We can, however, use an argument similar to that for RFSA-consistency, because the algorithm adds columns in response to counterexamples.
Let $w$ be the counterexample provided by the teacher.
When $16'$ is executed, the table must change.
In fact, by Lemma 2 of \citet[BolligHKL09], if it does not, then $w$ is already correctly classified by $N(S, E)$, which is absurd.
We have the following cases.
{\bf E1)} The number of orbits of $(S \cup S \Lext A)/{\sim}$ increases ({\bf C1}).
Or, {\bf E2)} either the number of orbits in $PR(S, E)$ increases, or any of the following happens:
the number of orbits in $I$ decreases ({\bf C2}), or the local symmetries or dimension of an orbit of rows change ({\bf C3}).
In fact, if {\bf E1} does not happen and $PR(S, E)$, $I$ and local symmetries/dimension of orbits of rows do not change, the automaton $\autom$ for the new table coincides with $N(S, E)$.
But $N(S, E) = \autom$ is a contradiction, because $\autom$ correctly classifies $w$ (by Lemma 2 of \citet[DBLP:conf/ijcai/BolligHKL09], as $w$ now belongs to columns), whereas $N(S, E)$ does not.
But $N(S, E) = \autom$ is a contradiction, because $\autom$ correctly classifies $w$ (by Lemma 2 of \citet[BolligHKL09], as $w$ now belongs to columns), whereas $N(S, E)$ does not.
Both {\bf E1} and {\bf E2} can only happen finitely many times.
\stopproof
reference=sec:impl]
Our algorithms for learning nominal automata operate on infinite sets of rows and columns, and hence it is not immediately clear how to actually implement them on a computer.
We have used NLambda, a recently developed Haskell library by \cite[authoryears][KlinS16] designed to allow direct manipulation of infinite (but orbit-finite) nominal sets, within the functional programming paradigm.
The semantics of NLambda is based on the work of \citet[BojanczykBKL12], and the library itself is inspired by Fresh OCaml by \cite[authoryears][Shinwell06], a language for functional programming over nominal data structures with binding.
\subsection{NLambda}
\stopitemize
All the primitives listed above, such as \kw{isEmpty}, \kw{map} and \kw{sum}, are implemented on this internal representation.
In some cases, this involves checking the satisfiability of certain formulas over atoms.
In the current implementation of NLambda, an external SMT solver Z3 \cite[MouraB08] is used for that purpose.
For example, to evaluate the expression $\kw{isEmpty}\ \kw{distPairs}$,
NLambda makes a system call to the SMT solver to check whether the formula $a \neq b$ is satisfiable in the first-order theory of equality and, after receiving the affirmative answer, returns the value \kw{False}.
For more details about the semantics and implementation of NLambda, see \citet[KlinS16].
The library itself can be downloaded from \cite[url][nlambda-code].
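
The following toy Haskell fragment is emphatically not NLambda's interface; all names in it are invented. It only illustrates the underlying idea: a set is stored as variables plus a constraint, and an emptiness test reduces to a satisfiability check, which NLambda delegates to Z3 (here replaced by a naive enumeration that happens to be complete for the pure theory of equality).

\starttyping
-- Formulas over atom-valued variables, with equality only.
data Formula = TT | FF
             | Eq Int Int | Neq Int Int
             | And Formula Formula | Or Formula Formula

-- A symbolic set: bound variables together with a constraint on them.
data SymSet = SymSet [Int] Formula

-- A naive satisfiability check for the pure theory of equality: try all
-- ways of identifying variables.  NLambda instead hands such questions to
-- an external SMT solver.
satisfiable :: [Int] -> Formula -> Bool
satisfiable vs f = any (`models` f) (assignments vs)
  where
    assignments []         = [[]]
    assignments (v : rest) =
      [ (v, c) : a | a <- assignments rest, c <- [0 .. length vs - 1] ]
    val a v = maybe v id (lookup v a)
    models a TT        = True
    models a FF        = False
    models a (Eq x y)  = val a x == val a y
    models a (Neq x y) = val a x /= val a y
    models a (And p q) = models a p && models a q
    models a (Or p q)  = models a p || models a q

-- Emptiness reduces to (un)satisfiability of the constraint.
isEmptySym :: SymSet -> Bool
isEmptySym (SymSet vs f) = not (satisfiable vs f)

-- A toy version of distPairs: the pairs (a, b) of atoms with a /= b.
main :: IO ()
main = print (isEmptySym (SymSet [1, 2] (Neq 1 2)))   -- False
\stoptyping
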
Equivalence queries are implemented by constructing a bisimulation, where a counterexample is obtained when two DFAs are not bisimilar.
For nominal NFAs, however, we cannot implement a complete equivalence query as their language equivalence is undecidable.
We approximated the equivalence by bounding the depth of the bisimulation for nominal NFAs.
As an optimisation, we use bisimulation up to congruence as described by \cite[authoryears][BonchiP15].
Having an approximate teacher is a minor issue since in many applications no complete teacher can be implemented and one relies on testing \cite[AartsFKV15, BolligHLM13].
For the experiments listed here the bound was chosen large enough for the learner to terminate with the correct automaton.
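
For illustration, the following Haskell sketch shows the idea of such an equivalence check in the ordinary finite-alphabet setting: it searches for a bisimulation between two DFAs and reports a distinguishing word if none exists. The types are ours and the sketch ignores all nominal aspects.

\starttyping
import qualified Data.Set as Set

-- A DFA with state type q over a finite alphabet of characters.
data DFA q = DFA { start :: q, accept :: q -> Bool, step :: q -> Char -> q }

-- Search the reachable part of the product for a pair of states that
-- disagrees on acceptance; the accumulated word is then a counterexample.
-- If no such pair exists, the explored relation is a bisimulation.
counterexample :: (Ord p, Ord q) => [Char] -> DFA p -> DFA q -> Maybe String
counterexample alphabet m1 m2 = go Set.empty [(start m1, start m2, "")]
  where
    go _ [] = Nothing
    go visited ((p, q, w) : todo)
      | (p, q) `Set.member` visited = go visited todo
      | accept m1 p /= accept m2 q  = Just (reverse w)
      | otherwise =
          go (Set.insert (p, q) visited)
             (todo ++ [ (step m1 p a, step m2 q a, a : w) | a <- alphabet ])

-- Example: words of even length versus words of length divisible by four.
evenLen, mod4Len :: DFA Int
evenLen = DFA 0 (== 0) (\s _ -> (s + 1) `mod` 2)
mod4Len = DFA 0 (== 0) (\s _ -> (s + 1) `mod` 4)

main :: IO ()
main = print (counterexample "ab" evenLen mod4Len)   -- Just "aa"
\stoptyping
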
%We remark that our algorithms seamlessly merge with teachers written in NLambda, but the current version of the library does not allow generating concrete membership queries for external teachers.
The minimal nominal DFA for $\text{FIFO}_2$ is given in \in{Figure}[fig:fifo2].
The state reached from $q_{1,x}$ via $\kw{push}(x)$ is omitted:
Its outgoing transitions are those of $q_{2,x,y}$, where $y$ is replaced by $x$.
Similar benchmarks appear in \cite[AartsFKV15, IsbernerHS14]; a sketch of a membership oracle for $\text{FIFO}_2$ is given below.
\stopdescription
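
The following Haskell sketch (the data type \type{Action} and the function \type{inFifo2} are ours, given only for illustration) implements such a membership oracle: a word is accepted precisely when it can be executed by a queue of capacity two.

\starttyping
-- Actions over an infinite alphabet of atoms, here modelled as integers.
data Action = Push Int | Pop Int deriving Show

-- A word is accepted when it can be executed by a queue of capacity two:
-- no overflow, and every pop returns the oldest stored value.
inFifo2 :: [Action] -> Bool
inFifo2 = go []
  where
    capacity = 2
    go _ []                 = True
    go q (Push a : rest)
      | length q < capacity = go (q ++ [a]) rest
      | otherwise           = False
    go (b : q) (Pop a : rest)
      | a == b              = go q rest
      | otherwise           = False
    go [] (Pop _ : _)       = False

main :: IO ()
main = mapM_ (print . inFifo2)
  [ [Push 1, Push 2, Pop 1, Pop 2]   -- True
  , [Push 1, Pop 2]                  -- False: wrong value popped
  , [Push 1, Push 2, Push 3]         -- False: queue overflow
  ]
\stoptyping
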
\startdescription[title={NFA.}]
Consider the language $\lang_{eq} = \bigcup_{a \in \EAlph} \EAlph^{\ast} a \EAlph^{\ast} a \EAlph^{\ast}$ of words where some letter appears twice.
This is accepted by an NFA which guesses the position of the first occurrence of a repeated letter $a$ and then waits for the second $a$ to appear.
The language is not accepted by a DFA \cite[BojanczykKL14].
Despite this \nNLStar{} is able to learn the automaton shown in \in{Figure}[fig:nfa].
\startplacefigure
\description{$n$-last Position.}
A prototypical example of regular languages which are accepted by very small NFAs is the set of words where a distinguished symbol $a$ appears on the $n$-last position \cite[BolligHKL09].
We define a similar nominal language $\lang'_n = \bigcup_{a \in \EAlph} a \EAlph^{\ast} a \EAlph^n$.
To accept such words non-deterministically, one simply guesses the $n$-last position.
This language is also accepted by a much larger deterministic automaton.
This section compares \nLStar{} with other algorithms from the literature.
We stress that no comparison is possible for \nNLStar{}, as it is the first learning algorithm for non-deterministic automata over infinite alphabets.
The first one to consider learning automata over infinite alphabets was \cite[authoryears][Sakamoto97].
In his work the problem is reduced to \LStar{} with some finite sub-alphabet.
The sub-alphabet grows in stages and \LStar{} is rerun at every stage, until the alphabet is big enough to capture the whole language.
In Sakamoto's approach, any learning algorithm can be used as a back-end.
The ordinary learner uses four equivalence queries, whereas the nominal one, using the symmetry, only needs three.
Moreover, our algorithm is easier to generalise to other alphabets and computational models, such as non-determinism.
More recently, papers have appeared on learning register automata by \cite[authoryears][HowarSJC12, CasselHJS16].
Their register automata are as expressive as our deterministic nominal automata.
The state space is similar to our orbit-wise representation:
It is formed by finitely many locations with registers.
Transitions are defined symbolically using propositional logic.
We remark that the most recent paper by \citet[CasselHJS16] generalises the algorithm to alphabets with different structures (which correspond to different atom symmetries in our work), but at the cost of changing Angluin's framework.
Instead of membership queries the algorithm requires more sophisticated tree queries.
In our approach, using a different symmetry affects neither the algorithm nor its correctness proof.
Tree queries can be reduced to membership queries by enumerating all $n$-types for some $n$ ($n$-types in logic correspond to orbits in the set of $n$-tuples).
Finally, our approach lends itself better to be extended to other variations on \LStar{} (of which many exist), as it is closer to Angluin's original work.
Another class of learning algorithms for systems with large alphabets is based on abstraction and refinement. This is orthogonal to the approach in this thesis, but connections and a possible transfer of techniques are worth exploring in the future.
\cite[authoryears][AartsFKV15] reduce the alphabet to a finite alphabet of abstractions, and \LStar{} for ordinary DFAs over such finite alphabet is used.
Abstractions are refined by counterexamples.
Other similar approaches are by \citet[HowarSM11, IsbernerHS13], where global and local per-state abstractions of the alphabet are used, and by \citet[Mens17], where the alphabet can also have additional structure (e.g., an ordering relation).
We also mention that \citet[BotincanB13] give a framework for learning symbolic models of software behaviour.
\citet[BergJR06, BergJR08] cope with an infinite alphabet by running \LStar{} (adapted to Mealy machines) using a finite approximation of the alphabet, which may be augmented when equivalence queries are answered.
A smaller symbolic model is derived subsequently.
Their approach, unlike ours, does not exploit the symmetry over the full alphabet.
The symmetry allows our algorithm to reduce queries and to produce the smallest possible automaton at every step.
Finally we compare with results on session automata \cite[BolligHLM13].
Session automata are defined over finite alphabets just like the work by Sakamoto.
However, session automata are more restrictive than deterministic nominal automata.
For example, the model cannot capture an acceptor for the language of words where consecutive data values are distinct.
This language can be accepted by a three orbit nominal DFA, which can be learned by our algorithm.
We implemented our algorithms in the nominal library NLambda as sketched before.
Other implementation options include Fresh OCaml \cite[Shinwell06], a functional programming language designed for programming over nominal data structures with binding, and \LOIS{} by \cite[authoryears][KopczynskiT16, KopczynskiT17], a C++ library for imperative nominal programming.
We chose NLambda for its convenient set-theoretic primitives, but the other options remain to be explored, in particular the low-level \LOIS{} could be expected to provide more efficient implementations.
We highlight two features of our approach:
\startitemize[after]
\item It has strong theoretical foundations:
The \emph{theory of nominal languages}, covering different alphabets and symmetries (see \in{Section}[sec:other-symms]); \emph{category theory}, where nominal automata have been characterised as \emph{coalgebras} \cite[KozenMP015, CianciaM10] and many properties and algorithms (e.g., minimisation) have been studied at this abstract level.
\item It follows a generic pattern for transporting computation models and algorithms from finite sets to nominal sets, which leads to simple correctness proofs.
\stopitemize
In general, the size of the minimal DFA for $\lang_n$ grows more than exponentially with $n$, but an automaton with substitutions on transitions, like the one above, only needs $\bigO(n)$ states.
This direction is investigated in \in{Chapter}[chap:separated-nominal-automata].
In principle, thanks to the generic approach we have taken, all our algorithms should work for various kinds of atoms with more structure than just equality, as advocated by \citet[BojanczykKL14].
Details, such as precise assumptions on the underlying structure of atoms necessary for proofs to go through, remain to be checked.
In the next chapter (\in{Chapter}[chap:ordered-nominal-sets]), we investigate learning with the total order symmetry.
We implement this in NLambda, as well as a new tool for computing with nominal sets over the total order symmetry.
The efficiency of our current implementation, as measured in \in{Section}[sec:tests], leaves much to be desired.
There is plenty of potential for running time optimisation, ranging from improvements in the learning algorithms themselves, to optimisations in the NLambda library (such as replacing the external, general-purpose SMT solver with a purpose-built internal one, or a tighter integration of nominal mechanisms with the underlying Haskell language, as was done by \citenp[Shinwell06]), to giving up the functional programming paradigm for an imperative language such as LOIS \cite[KopczynskiT16, KopczynskiT17].
\stopsection
Obtaining minimal separating sequences in Hopcroft's framework, however, is a non-trivial task.
In this chapter, we present an algorithm for finding such minimal separating sequences for all pairs of inequivalent states of a FSM in $\bigO(m \log n)$ time.
Coincidentally, \citet[BonchiP13] recently introduced a new algorithm for the equally fundamental problem of proving equivalence of states in non-deterministic automata.
As both their and our work demonstrate, even classical problems in automata theory can still offer surprising research opportunities.
Moreover, new ideas for well-studied problems may lead to algorithmic improvements that are of practical importance in a variety of applications.
One such application for our work is in \emph{conformance testing}.
Here, the goal is to test if a black box implementation of a system is functioning as described by a given FSM.
It consists of applying sequences of inputs to the implementation, and comparing the output of the system to the output prescribed by the FSM.
Minimal separating sequences are used in many test generation methods \cite[DorofeevaEMCY10]. Therefore, our algorithm can be used to improve these methods.
\startsection
In this section we will discuss the basics of minimisation.
Both Moore's algorithm and Hopcroft's algorithm work by means of partition refinement.
A similar treatment (for DFAs) is given by \citet[Gries73].
A \emph{partition} $P$ of $S$ is a set of pairwise disjoint non-empty subsets of $S$ whose union is exactly $S$.
Elements in $P$ are called \emph{blocks}.
Since an FSM has only finitely many states, partition refinement will terminate.
The output is the finest valid partition which is acceptable and stable.
For a more formal treatment on partition refinement we refer to \citet[Gries73].
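
For concreteness, the following Haskell sketch implements the partition refinement just described for a finite Mealy machine: it starts from the partition induced by the outputs and keeps splitting blocks by the blocks of their successors. It is a plain quadratic version of Moore's scheme, not the $\bigO(m \log n)$ algorithm developed in this chapter, and the names are ours.

\starttyping
import Data.List (groupBy, sortOn)

-- A finite Mealy machine over Int states, with characters as inputs/outputs.
data Mealy = Mealy { states :: [Int], inputs :: [Char]
                   , delta :: Int -> Char -> Int, lambda :: Int -> Char -> Char }

-- Group a list of states into blocks of equal key.
splitBy :: Ord k => (Int -> k) -> [Int] -> [[Int]]
splitBy key = groupBy (\x y -> key x == key y) . sortOn key

-- Start from the partition induced by the outputs, then repeatedly split
-- blocks by the blocks their successors belong to, until nothing changes.
refine :: Mealy -> [[Int]]
refine m = go (splitBy (\s -> map (lambda m s) (inputs m)) (states m))
  where
    blockOf p s = head [ i | (i, b) <- zip [0 :: Int ..] p, s `elem` b ]
    go p =
      let p' = concatMap (splitBy (\s -> map (blockOf p . delta m s) (inputs m))) p
      in if length p' == length p then p else go p'

-- Example: a three-state machine in which states 1 and 2 are equivalent.
example :: Mealy
example = Mealy [0, 1, 2] "a" d l
  where
    d 0 'a' = 1; d 1 'a' = 2; d _ _ = 1
    l 0 'a' = 'x'; l _ _ = 'y'

main :: IO ()
main = print (refine example)   -- [[0],[1,2]]
\stoptyping
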
\stopsubsection
Both types of splits described above can be used to construct a separating sequence.
In a split w.r.t. the output after $a$, this sequence is simply $a$.
In a split w.r.t. the state after $a$, the sequence starts with an $a$ and continues with the separating sequence for states in $\delta(B, a)$.
In order to systematically keep track of this information, we maintain a \emph{splitting tree}.
The splitting tree was introduced by \citet[LeeY94] as a data structure for maintaining the operational history of a partition refinement algorithm.
\startdefinition[reference=def:splitting-tree]
A \emph{splitting tree} for $M$ is a rooted tree $T$ with a finite set of nodes with the following properties:
So far, the improved algorithm would still have time complexity $\bigO(mn)$.
To reduce the complexity we have to use Hopcroft's second idea of \emph{processing the smaller half}.
The key idea is that, when we fix a $k$-candidate $v$, all leaves are split with respect to $(v, a)$ simultaneously.
Instead of iterating over all leaves to refine them, we iterate over $s \in \delta^{-1}(l(w), a)$ for all $w$ in $C(v)$ and look up in which leaf it is contained to move $s$ out of it.
From Lemma 8 by \citet[Knuutila01] it follows that we can skip one of the children of $v$.
This lowers the time complexity to $\bigO(m \log n)$.
In order to move $s$ out of its leaf, each leaf $u$ is associated with a set of temporary children $C'(u)$ that is initially empty, and will be finalised after iterating over all $s$ and $w$.
A splitting tree can be used to extract relevant information for two classical test generation methods:
a \emph{characterisation set} for the W-method and a \emph{separating family} for the HSI-method.
For an introduction and comparison of FSM-based test generation methods we refer to \citet[DorofeevaEMCY10] or \in{Chapter}[chap:test-methods].
\startdefinition[reference=def:cset]
A set $W \subset I^{\ast}$ is called a \emph{characterisation set} if for every pair of inequivalent states $s, t$ there is a sequence $w \in W$ such that $\lambda(s, w) \neq \lambda(t, w)$.
For test generation one also needs a transition cover.
This can be constructed in linear time with a breadth first search.
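
As a hedged sketch of that construction (the names are ours), a breadth-first search yields a shortest access sequence for every reachable state, and appending every input gives the transition cover.

\starttyping
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)

-- Shortest access sequences for all reachable states (a state cover),
-- computed by a breadth-first search from the initial state.
stateCover :: Ord q => q -> [i] -> (q -> i -> q) -> Map q [i]
stateCover q0 is step = go (Map.singleton q0 []) [q0]
  where
    go acc []       = acc
    go acc (q : qs) =
      let new  = [ (step q i, Map.findWithDefault [] q acc ++ [i])
                 | i <- is, not (Map.member (step q i) acc) ]
          acc' = foldr (\(q', w) m -> Map.insertWith (\_ old -> old) q' w m) acc new
      in go acc' (qs ++ map fst new)

-- The transition cover: every access sequence extended with every input.
transitionCover :: Ord q => q -> [i] -> (q -> i -> q) -> [[i]]
transitionCover q0 is step =
  [ w ++ [i] | w <- Map.elems (stateCover q0 is step), i <- is ]

-- Example: a three-state machine over inputs 'a' and 'b'.
main :: IO ()
main = print (transitionCover (0 :: Int) "ab" (\q i -> (2 * q + fromEnum i) `mod` 3))
\stoptyping
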
We conclude that we can construct all necessary information for the W-method in time $\bigO(m \log n)$ as opposed to the $\bigO(mn)$ algorithm used by \citet[DorofeevaEMCY10].
Furthermore, we conclude that we can construct all the necessary information for the HSI-method in time $\bigO(m \log n + n^2)$, improving on the reported bound $\bigO(mn^3)$ by \citet[HieronsT15].
The original HSI-method was formulated differently and might generate smaller sets.
We conjecture that our separating family has the same size if we furthermore remove redundant prefixes.
This can be done in $\bigO(n^2)$ time using a trie data structure.
This order follows naturally from the construction of a splitting tree.
Experimental results show that our algorithm outperforms the classic approach for both worst-case finite state machines and models of embedded control software.
Applications of minimal separating sequences such as the ones described by \citet[DorofeevaEMCY10, DBLP:conf/icfem/SmeenkMVJ15] therefore show that our algorithm is useful in practice.
\stopsection
Automata over infinite alphabets are natural models for programs with unbounded data domains.
Such automata, often formalised as \emph{register automata}, are applied in modelling and analysis of communication protocols, hardware, and software systems (see \citenp[BojanczykKL14, DAntoniV17, GrigoreT16, KaminskiF94, MontanariP97, Segoufin06] and references therein).
Typical infinite alphabets include sequence numbers, timestamps, and identifiers.
This means one can model data flow in such automata in addition to the basic control flow provided by ordinary automata.
Recently, it has been shown in a series of papers that such models are amenable to learning \citep[AartsFKV15, BolligHLM13, CasselHJS16, DrewsD17, MoermanS0KS17, Vaandrager17] with the verification of (closed source) TCP implementations by \citet[Fiterau-Brostean16] as a prominent example.
A foundational approach to infinite alphabets is provided by the notion of \emph{nominal set}, originally introduced in computer science as an elegant formalism for name binding \citep[GabbayP02, Pitts16].
Nominal sets have been used in a variety of applications in semantics, computation, and concurrency theory (see \citenp[Pitts13] for an overview).
\citet[BojanczykKL14] introduce \emph{nominal automata}, which allow one to model languages over infinite alphabets with different symmetries.
Their results are parametric in the structure of the data values.
Important examples of data domains are ordered data values (e.g., timestamps) and data values that can only be compared for equality (e.g., identifiers).
In both data domains, nominal automata and register automata are equally expressive.
Implementations are important for applications of nominal sets and automata.
A couple of tools exist to compute with nominal sets.
Notably, \NLambda{} \citep[KlinS16] and \LOIS{} \citep[KopczynskiT16, KopczynskiT17] provide a general purpose programming language to manipulate infinite sets.
\footnote{Other implementations of nominal techniques that are less directly related to our setting (Mihda, Fresh OCaml, and Nominal Isabelle) are discussed in \in{Section}[sec:related].}
Both tools are based on SMT solvers and use logical formulas to represent the infinite sets.
These implementations are very flexible, and the SMT solver does most of the heavy lifting, which makes the implementations themselves relatively straightforward.
In this chapter, we use a direct representation based on symmetries and orbits, to represent nominal sets.
We focus on the \emph{total order symmetry}, where data values are rational numbers and can be compared for their order.
Nominal automata over the total order symmetry are more expressive than automata over the equality symmetry (i.e., traditional register automata of \citenp[KaminskiF94]).
A key insight is that the representation of nominal sets from \citet[BojanczykKL14] becomes rather simple in the total order symmetry; each orbit is presented solely by a natural number, intuitively representing the number of variables or registers.
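
To illustrate how concrete this becomes (a sketch under our own names, not the actual \ONS{} interface): a single orbit is given by a natural number, and the orbits of a product of two orbits correspond to the ways the two supports can be interleaved, allowing equalities.

\starttyping
-- Over the total order symmetry a single orbit is represented by a natural
-- number: the size of the least support of any of its elements.
newtype Orbit = Orbit Int deriving Show

-- The product of two orbits decomposes into one orbit per interleaving of
-- the two supports: L = used only by the left component, R = only by the
-- right, B = shared by both.
data Pos = L | R | B deriving Show

productOrbits :: Orbit -> Orbit -> [[Pos]]
productOrbits (Orbit m) (Orbit n) = interleave m n
  where
    interleave 0 0 = [[]]
    interleave i 0 = [replicate i L]
    interleave 0 j = [replicate j R]
    interleave i j =  map (L :) (interleave (i - 1) j)
                   ++ map (R :) (interleave i (j - 1))
                   ++ map (B :) (interleave (i - 1) (j - 1))

main :: IO ()
main = do
  -- Pairs of rationals fall into three orbits: a < b, a > b and a = b.
  mapM_ print (productOrbits (Orbit 1) (Orbit 1))
  -- Products of higher-dimensional orbits grow quickly (13 orbits here).
  print (length (productOrbits (Orbit 2) (Orbit 2)))
\stoptyping
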
\noindent
Our main contributions include the following.
The library includes all the results from the representation theory (sets, products, and maps).
\item
We \emph{evaluate the performance} of \ONS{} and compare it to \NLambda{} and \LOIS{}, using two algorithms on nominal automata:
minimisation \citep[BojanczykL12] and automata learning \citep[MoermanS0KS17].
We use randomly generated automata as well as concrete, logically structured models such as FIFO queues.
For random automata, our methods are drastically faster than the other tools.
On the other hand, \LOIS{} and \NLambda{} are faster in minimising the structured automata as they exploit their logical structure.
reference=sec:nomsets]
Nominal sets are infinite sets that carry certain symmetries, allowing a finite representation in many interesting cases.
We recall their formalisation in terms of group actions, following \citet[BojanczykKL14, Pitts13], to which we refer for an extensive introduction.
\startsubsection
The existence of least finite supports is crucial for representing orbits.
Unfortunately, even when elements have a finite support, in general they do not always have a least finite support.
A data symmetry $(\mathcal{D}, G)$ is said to \emph{admit least supports} if every element of every nominal set has a least finite support.
Both the equality and the total order symmetry admit least supports.
(\citenp[BojanczykKL14] give additional (counter)examples of data symmetries admitting least supports.)
Having least finite supports is useful for a finite representation.
Henceforth, we will write \emph{least support} to mean least finite support.
reference=subsec:nomorbit]
We represent nominal sets as collections of single orbits.
The finite representation of single orbits is based on the theory of \citet[BojanczykKL14], which uses the technical notions of \emph{restriction} and \emph{extension}.
We only briefly report their definitions here.
However, the reader can safely move to the concrete representation theory in \in{Section}[sec:total-order] with only a superficial understanding of \in{Theorem}[representation_tuple] below.
For $C \subseteq \mathcal{D}$ and $S\le G|_C$, define $[C,S]^{ec} = \{\{g s \mid s \in \ext_G(S)\}\mid g\in G\}$, i.e., the set of right cosets of $\ext_G(S)$ in $G$.
Then $[C,S]^{ec}$ is a single-orbit nominal set.
Using the above, we can formulate the representation theory from \citet[BojanczykKL14].
This gives a finite description for all single-orbit nominal sets $X$, namely a finite set $C$ together with some of its symmetries.
\starttheorem[reference=representation_tuple]
Then there exists a subgroup $S \le G|_C$ such that $X \cong [C,S]^{ec}$.
\stoptheorem
The proof by \citet[BojanczykKL14] uses a bit of category theory:
it establishes an equivalence of categories between single-orbit sets and the pairs $(C, S)$.
We will not use the language of category theory much in order to keep the chapter self-contained.
The definition of register automata is not as simple as that of ordinary finite automata.
Consequently, transferring results from automata theory to this setting often requires non-trivial proofs.
Nominal automata, instead, are defined as ordinary automata by replacing finite sets with orbit-finite nominal sets.
The theory of nominal automata is developed by \citet[BojanczykKL14] and it is shown that many, but not all, algorithms from automata theory transfer to nominal automata.
As an example we consider the following language on rational numbers:
\startformula
\startsubsection
[title={Minimising nominal automata}]
For languages recognised by nominal DFAs, a Myhill-Nerode theorem holds which relates states to right congruence classes \citep[BojanczykKL14].
This guarantees the existence of unique minimal automata.
We say an automaton is \emph{minimal} if its set of states has the least number of orbits and each orbit has the smallest dimension possible.
\footnote{Abstractly, an automaton is minimal if it has no proper quotients.
[title={Implementations}]
We implemented the minimisation algorithm in \ONS{}.
For \NLambda{} and \LOIS{} we used their implementations of Moore's minimisation algorithm \citep[KlinS16, KopczynskiT16, KopczynskiT17].
For each of the libraries, we wrote routines to read in an automaton from a file and,
for the structured test cases, to generate the requested automaton.
For \ONS{}, all automata were read from file.
Another application that we implemented in \ONS{} is \emph{automata learning}.
The aim of automata learning is to infer an unknown regular language $\lang$.
We use the framework of active learning as set up by \citet[Angluin87]
where a learning algorithm can query an oracle to gather information about $\lang$.
Formally, the oracle can answer two types of queries:
\startitemize
\item \emph{membership queries}, where a query consists of a word $w \in A^{*}$ and the oracle replies whether $w \in \lang$, and
\item \emph{equivalence queries}, where a query consists of an automaton $\mathcal{H}$ and the oracle replies positively if $\lang(\mathcal{H}) = \lang$ or provides a counterexample if $\lang(\mathcal{H}) \neq \lang$.
\stopitemize
With these queries, the \LStar{} algorithm can learn regular languages efficiently \citep[Angluin87].
In particular, it learns the unique minimal automaton for $\lang$ using only finitely many queries.
The \LStar{} algorithm has been generalised to \nLStar{} in order to learn \emph{nominal} regular languages.
In particular, it learns a nominal DFA (over an infinite alphabet) using only finitely many queries.
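
For illustration only, the two kinds of queries can be captured by a small Haskell interface over a finite alphabet (the names are ours, and this is not the interface used in our implementation); a teacher for a known target language is obtained from a membership predicate together with a brute-force, bounded approximation of the equivalence check.

\starttyping
-- A minimal teacher interface: membership and equivalence queries.  A
-- hypothesis is represented by its acceptance predicate only.
data Teacher = Teacher
  { membership  :: String -> Bool
  , equivalence :: (String -> Bool) -> Maybe String  -- Nothing: equivalent
  }

-- A teacher for a known target language; equivalence is approximated by
-- checking all words up to a fixed length.
teacherFor :: [Char] -> Int -> (String -> Bool) -> Teacher
teacherFor alphabet bound target = Teacher
  { membership  = target
  , equivalence = \hyp ->
      case [ w | w <- wordsUpTo bound, target w /= hyp w ] of
        []      -> Nothing
        (w : _) -> Just w
  }
  where
    wordsUpTo n =
      concat (take (n + 1)
                   (iterate (\ws -> [ w ++ [a] | w <- ws, a <- alphabet ]) [""]))

main :: IO ()
main = do
  let t = teacherFor "ab" 4 (\w -> even (length w))
  print (membership t "aa")            -- True
  print (equivalence t (const True))   -- Just "a"
\stoptyping
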
[title={Related work},
reference=sec:related]
As stated in the introduction, \NLambda{} by \citet[KlinS16] and \LOIS{} by \citet[KopczynskiT16] use first-order formulas to represent nominal sets and use SMT solvers to manipulate them.
This makes both libraries very flexible and they indeed implement the equality symmetry as well as the total order symmetry.
As their representation is not unique, the efficiency depends on how the logical formulas are constructed.
As such, they do not provide complexity results.
This means that they overload natural programming constructs in their host languages (\haskell{} and \cpp{} respectively).
For programmers this means they can think of infinite sets without having to know about nominal sets.
It is worth mentioning that an older (unreleased) version of \NLambda{} implemented nominal sets with orbits instead of SMT solvers \citep[BojanczykBKL12].
However, instead of characterising orbits (e.g., by their dimension), they represent orbits by a representative element.
\citet[KlinS16] reported that the current version is faster.
The theoretical foundation of our work is the main representation theorem by \citet[BojanczykKL14].
We improve on that by instantiating it to the total order symmetry and distil a concrete representation of nominal sets.
As far as we know, we provide the first implementation of their representation theory.
This tool implements a translation from $\pi$-calculus to history-dependent automata (HD-automata) with the aim of minimisation and checking bisimilarity.
The implementation in OCaml is based on \emph{named sets}, which are finite representations for nominal sets.
The theory of named sets is well-studied and has been used to model various behavioural models with local names.
For those results, the categorical equivalences between named sets, nominal sets and a certain (pre)sheaf category have been exploited \citep[CianciaKM10, CianciaM10].
The total order symmetry is not mentioned in their work.
We do, however, believe that similar equivalences between categories can be stated.
Interestingly, the product of named sets is similar to our representation of products of nominal sets: pairs of elements together with data which denotes the relation between data values.
They only use the equality symmetry and do not provide a library for manipulating nominal sets.
Hence they are not suited for our applications.
On the theoretical side, there are many complexity results for register automata \citep[GrigoreT16, MurawskiRT15].
In particular, we note that problems such as emptiness and equivalence are NP-hard depending on the type of register automaton.
This does not easily compare to our complexity results for minimisation.
One difference is that we use the total order symmetry, where the local symmetries are always trivial (\in{Lemma}[group_trivial]).
In nominal automata, such configurations will be encoded in different orbits.
\todo{Equivalence is polynomial: \cite[MurawskiRT18].}
Orthogonal to nominal automata, there is the notion of symbolic automata \citep[DAntoniV17, MalerM17].
These automata are also defined over infinite alphabets but they use predicates on transitions, instead of relying on symmetries.
Symbolic automata are finite state (as opposed to infinite state nominal automata) and do not allow for storing values.
However, they do allow for general predicates over an infinite alphabet, including comparison to constants.
Here, we may take inspiration from existing tools such as Mihda (see \in{Section}[sec:related]).
Another interesting question is whether it is possible to translate a nominal automaton over the total order symmetry which accepts an equality language to an automaton over the equality symmetry.
This would allow one to efficiently move between symmetries.
Finally, our techniques can potentially be applied to timed automata by exploiting the intriguing connection between the nominal automata that we consider and timed automata \citep[BojanczykL12].
\stopsection
Lastly, we want to thank the anonymous reviewers for their comments.
\stopsubject
\referencesifcomponent
\stopchapter
\stopcomponent
We denote the set of maximal tests of $T$ by $\max(T)$.
The maximal tests are the only tests in $T$ we actually have to apply to our SUT as we can record the intermediate outputs.
In the examples of this chapter we will show $\max(T)$ instead of $T$.
We define the size of a test suite as usual \citep[DorofeevaEMCY10, Petrenko0GHO14].
The size of a test suite is measured as the sum of the lengths of all its maximal tests plus one reset per test.
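
Stated as a small Haskell function over the maximal tests (a hedged illustration, names ours), the measure reads as follows.

\starttyping
-- The size of a test suite, computed from its maximal tests: the sum of
-- the lengths of the maximal tests plus one reset per test.
suiteSize :: [[i]] -> Int
suiteSize maximalTests = sum [ length t + 1 | t <- maximalTests ]

main :: IO ()
main = print (suiteSize ["aab", "ba"])   -- (3 + 1) + (2 + 1) = 7
\stoptyping
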
\startdefinition[reference=test-suite-size]
One more type of state identifier is of our interest: the \emph{adaptive distinguishing sequence}.
It is the strongest type of state identifier, and as a result not many machines have one.
Like DSs, adaptive distinguishing sequences can identify a state using a single word.
We give a slightly different (but equivalent) definition than the one of \citet[LeeY94].
\startdefinition
A separating family $\Fam{H}$ is an \defn{adaptive distinguishing sequence} (ADS) if each set $\max(H_s)$ is a singleton.
There are many more test generation methods.
Literature shows, however, that not all of them are complete.
For example, the method by \citet[Bernhard94] is falsified by \citet[Petrenko97], and the UIO-method from \citet[SabnaniD88] is shown to be incomplete by \citet[ChanVO89].
For that reason, completeness of the correct methods is shown in \in{Theorem}[thm:completeness].
The proof is general enough to capture all the methods at once.
We fix a state cover $P$ throughout this section and take the transition cover $Q = P \cdot I$.
\startsubsection
[title={W-method \cite[Chow78, Vasilevskii73]},
reference=sec:w]
After the work of \citet[Moore56], it was unclear whether a test suite of polynomial size could exist.
\todo{Mention that Moore gave an exponential test suite, and that the existence of a polynomial suite was open.}
Both \citet[Chow78, Vasilevskii73] independently prove that such a test suite exists.
\footnote{More precisely: the size of $T_{\text{W}}$ is polynomial in the size of the specification for each fixed $k$.}
The W-method is a very structured test suite construction.
It is called the W-method as the characterisation set is often called $W$.
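
For comparison with the test suites displayed later in this chapter, the W-method suite can be written in the same shape; we recall the classical construction here in the chapter's notation (with state cover $P$, transition cover $Q = P \cdot I$, and characterisation set $W$).

\startformula
T_{\text{W}} = (P \cup Q) \cdot I^{\leq k} \cdot W.
\stopformula
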
\stopsubsection
\startsubsection
[title={The Wp-method \cite[FujiwaraBKAG91]},
reference=sec:wp]
\citet[FujiwaraBKAG91] realised that one needs fewer tests in the second phase of the W method.
Since we already know the right states are present after phase one, we only need to check if the state after a transition is consistent with the expected state.
This justifies the use of state identifiers for each state.
\stopsubsection
\startsubsection
[title={The ADS-method \cite[LeeY94]},
reference=sec:ads]
As discussed before, when a Mealy machine admits an adaptive distinguishing sequence, only a single test has to be performed for identifying a state.
\startformula
T_{\text{ADS}} = (P \cup Q) \cdot I^{\leq k} \odot \Fam{Z}.
\stopformula
\stopsubsection
\startsubsection
[title={The UIOv-method \cite[ChanVO89]},
reference=sec:uiov]
Some Mealy machines which do not admit an adaptive distinguishing sequence,
One might think that using a single UIO sequence instead of the set $\bigcup \Fam{U}$ to verify the state is enough.
In fact, this idea was used for the \emph{UIO-method} which defines the test suite $(P \cup Q) \cdot I^{\leq k} \odot \Fam{U}$.
The following is a counterexample, due to \citet[ChanVO89], to such conjecture.
\startexample
[reference=ex:uio-counterexample]
In this section, we describe a new test generation method for Mealy machines.
Its completeness will be proven in \in{Theorem}[thm:completeness], together with completeness for all methods defined in the previous section.
From a high level perspective, the method uses the algorithm by \cite[authoryears][LeeY94] to obtain an ADS.
If no ADS exists, their algorithm still provides some sequences which separate some inequivalent states.
Our extension is to refine the set of sequences by using pairwise separating sequences.
Hence, this method is a hybrid between the ADS-method and the HSI-method.
We do this because the ADS-method generally constructs small test suites, as the experiments by \cite[authoryears][DBLP:journals/infsof/DorofeevaEMCY10] suggest.
We do this because the ADS-method generally constructs small test suites, as the experiments by \cite[authoryears][DorofeevaEMCY10] suggest.
The test suites are small since an ADS can identify a state with a single word, instead of a set of words which is generally needed.
Even if no ADS exists, using the partial result of Lee and Yannakakis' algorithm can still reduce the size of the test suite.
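Ahead of the formal definitions below, the core refinement can be sketched as follows; the helper \type{separating_sequence} and the shape of \type{identifiers} are hypothetical and only illustrate the idea.
\starttyping
def refine(identifiers, unresolved_leaves, separating_sequence):
    """Refine ADS-derived identifiers into a full separating family.

    identifiers maps every state to the set of words obtained from the
    (possibly incomplete) adaptive distinguishing sequence computation.
    unresolved_leaves lists groups of states that those words fail to
    separate; every such pair gets an extra pairwise separating sequence,
    as in the HSI-method.
    """
    for leaf in unresolved_leaves:
        for i, s in enumerate(leaf):
            for t in leaf[i + 1:]:
                w = separating_sequence(s, t)
                identifiers[s].add(w)
                identifiers[t].add(w)
    return identifiers
\stoptyping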
@@ -758,7 +758,7 @@ From such a tree we can construct a state identifier for a state by locating the
For adaptive distinguishing sequences an additional requirement is put on the splitting tree:
for each non-leaf node $u$, the sequence $\sigma(u)$ defines an injective map $x \mapsto (\delta(x, \sigma(u)), \lambda(x, \sigma(u)))$ on the set $l(u)$.
\cite[authoryears][DBLP:journals/tc/LeeY94] call such splits \defn{valid}.
\cite[authoryears][LeeY94] call such splits \defn{valid}.
\in{Figure}[fig:example-splitting-tree] shows both valid and invalid splits.
Validity precisely ensures that after performing a split, the states are still distinguishable.
Hence, sequences of such splits can be concatenated.
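Concretely, validity of a candidate split can be checked as in the following sketch, where \type{delta} and \type{lam} are assumed to be the transition and output functions extended to input words.
\starttyping
def is_valid_split(leaf_states, sigma, delta, lam):
    """Check the validity condition for a split.

    The word sigma is a valid split of the block leaf_states if the map
    x -> (delta(x, sigma), lam(x, sigma)) is injective on that block,
    i.e. no two states of the block are merged without being distinguished.
    """
    images = [(delta(x, sigma), lam(x, sigma)) for x in leaf_states]
    return len(set(images)) == len(leaf_states)
\stoptyping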
@@ -792,7 +792,7 @@ Hence, sequences of such splits can be concatenated.
}
\stopplacefigure
The following lemma is a result of \cite[authoryears][DBLP:journals/tc/LeeY94].
The following lemma is a result of \cite[authoryears][LeeY94].
\startlemma
A complete splitting tree with only valid splits exists if and only if there exists an adaptive distinguishing sequence.
@@ -816,7 +816,7 @@ In all cases, the result is a separating family because $\Fam{H}$ is.
\ENSURE{A separating family $\Fam{Z}$}
\startlinenumbering
\STATE $T_1 \gets$ splitting tree for Moore's minimisation algorithm
\STATE $T_2 \gets$ splitting tree with valid splits (see \citenp[DBLP:journals/tc/LeeY94])
\STATE $T_2 \gets$ splitting tree with valid splits (see \citenp[LeeY94])
\STATE $\Fam{Z'} \gets$ (incomplete) family constructed from $T_2$
\FORALL{inequivalent states $s, t$ in the same leaf of $T_2$}{
\STATE $u \gets lca(T_1, s, t)$
@@ -917,7 +917,7 @@ We also observe that one of the state identifiers grew in length: $\{ aa, c \}$
All the algorithms concerning the hybrid ADS-method have been implemented and can be found at
\href{https://gitlab.science.ru.nl/moerman/hybrid-ads}.
We note that \in{Algorithm}[alg:hybrid] is implemented a bit more efficiently, as we can walk the splitting trees in a particular order.
For constructing the splitting trees in the first place, we use Moore's minimisation algorithm and the algorithms by \citet[DBLP:journals/tc/LeeY94].
For constructing the splitting trees in the first place, we use Moore's minimisation algorithm and the algorithms by \citet[LeeY94].
We keep all relevant sets prefix-closed by maintaining a \emph{trie data structure}.
The trie also allows us to immediately obtain only the maximal tests.
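A minimal sketch of such a trie (not the actual implementation in the repository) is shown below; inserting a word automatically keeps all of its prefixes, and only the leaves need to be executed, since every proper prefix is covered by a longer test.
\starttyping
class Trie:
    """A prefix tree over input symbols; the stored set is prefix-closed."""

    def __init__(self):
        self.children = {}

    def insert(self, word):
        """Insert a word; all of its prefixes are represented implicitly."""
        node = self
        for symbol in word:
            node = node.children.setdefault(symbol, Trie())

    def maximal_tests(self, prefix=()):
        """Yield only the maximal words (the leaves of the trie)."""
        if not self.children:
            yield prefix
        for symbol, child in self.children.items():
            yield from child.maximal_tests(prefix + (symbol,))
\stoptyping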
@@ -1000,7 +1000,7 @@ In the worst case, all methods generate a test suite with a size in $\bigO(pn^3)
Nevertheless, we intuitively expect the right column to perform better, as it uses a more structured set (given a separating family for the HSI-method, we can always forget about the common prefixes and apply the Wp-method, which will never yield a smaller test suite when constructed in this way).
Also, we expect the bottom row to perform better, as there is a single test for each state.
Small-scale experiments confirm this intuition
\cite[DBLP:journals/infsof/DorofeevaEMCY10].
\cite[DorofeevaEMCY10].
On the example in \in{Figure}[fig:running-example], we computed all applicable test suites in \in{Sections}[sec:all-example] and \in{}[sec:hads-example].
The UIO and ADS methods are not applicable.
@@ -1037,7 +1037,7 @@ If two states $s, t$ are related by a bisimulation $R$, then $s \sim t$.
In the following proof, we use a slight generalisation of the bisimulation proof technique, called \emph{bisimulation up-to}.
This allows one to give a smaller set $R$ which extends to a bisimulation.
A good introduction to these up-to techniques is given by \citet[DBLP:journals/cacm/BonchiP15] and in the thesis of \citet[Rot17].
A good introduction to these up-to techniques is given by \citet[BonchiP15] and in the thesis of \citet[Rot17].
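For concreteness, one common way to phrase this (stated here as an assumption, since the formal definition is given elsewhere) is: a relation $R$ between specification and implementation states is a bisimulation up-to equivalence if for every pair $(s, i) \in R$ and every input $a \in I$
\startformula
\lambda(s, a) = \lambda'(i, a) \quad\land\quad (\delta(s, a), \delta'(i, a)) \in {\sim} \circ R \circ {\sim},
\stopformula
where $\delta'$ and $\lambda'$ denote the implementation's transition and output functions, and ${\sim} \circ R \circ {\sim}$ relates two states whenever they are equivalent to some $R$-related pair.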
The next proposition gives sufficient conditions for a test suite of a certain shape to be
complete.
@@ -1078,14 +1078,14 @@ So we have:
\startformula s_2 \sim_{W'_{s_2}} i_2 \sim_{W_t} t. \stopformula
By the second assumption, we conclude that $s_2 \sim t$.
So $s_2 \sim t$ and $(t, i) \in R$, which means that $R$ is a bisimulation up-to equivalence.
Using the theory of up-to techniques \cite[DBLP:journals/cacm/BonchiP15] we know that there exists a bisimulation relation containing $R$.
Using the theory of up-to techniques \cite[BonchiP15] we know that there exists a bisimulation relation containing $R$.
In particular, the initial states $s_0$ and $s'_0$ are bisimilar, and so the machines $M$ and $M'$ are equivalent.
\stopproof
Before we show that the conditions hold for the test methods, we first reflect on the above proof.
This proof is very similar to the completeness proof by \citet[DBLP:journals/tse/Chow78].
(In fact, it is also similar to Lemma 4 by \citet[DBLP:journals/iandc/Angluin87] which proves termination in the L* learning algorithm. This correspondence was noted by \citet[DBLP:conf/fase/BergGJLRS05].)
This proof is very similar to the completeness proof by \citet[Chow78].
(In fact, it is also similar to Lemma 4 by \citet[Angluin87] which proves termination in the L* learning algorithm. This correspondence was noted by \citet[BergGJLRS05].)
In the first part, we argue that all states are visited by a counting and reachability argument.
Then in the second part we show the actual equivalence.
To the best of the author's knowledge, this is the first $m$-completeness proof which explicitly uses the concept of a bisimulation.
@@ -1141,14 +1141,14 @@ Last, the SPY method builds upon the HSI-method and changes the prefixes in orde
We believe that this technique is independent of the HSI-method and can in fact be applied to all methods presented in this chapter.
As such, the SPY method should be considered as an optimisation technique, orthogonal to the work in this chapter.
Recently, \citet[DBLP:journals/cj/HieronsT15] devise a novel test method which is based on incomplete distinguishing sequences and is similar to the hybrid ADS method.
Recently, \citet[HieronsT15] devise a novel test method which is based on incomplete distinguishing sequences and is similar to the hybrid ADS method.
They use sequences which can be considered to be adaptive distinguishing sequences on a subset of the state space.
With several of these, one can cover the whole state space, obtaining an $m$-complete test suite.
This is somewhat dual to our approach, as our \quotation{incomplete} adaptive distinguishing sequences define a coarse partition of the complete state space.
Our method becomes complete by refining the tests with pairwise separating sequences.
Some work has been put into minimising the adaptive distinguishing sequences themselves.
\citet[DBLP:journals/fmsd/TurkerY14] describe greedy algorithms which construct small adaptive distinguishing sequences.
\citet[TurkerY14] describe greedy algorithms which construct small adaptive distinguishing sequences.
Moreover, they show that finding the minimal adaptive distinguishing sequence is NP-complete in general; even approximation is NP-complete.
We expect that similar heuristics also exist for the other test methods and that they will improve the performance.
Note that minimal separating sequences do not guarantee a minimal test suite.
@@ -1167,11 +1167,11 @@ The test suites generally are of exponential size, depending on how non-determin
The assumption that the implementation is resettable is also challenged early on.
If the machine has no reliable reset (or the reset is too expensive), one tests the system with a single \emph{checking sequence}.
\citet[DBLP:journals/tc/LeeY94] give a randomised algorithm for constructing such a checking sequence using adaptive distinguishing sequences.
There is a similarity with the randomised algorithm by \citet[DBLP:journals/iandc/RivestS93] for learning non-resettable automata.
\citet[LeeY94] give a randomised algorithm for constructing such a checking sequence using adaptive distinguishing sequences.
There is a similarity with the randomised algorithm by \citet[RivestS93] for learning non-resettable automata.
Recently, \citet[GrozSPO16] give a deterministic learning algorithm for non-resettable machines based on adaptive distinguishing sequences.
Many of the methods described here are benchmarked on small or random Mealy machines by \citet[DBLP:journals/infsof/DorofeevaEMCY10, DBLP:journals/infsof/EndoS13].
Many of the methods described here are benchmarked on small or random Mealy machines by \citet[DorofeevaEMCY10, EndoS13].
The benchmarks are of limited scope; the machine from \in{Chapter}[chap:applying-automata-learning], for instance, is neither small nor random.
For this reason, we started to collect more realistic benchmarks at \href{http://automata.cs.ru.nl/}.


@@ -37,4 +37,4 @@
\definestartstop[proofnoqed][before={{\it Proof. }}, after={}]
\stopenvironment
\stopenvironment


@@ -70,4 +70,4 @@
% language extension
\define\Lext{{\cdot}}
\stopenvironment
\stopenvironment


@@ -36,4 +36,4 @@
\stopmathcases
}
\stopenvironment
\stopenvironment