From 41e0add7870296fc252ed5004e18f3a2f6c2ff18 Mon Sep 17 00:00:00 2001 From: Joshua Moerman Date: Sun, 14 Oct 2018 16:13:11 +0200 Subject: [PATCH] More intro --- biblio.bib | 13 +++++++++ content/introduction.tex | 62 ++++++++++++++++++++++++++++++++++++++++ environment.tex | 3 +- 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/biblio.bib b/biblio.bib index ef19237..ab3e95c 100644 --- a/biblio.bib +++ b/biblio.bib @@ -1374,6 +1374,19 @@ bibsource = {dblp computer science bibliography, https://dblp.org} } +@book{KearnsV94, + author = {Michael J. Kearns and + Umesh V. Vazirani}, + title = {An Introduction to Computational Learning Theory}, + publisher = {{MIT} Press}, + year = {1994}, + url = {https://mitpress.mit.edu/books/introduction-computational-learning-theory}, + isbn = {978-0-262-11193-5}, + timestamp = {Wed, 10 May 2017 15:11:16 +0200}, + biburl = {https://dblp.org/rec/bib/books/daglib/0041035}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + @inproceedings{DBLP:journals/corr/KlinS16, author = {Bartek Klin and Michal Szynwelski}, diff --git a/content/introduction.tex b/content/introduction.tex index 7f576e7..21dd72b 100644 --- a/content/introduction.tex +++ b/content/introduction.tex @@ -8,6 +8,68 @@ \startsection [title={Learning and Testing}] +??? + +First off, let me sketch the problem. +There is some fixed, but unknown to us, language $\lang \subseteq \Sigma^{*}$. +This can be a language of natural speech, a property in model checking, a set of traces from a protocol, etc. +We wish to infer a description of $\lang$ by only using finitely many words. +Note that this is not an easy task, as $\lang$ is often infinite. +\todo{Waarom sequentiele data?} + +Such a learning problem can be stated and solved in a variety of ways. +In the applications we do in our research group, we often try to infer a model of a software component. +(\in{Chapter}[chap:applying-automata-learning] describes such an application.)
+ In such cases, a learning algorithm can interact with the software. So it makes sense to study a learning paradigm which allows for \emph{queries}, and not just a data set of samples. \footnote{Instead of query learning, people also use the term \emph{active learning}.} + A typical query learning framework was established by \citet[DBLP:journals/iandc/Angluin87]. In her framework, the learning algorithm may pose two types of queries to a \emph{teacher} (or \emph{oracle}): + \description{Membership queries (MQs)} The learner poses such a query by providing a word $w \in \Sigma^{*}$ to the teacher. The teacher will then reply whether $w \in \lang$ or not. This type of query is often generalised to more general output, so the teacher replies with $\lang(w)$. In some papers, such a query is then called an \emph{output query}. + \description{Equivalence queries (EQs)} The learner can provide a hypothesised description of $\lang$. If the hypothesis is correct, the teacher replies with \kw{yes}. If, however, the hypothesis is incorrect, the teacher replies with \kw{no}, together with a counterexample (i.e., a word which is in $\lang$ but not in the hypothesis or vice versa). + With these queries, the learning algorithm is supposed to converge to a correct model. This type of learning is hence called \emph{exact learning}. \citet[DBLP:journals/iandc/Angluin87] showed that one can do this efficiently for deterministic finite automata (DFAs), when $\lang$ is in the class of regular languages. + Another paradigm which is relevant for our type of applications is PAC-learning with membership queries. Here, the algorithm can again use MQs as before, but the EQs are replaced by random sampling (from a fixed probability distribution). So the allowed query is: + \description{Random sample query (EX)} If the learner poses this query (there are no parameters), the teacher will respond with a random word $w$, together with its label $w \in \lang$.
+ Instead of requiring that the learner exactly learns the model, we only require the following. The learner should \emph{probably} return a model which \emph{approximates} the target. This gives the name probably approximately correct (PAC). Note that there are two uncertainties: the probable and the approximate part. Both parts are bounded by parameters, so one can determine the confidence. + \startremark When using only EQs, only MQs, or only EXs, there are hardness results for learning DFAs. See the book of \citet[KearnsV94] for such results. \stopremark + DFAs can be efficiently learned in the PAC model. In fact, any exact learning algorithm with MQs and EQs can be transformed into a PAC algorithm with MQs (see exercise X in \citenp[KearnsV94]). For this reason, we mostly focus on the former type of learning in this thesis. The transformation from exact learning to PAC learning is implemented by simply \emph{testing} the hypothesis with random samples. The number of samples depends on the required precision (i.e., both parameters in the PAC model). The type of testing we will see in later chapters is slightly different, however. We will often choose the probability distribution ourselves and even change it between hypotheses. Although the PAC model does not allow this and we do not acquire the promised bounds, we are able to give other guarantees with those testing methods. + + \todo{Gekopieerd van test methods paper.} Finite state machine conformance testing is a core topic in testing literature that is relevant for communication protocols and other reactive systems.
diff --git a/environment.tex b/environment.tex index 52bd018..7cfc229 100644 --- a/environment.tex +++ b/environment.tex @@ -20,12 +20,13 @@ \setupwhitespace[medium] \defineenumeration[definition][text=Definition] +\defineenumeration[remark][text=Remark] \defineenumeration[example][text=Example] \defineenumeration[lemma][text=Lemma] \defineenumeration[proposition][text=Proposition] \defineenumeration[theorem][text=Theorem] \defineenumeration[corollary][text=Corollary] -\setupenumeration[definition,example,lemma,proposition,theorem,corollary][alternative=serried,width=fit,right=.] +\setupenumeration[definition,remark,example,lemma,proposition,theorem,corollary][alternative=serried,width=fit,right=.] \define\QED{\hfill$\square$} \definestartstop[proof][before={{\it Proof. }}, after={\QED}]