diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..63234f3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +dist-newstyle +*.code-workspace diff --git a/README.md b/README.md index fa20480..b08b8e2 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ monoid-learner ============== +Learns the minimal monoid accepting an unknown language through an orcale. +Similar to Lstar, but for monoids instead of automata. The output is a monoid +representation which is furthermore minimised by the Knuth-Bendix completion. + +[Original](https://gist.github.com/Jaxan/d9bb9e3223e8fe8266fe4fe84d357088) diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/app/Main.hs b/app/Main.hs new file mode 100644 index 0000000..64736ac --- /dev/null +++ b/app/Main.hs @@ -0,0 +1,41 @@ +module Main where + +import qualified Data.Map as Map +import qualified Data.Sequence as Seq +import qualified Data.Set as Set +import KnuthBendix +import MStar + +-- We use the alphabet {a, b} as always +alphabet :: Set.Set Char +alphabet = Set.fromList "ab" + +-- Example language L = { w | nonempty && even number of as && triple numbers of bs } +language :: MStar.Word Char -> Bool +language w = not (null w) && length aa `mod` 2 == 0 && length bb `mod` 3 == 0 + where + (aa, bb) = Seq.partition (== 'a') w + +main :: IO () +main = do + let -- Initialise + s = initialState alphabet language + -- make closed, consistent and associative + (_, s2) = learn language s + -- The corresponding hypothesis is wrong on the string bbb + -- So we add a row bb + s3 = addRow (Seq.fromList "bb") language s2 + -- Make closed, consistent and associative again + (_, s4) = learn language s3 + -- Extract the rewrite rules from the table + -- For this we simply look at products r1 r2 and see which row is equivalent to it + rowPairs = Set.filter (\w -> not (w `Set.member` rows s4)) . Set.map (uncurry (<>)) $ Set.cartesianProduct (rows s4) (rows s4) + representatives = Map.fromList (fmap (\w -> (row s4 w, w)) (Set.toList (rows s4))) + rules0 = Map.fromSet (\w -> representatives Map.! row s4 w) rowPairs + rules = Map.toList rules0 + + putStrLn "Inferred rules: (generators are a, b and the unit)" + print rules + + putStrLn "After KB:" + print (knuthBendix rules) diff --git a/monoid-learner.cabal b/monoid-learner.cabal new file mode 100644 index 0000000..a488fc1 --- /dev/null +++ b/monoid-learner.cabal @@ -0,0 +1,27 @@ +cabal-version: 2.2 +name: monoid-learner +version: 0.1.0.0 +author: Joshua Moerman +maintainer: joshua@cs.rwth-aachen.de +build-type: Simple + +common stuff + default-language: Haskell2010 + ghc-options: -Wall -O2 + build-depends: + base >=4.12 && <5, + containers, + HaskellForMaths >=0.4 + +library + import: stuff + hs-source-dirs: src + exposed-modules: + KnuthBendix, + MStar + +executable monoid-learner + import: stuff + hs-source-dirs: app + main-is: Main.hs + build-depends: monoid-learner diff --git a/src/KnuthBendix.hs b/src/KnuthBendix.hs new file mode 100644 index 0000000..ba7c407 --- /dev/null +++ b/src/KnuthBendix.hs @@ -0,0 +1,9 @@ +module KnuthBendix where + +import Data.Sequence (Seq, fromList) +import Data.Foldable (Foldable (toList)) +import Data.Bifunctor (Bifunctor (bimap)) +import qualified Math.Algebra.Group.StringRewriting as Rew + +knuthBendix :: Ord a => [(Seq a, Seq a)] -> [(Seq a, Seq a)] +knuthBendix = fmap (bimap fromList fromList) . Rew.knuthBendix . fmap (bimap toList toList) diff --git a/src/MStar.hs b/src/MStar.hs new file mode 100644 index 0000000..1092270 --- /dev/null +++ b/src/MStar.hs @@ -0,0 +1,191 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE PartialTypeSignatures #-} +{-# LANGUAGE RecordWildCards #-} +{-# OPTIONS_GHC -Wno-partial-type-signatures #-} + +module MStar where + +-- Copyright Joshua Moerman 2020 +-- M*: an algorithm to query learn the syntactic monoid +-- for a regular language. I hope it works correctly. +-- This is a rough sketch, and definitely not cleaned up. + +import Control.Applicative (Applicative ((<*>)), (<$>)) +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Sequence (Seq, empty, singleton) +import Data.Set (Set) +import qualified Data.Set as Set +import Prelude hiding (Word) + +type Word a = Seq a +type Alphabet a = Set a + +type MembershipQuery a = Word a -> Bool + +-- If l includes the empty word, then this set also includes l +squares :: _ => Set (Word a) -> Set (Word a) +squares l = Set.map (uncurry (<>)) (Set.cartesianProduct l l) + +-- Left and Right concats, these are like columns, but they act +-- both left and right. Maybe a better word would be "tests". +type Context a = (Word a, Word a) +type Contexts a = Set (Context a) + +initialContexts :: Contexts a +initialContexts = Set.singleton (empty, empty) + +type Row a = Word a +type Rows a = Set (Row a) + +initialRows :: Ord a => Alphabet a -> Rows a +initialRows alphabet = Set.singleton empty `Set.union` Set.map singleton alphabet + +-- State of the M* algorithm +data State a = State + { rows :: Rows a, + contexts :: Contexts a, + cache :: Map (Word a) Bool + } + deriving (Show, Eq) + +-- Row data for an index +row :: _ => State a -> Row a -> Map (Context a) Bool +row State {..} m = Map.fromSet (\(l, r) -> cache Map.! (l <> m <> r)) contexts + +-- Difference of two rows (i.e., all contexts in which they differ) +difference :: _ => State a -> Row a -> Row a -> [Context a] +difference State {..} m1 m2 = [(l, r) | (l, r) <- Set.toList contexts, cache Map.! (l <> m1 <> r) /= cache Map.! (l <> m2 <> r)] + +-- Initial state of the algorithm +initialState :: Ord a => Alphabet a -> MembershipQuery a -> State a +initialState alphabet mq = + State + { rows = rows, + contexts = contexts, + cache = cache + } + where + rows = initialRows alphabet + contexts = initialContexts + initialQueries = + Set.map (\(m, (l, r)) -> l <> m <> r) $ + Set.cartesianProduct (squares rows) contexts + cache = Map.fromSet mq initialQueries + + +-- CLOSED -- +-- Returns all pairs which are not closed +closed :: _ => State a -> [(Row a, Row a)] +closed s@State {..} = [(m1, m2) | (m1, m2) <- Set.toList rowPairs, notExists (row s (m1 <> m2))] + where + rowPairs = Set.cartesianProduct rows rows + allRows = Set.map (row s) rows + notExists m = not (m `Set.member` allRows) + +-- Returns a fix for the non-closedness. +fixClosed :: _ => _ -> Maybe (Word a) +fixClosed [] = Nothing +fixClosed ((a, b) : _) = Just (a <> b) + +-- Adds a new element +addRow :: Ord a => Row a -> MembershipQuery a -> State a -> State a +addRow m mq s@State {..} = s {rows = newRows, cache = cache <> dCache} + where + newRows = Set.insert m rows + queries = Set.map (\(mi, (l, r)) -> l <> mi <> r) $ Set.cartesianProduct (squares newRows) contexts + queriesRed = queries `Set.difference` Map.keysSet cache + dCache = Map.fromSet mq queriesRed + + +-- CONSISTENT -- +-- Returns all inconsistencies +consistent :: _ => State a -> _ +consistent s@State {..} = [(m1, m2, n1, n2, d) | (m1, m2) <- equalRowPairs, (n1, n2) <- equalRowPairs, let d = difference s (m1 <> n1) (m2 <> n2), not (Prelude.null d)] + where + equalRowPairs = Prelude.filter (\(m1, m2) -> row s m1 == row s m2) $ (,) <$> Set.toList rows <*> Set.toList rows + +-- Returns a fix for consistency. +fixConsistent :: _ => State a -> _ -> Maybe (Context a) +fixConsistent _ [] = Nothing +fixConsistent _ ((_, _, _, _, []) : _) = error "Cannot happen cons" +fixConsistent s ((m1, m2, n1, n2, (l, r) : _) : _) = Just . head . Prelude.filter valid $ [(l <> m1, r), (l <> m2, r), (l, n1 <> r), (l, n2 <> r)] -- Many choices here + where + valid c = not (Set.member c (contexts s)) + +-- Adds a test +addContext :: Ord a => Context a -> MembershipQuery a -> State a -> State a +addContext lr mq s@State {..} = s {contexts = newContexts, cache = cache <> dCache} + where + newContexts = Set.insert lr contexts + queries = Set.map (\(m, (l, r)) -> l <> m <> r) $ Set.cartesianProduct (squares rows) newContexts + queriesRed = queries `Set.difference` Map.keysSet cache + dCache = Map.fromSet mq queriesRed + + +-- ASSOCIATIVITY -- +-- Returns non-associativity results. Implemented in a brute force way +-- This is something new in M*, it's obviously not needed in L* +associative :: _ => State a -> _ +associative s@State {..} = [(m1, m2, m3, m12, m23, d) | (m1, m2, m3, m12, m23) <- allCandidates, let d = difference s (m12 <> m3) (m1 <> m23), not (Prelude.null d)] + where + rs = Set.toList rows + allTriples = [(m1, m2, m3) | m1 <- rs, m2 <- rs, m3 <- rs] + allCandidates = [(m1, m2, m3, m12, m23) | (m1, m2, m3) <- allTriples, m12 <- rs, row s m12 == row s (m1 <> m2), m23 <- rs, row s m23 == row s (m2 <> m3)] + +-- Fix for associativity (hopefully) +fixAssociative :: _ => _ -> Maybe (Context a) +fixAssociative [] = Nothing +fixAssociative ((_, _, _, _, _, []) : _) = error "Cannot happen assoc" +fixAssociative ((_, _, m3, _, _, (l, r) : _) : _) = Just (l, m3 <> r) -- TODO: many choices + + +-- Abstract data type for a monoid. The map from the alphabet +-- determines the homomorphism from Words to this monoid +data MonoidAcceptor a q = MonoidAcceptor + { elements :: [q], -- set of elements + unit :: q, -- the unit element + multiplication :: q -> q -> q, -- multiplication functions + accept :: q -> Bool, -- accepting subset + alph :: a -> q -- map from alphabet + } + +-- Given a word, is it accepted by the monoid? +acceptMonoid :: MonoidAcceptor a q -> Word a -> Bool +acceptMonoid MonoidAcceptor {..} w = accept (foldr multiplication unit (fmap alph w)) + + +-- HYPOTHESIS -- +-- Syntactic monoid construction +constructMonoid :: _ => State a -> MonoidAcceptor a Int +constructMonoid s@State {..} = + MonoidAcceptor + { elements = [0 .. Set.size allRows - 1], + unit = unit, + multiplication = curry (multMap Map.!), + accept = (accMap Map.!), + alph = rowToInt . singleton + } + where + allRows = Set.map (row s) rows + rowMap = Map.fromList (zip (Set.toList allRows) [0 ..]) + rowToInt m = rowMap Map.! row s m + unit = rowToInt empty + accMap = Map.fromList [(rowMap Map.! r, r Map.! (empty, empty)) | r <- Set.toList allRows] + multList = [((rowToInt m1, rowToInt m2), rowToInt (m1 <> m2)) | m1 <- Set.toList rows, m2 <- Set.toList rows] + multMap = Map.fromList multList + + +-- Learns until it can construct a monoid +-- Please do counterexample handling yourself +learn :: _ => MembershipQuery a -> State a -> (MonoidAcceptor a _, State a) +learn mq s = + case fixClosed $ closed s of + Just m -> learn mq (addRow m mq s) + Nothing -> + case fixConsistent s $ consistent s of + Just c -> learn mq (addContext c mq s) + Nothing -> + case fixAssociative $ associative s of + Just c -> learn mq (addContext c mq s) + Nothing -> (constructMonoid s, s)