1
Fork 0
mirror of https://github.com/Jaxan/monoid-learner.git synced 2025-04-26 22:47:45 +02:00

Cleaned up a little bit

This commit is contained in:
Joshua Moerman 2021-03-29 16:52:47 +02:00
parent ecfc02c1c5
commit 0cb1e8d764
6 changed files with 226 additions and 114 deletions

View file

@ -3,15 +3,49 @@ monoid-learner
Learns the minimal monoid accepting an unknown language through an orcale.
Similar to Lstar, but for monoids instead of automata. The output is a monoid
representation which is furthermore minimised by the Knuth-Bendix completion.
presentation which is furthermore minimised by the Knuth-Bendix completion.
Only works for regular languages.
[Original](https://gist.github.com/Jaxan/d9bb9e3223e8fe8266fe4fe84d357088)
Output for the example in `app/Main.hs`:
This algorithm is made more precise and generalised to bimonoids (in the
sense of a finite set with two binary operations) in
[this paper.](https://doi.org/10.1007/978-3-030-71995-1_26)
## To run
Install Haskell and its cabal tool and clone this repo. Then run:
```
Inferred rules: (generators are a, b and the unit)
[(fromList "aaa",fromList "a"),(fromList "aaaa",fromList "aa"),(fromList "aaab",fromList "ab"),(fromList "aaabb",fromList "abb"),(fromList "aab",fromList "b"),(fromList "aabb",fromList "bb"),(fromList "aba",fromList "b"),(fromList "abaa",fromList "ab"),(fromList "abab",fromList "bb"),(fromList "ababb",fromList "aa"),(fromList "abba",fromList "bb"),(fromList "abbaa",fromList "abb"),(fromList "abbab",fromList "aa"),(fromList "abbabb",fromList "b"),(fromList "abbb",fromList "a"),(fromList "abbbb",fromList "ab"),(fromList "ba",fromList "ab"),(fromList "baa",fromList "b"),(fromList "bab",fromList "abb"),(fromList "babb",fromList "a"),(fromList "bba",fromList "abb"),(fromList "bbaa",fromList "bb"),(fromList "bbab",fromList "a"),(fromList "bbabb",fromList "ab"),(fromList "bbb",fromList "aa"),(fromList "bbbb",fromList "b")]
After KB:
[(fromList "ba",fromList "ab"),(fromList "aaa",fromList "a"),(fromList "aab",fromList "b"),(fromList "bbb",fromList "aa")]
cabal run monoid-learner
```
## Example output for the example in `app/Main.hs`:
For the language of non-empty words with an even number of as and a triple
number of bs. Note that the equations tell us that the language is
commutative.
```
Monoid on the generators:
fromList "ab"
with equations:
[(fromList "ba",fromList "ab"),(fromList "aaa",fromList "a"),(fromList "aab",fromList "b"),(fromList "bbb",fromList "aa")]
and accepting strings:
fromList [fromList "aa"]
```
For the language where a occurs on position 3 on the right and the empty
word.
```
... (many membership queries) ...
Monoid on the generators:
fromList "ab"
with equations:
[(fromList "bbbb",fromList "bbb"),(fromList "bbba",fromList "bba"),(fromList "bbab",fromList "bab"),(fromList "bbaa",fromList "baa"),(fromList "babb",fromList "abb"),(fromList "baba",fromList "aba"),(fromList "baab",fromList "aab"),(fromList "baaa",fromList "aaa"),(fromList "abbb",fromList "bbb"),(fromList "abba",fromList "bba"),(fromList "abab",fromList "bab"),(fromList "abaa",fromList "baa"),(fromList "aabb",fromList "abb"),(fromList "aaba",fromList "aba"),(fromList "aaab",fromList "aab"),(fromList "aaaa",fromList "aaa")]
and accepting strings:
fromList [fromList "",fromList "aaa",fromList "aab",fromList "aba",fromList "abb"]
```

View file

@ -1,5 +1,7 @@
module Main where
import Data.Foldable (Foldable (toList))
import Data.IORef
import qualified Data.Map as Map
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
@ -7,8 +9,8 @@ import KnuthBendix
import MStar
-- We use the alphabet {a, b} as always
alphabet :: Set.Set Char
alphabet = Set.fromList "ab"
symbols :: Set.Set Char
symbols = Set.fromList "ab"
-- Example language L = { w | nonempty && even number of as && triple numbers of bs }
language :: MStar.Word Char -> Bool
@ -16,26 +18,64 @@ language w = not (null w) && length aa `mod` 2 == 0 && length bb `mod` 3 == 0
where
(aa, bb) = Seq.partition (== 'a') w
-- Let's count the number of membership queries
-- and print all the queries
languageM :: IORef Int -> MStar.Word Char -> IO Bool
languageM count w = do
modifyIORef' count succ
n <- readIORef count
let nstr = show n
putStr nstr
putStr (replicate (8 - length nstr) ' ')
putStrLn $ toList w
return $ language w
main :: IO ()
main = do
let -- Initialise
s = initialState alphabet language
-- make closed, consistent and associative
(_, s2) = learn language s
-- The corresponding hypothesis is wrong on the string bbb
-- So we add a row bb
s3 = addRow (Seq.fromList "bb") language s2
-- Make closed, consistent and associative again
(_, s4) = learn language s3
-- Extract the rewrite rules from the table
putStrLn "Welcome to the monoid learner"
count <- newIORef 0
let lang = languageM count
-- Initialise
s <- initialState symbols lang
-- make closed, consistent and associative
s2 <- learn lang s
-- Above hypothesis is trivial (accepts nothing)
-- Let's add a column so that aa can be reached
putStrLn "Adding counterexample aa"
s3 <- addContext (Seq.empty, Seq.singleton 'a') lang s2
-- Make closed, consistent and associative again
s5 <- learn lang s3
-- Still wrong, on bbb
-- Let's add a column to reach it
putStrLn "Adding counterexample bbb"
s6 <- addContext (Seq.singleton 'b', Seq.singleton 'b') lang s5
-- Make closed, consistent and associative again
s7 <- learn lang s6
-- Hypothesis is now correct
let sFinal = s7
let -- Extract the rewrite rules from the table
-- For this we simply look at products r1 r2 and see which row is equivalent to it
rowPairs = Set.filter (\w -> not (w `Set.member` rows s4)) . Set.map (uncurry (<>)) $ Set.cartesianProduct (rows s4) (rows s4)
representatives = Map.fromList (fmap (\w -> (row s4 w, w)) (Set.toList (rows s4)))
rules0 = Map.fromSet (\w -> representatives Map.! row s4 w) rowPairs
rowPairs = Set.filter (\w -> not (w `Set.member` rows sFinal)) . Set.map (uncurry (<>)) $ Set.cartesianProduct (rows sFinal) (rows sFinal)
representatives = Map.fromList (fmap (\w -> (row sFinal w, w)) (Set.toList (rows sFinal)))
rules0 = Map.fromSet (\w -> representatives Map.! row sFinal w) rowPairs
rules = Map.toList rules0
kbRules = knuthBendix rules
-- Also extract final set
accRows0 = Set.filter (\m -> row sFinal m Map.! (Seq.empty, Seq.empty)) $ rows sFinal
accRows = Set.map (rewrite kbRules) accRows0
putStrLn "Inferred rules: (generators are a, b and the unit)"
print rules
putStrLn "Monoid on the generators:"
putStr " "
print symbols
putStrLn "After KB:"
print (knuthBendix rules)
putStrLn "with equations:"
putStr " "
print kbRules
putStrLn "and accepting strings:"
putStr " "
print accRows

View file

@ -7,7 +7,6 @@ build-type: Simple
common stuff
default-language: Haskell2010
ghc-options: -Wall -O2
build-depends:
base >=4.12 && <5,
containers,
@ -24,4 +23,15 @@ executable monoid-learner
import: stuff
hs-source-dirs: app
main-is: Main.hs
build-depends: monoid-learner
ghc-options: -Wall -O2
build-depends:
monoid-learner
test-suite test
import: stuff
hs-source-dirs: test
main-is: test.hs
type: exitcode-stdio-1.0
build-depends:
monoid-learner,
hedgehog

View file

@ -1,9 +1,12 @@
module KnuthBendix where
import Data.Sequence (Seq, fromList)
import Data.Foldable (Foldable (toList))
import Data.Bifunctor (Bifunctor (bimap))
import Data.Foldable (Foldable (toList))
import Data.Sequence (Seq, fromList)
import qualified Math.Algebra.Group.StringRewriting as Rew
knuthBendix :: Ord a => [(Seq a, Seq a)] -> [(Seq a, Seq a)]
knuthBendix :: Ord a => [(Seq a, Seq a)] -> [(Seq a, Seq a)]
knuthBendix = fmap (bimap fromList fromList) . Rew.knuthBendix . fmap (bimap toList toList)
rewrite :: Ord a => [(Seq a, Seq a)] -> Seq a -> Seq a
rewrite system = fromList . Rew.rewrite (fmap (bimap toList toList) system) . toList

View file

@ -1,112 +1,114 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE RecordWildCards #-}
{-# OPTIONS_GHC -Wno-partial-type-signatures #-}
module MStar where
-- Copyright Joshua Moerman 2020
-- Copyright Joshua Moerman 2020, 2021
-- M*: an algorithm to query learn the syntactic monoid
-- for a regular language. I hope it works correctly.
-- This is a rough sketch, and definitely not cleaned up.
import Control.Applicative (Applicative ((<*>)), (<$>))
import qualified Data.List as List
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Sequence (Seq, empty, singleton)
import Data.Maybe (listToMaybe)
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Set (Set)
import qualified Data.Set as Set
import Prelude hiding (Word)
type Word a = Seq a
type Alphabet a = Set a
type MembershipQuery m a = Word a -> m Bool
type MembershipQuery a = Word a -> Bool
-- If l includes the empty word, then this set also includes l
squares :: _ => Set (Word a) -> Set (Word a)
squares :: Ord a => Set (Word a) -> Set (Word a)
squares l = Set.map (uncurry (<>)) (Set.cartesianProduct l l)
setPlus :: Ord a => Set a -> Set (Word a) -> Set (Word a)
setPlus alph rows = Set.map pure alph `Set.union` squares rows
-- Left and Right concats, these are like columns, but they act
-- both left and right. Maybe a better word would be "tests".
type Context a = (Word a, Word a)
type Contexts a = Set (Context a)
initialContexts :: Contexts a
initialContexts = Set.singleton (empty, empty)
type Row a = Word a
type Rows a = Set (Row a)
initialRows :: Ord a => Alphabet a -> Rows a
initialRows alphabet = Set.singleton empty `Set.union` Set.map singleton alphabet
type Index a = Word a
-- State of the M* algorithm
data State a = State
{ rows :: Rows a,
contexts :: Contexts a,
cache :: Map (Word a) Bool
{ rows :: Set (Index a),
contexts :: Set (Context a),
cache :: Map (Word a) Bool,
alphabet :: Set a
}
deriving (Show, Eq)
-- Row data for an index
row :: _ => State a -> Row a -> Map (Context a) Bool
row :: Ord a => State a -> Index a -> Map (Context a) Bool
row State {..} m = Map.fromSet (\(l, r) -> cache Map.! (l <> m <> r)) contexts
-- Difference of two rows (i.e., all contexts in which they differ)
difference :: _ => State a -> Row a -> Row a -> [Context a]
difference :: Ord a => State a -> Index a -> Index a -> [Context a]
difference State {..} m1 m2 = [(l, r) | (l, r) <- Set.toList contexts, cache Map.! (l <> m1 <> r) /= cache Map.! (l <> m2 <> r)]
-- Initial state of the algorithm
initialState :: Ord a => Alphabet a -> MembershipQuery a -> State a
initialState alphabet mq =
State
{ rows = rows,
contexts = contexts,
cache = cache
}
where
rows = initialRows alphabet
contexts = initialContexts
initialQueries =
Set.map (\(m, (l, r)) -> l <> m <> r) $
Set.cartesianProduct (squares rows) contexts
cache = Map.fromSet mq initialQueries
initialState :: (Monad m, Ord a) => Alphabet a -> MembershipQuery m a -> m (State a)
initialState alphabet mq = do
let rows = Set.singleton Seq.empty
contexts = Set.singleton (Seq.empty, Seq.empty)
initialQueries = Set.map (\(m, (l, r)) -> l <> m <> r) $ Set.cartesianProduct (setPlus alphabet rows) contexts
initialQueriesL = Set.toList initialQueries
results <- mapM mq initialQueriesL
let cache = Map.fromList (zip initialQueriesL results)
return $
State
{ rows = rows,
contexts = contexts,
cache = cache,
alphabet = alphabet
}
-- CLOSED --
-- Returns all pairs which are not closed
closed :: _ => State a -> [(Row a, Row a)]
closed s@State {..} = [(m1, m2) | (m1, m2) <- Set.toList rowPairs, notExists (row s (m1 <> m2))]
closed :: Ord a => State a -> Set (Index a)
closed s@State {..} = Set.filter notExists (setPlus alphabet rows `Set.difference` rows)
where
rowPairs = Set.cartesianProduct rows rows
allRows = Set.map (row s) rows
notExists m = not (m `Set.member` allRows)
notExists m = not (row s m `Set.member` allRows)
-- Returns a fix for the non-closedness.
fixClosed :: _ => _ -> Maybe (Word a)
fixClosed [] = Nothing
fixClosed ((a, b) : _) = Just (a <> b)
-- Returns a fix for the non-closedness. (Some element)
fixClosed1 :: Set (Index a) -> Maybe (Word a)
fixClosed1 = listToMaybe . Set.toList
-- Returns a fix for the non-closedness. (Shortest element)
fixClosed2 :: Set (Index a) -> Maybe (Word a)
fixClosed2 = listToMaybe . List.sortOn Seq.length . Set.toList
-- Adds a new element
addRow :: Ord a => Row a -> MembershipQuery a -> State a -> State a
addRow m mq s@State {..} = s {rows = newRows, cache = cache <> dCache}
where
newRows = Set.insert m rows
queries = Set.map (\(mi, (l, r)) -> l <> mi <> r) $ Set.cartesianProduct (squares newRows) contexts
queriesRed = queries `Set.difference` Map.keysSet cache
dCache = Map.fromSet mq queriesRed
addRow :: (Monad m, Ord a) => Index a -> MembershipQuery m a -> State a -> m (State a)
addRow m mq s@State {..} = do
let newRows = Set.insert m rows
queries = Set.map (\(mi, (l, r)) -> l <> mi <> r) $ Set.cartesianProduct (setPlus alphabet newRows) contexts
queriesRed = queries `Set.difference` Map.keysSet cache
queriesRedL = Set.toList queriesRed
results <- mapM mq queriesRedL
let dCache = Map.fromList (zip queriesRedL results)
return $ s {rows = newRows, cache = cache <> dCache}
-- CONSISTENT --
-- Not needed when counterexamples are added as columns, the table
-- then remains sharp. There is a more efficient way of testing this
-- property, see https://doi.org/10.1007/978-3-030-71995-1_26
-- Returns all inconsistencies
consistent :: _ => State a -> _
consistent :: Ord a => State a -> [(Index a, Index a, Index a, Index a, [Context a])]
consistent s@State {..} = [(m1, m2, n1, n2, d) | (m1, m2) <- equalRowPairs, (n1, n2) <- equalRowPairs, let d = difference s (m1 <> n1) (m2 <> n2), not (Prelude.null d)]
where
equalRowPairs = Prelude.filter (\(m1, m2) -> row s m1 == row s m2) $ (,) <$> Set.toList rows <*> Set.toList rows
equalRowPairs = Set.toList . Set.filter (\(m1, m2) -> row s m1 == row s m2) $ Set.cartesianProduct rows rows
-- Returns a fix for consistency.
fixConsistent :: _ => State a -> _ -> Maybe (Context a)
fixConsistent :: Ord a => State a -> [(Index a, Index a, Index a, Index a, [Context a])] -> Maybe (Context a)
fixConsistent _ [] = Nothing
fixConsistent _ ((_, _, _, _, []) : _) = error "Cannot happen cons"
fixConsistent s ((m1, m2, n1, n2, (l, r) : _) : _) = Just . head . Prelude.filter valid $ [(l <> m1, r), (l <> m2, r), (l, n1 <> r), (l, n2 <> r)] -- Many choices here
@ -114,30 +116,37 @@ fixConsistent s ((m1, m2, n1, n2, (l, r) : _) : _) = Just . head . Prelude.filte
valid c = not (Set.member c (contexts s))
-- Adds a test
addContext :: Ord a => Context a -> MembershipQuery a -> State a -> State a
addContext lr mq s@State {..} = s {contexts = newContexts, cache = cache <> dCache}
where
newContexts = Set.insert lr contexts
queries = Set.map (\(m, (l, r)) -> l <> m <> r) $ Set.cartesianProduct (squares rows) newContexts
queriesRed = queries `Set.difference` Map.keysSet cache
dCache = Map.fromSet mq queriesRed
addContext :: (Monad m, Ord a) => Context a -> MembershipQuery m a -> State a -> m (State a)
addContext lr mq s@State {..} = do
let newContexts = Set.insert lr contexts
queries = Set.map (\(m, (l, r)) -> l <> m <> r) $ Set.cartesianProduct (setPlus alphabet rows) newContexts
queriesRed = queries `Set.difference` Map.keysSet cache
queriesRedL = Set.toList queriesRed
results <- mapM mq queriesRedL
let dCache = Map.fromList (zip queriesRedL results)
return $ s {contexts = newContexts, cache = cache <> dCache}
-- ASSOCIATIVITY --
-- Returns non-associativity results. Implemented in a brute force way
-- This is something new in M*, it's obviously not needed in L*
associative :: _ => State a -> _
associative :: Ord a => State a -> [(Index a, Index a, Index a, Index a, Index a, [Context a])]
associative s@State {..} = [(m1, m2, m3, m12, m23, d) | (m1, m2, m3, m12, m23) <- allCandidates, let d = difference s (m12 <> m3) (m1 <> m23), not (Prelude.null d)]
where
rs = Set.toList rows
allTriples = [(m1, m2, m3) | m1 <- rs, m2 <- rs, m3 <- rs]
allCandidates = [(m1, m2, m3, m12, m23) | (m1, m2, m3) <- allTriples, m12 <- rs, row s m12 == row s (m1 <> m2), m23 <- rs, row s m23 == row s (m2 <> m3)]
-- Fix for associativity (hopefully)
fixAssociative :: _ => _ -> Maybe (Context a)
fixAssociative [] = Nothing
fixAssociative ((_, _, _, _, _, []) : _) = error "Cannot happen assoc"
fixAssociative ((_, _, m3, _, _, (l, r) : _) : _) = Just (l, m3 <> r) -- TODO: many choices
-- Fix for associativity, needs a membership query
-- See https://doi.org/10.1007/978-3-030-71995-1_26
fixAssociative :: (Monad m, Ord a) => [(Index a, Index a, Index a, Index a, Index a, [Context a])] -> MembershipQuery m a -> State a -> m (Maybe (Context a))
fixAssociative [] _ _ = return Nothing
fixAssociative ((_, _, _, _, _, []) : _) _ _ = error "Cannot happen assoc"
fixAssociative ((m1, m2, m3, m12, m23, e@(l, r) : _) : _) mq table = do
b <- mq (l <> m1 <> m2 <> m3 <> r)
if row table (m12 <> m3) Map.! e /= b
then return (Just (l, m3 <> r))
else return (Just (l <> m1, r))
-- Abstract data type for a monoid. The map from the alphabet
@ -154,38 +163,50 @@ data MonoidAcceptor a q = MonoidAcceptor
acceptMonoid :: MonoidAcceptor a q -> Word a -> Bool
acceptMonoid MonoidAcceptor {..} w = accept (foldr multiplication unit (fmap alph w))
-- HYPOTHESIS --
-- Syntactic monoid construction
constructMonoid :: _ => State a -> MonoidAcceptor a Int
constructMonoid :: Ord a => State a -> MonoidAcceptor a Int
constructMonoid s@State {..} =
MonoidAcceptor
{ elements = [0 .. Set.size allRows - 1],
unit = unit,
multiplication = curry (multMap Map.!),
accept = (accMap Map.!),
alph = rowToInt . singleton
alph = rowToInt . Seq.singleton -- incorrect if symbols behave trivially
}
where
allRows = Set.map (row s) rows
rowMap = Map.fromList (zip (Set.toList allRows) [0 ..])
rowToInt m = rowMap Map.! row s m
unit = rowToInt empty
accMap = Map.fromList [(rowMap Map.! r, r Map.! (empty, empty)) | r <- Set.toList allRows]
unit = rowToInt Seq.empty
accMap = Map.fromList [(rowMap Map.! r, r Map.! (Seq.empty, Seq.empty)) | r <- Set.toList allRows]
multList = [((rowToInt m1, rowToInt m2), rowToInt (m1 <> m2)) | m1 <- Set.toList rows, m2 <- Set.toList rows]
multMap = Map.fromList multList
-- Learns until it can construct a monoid
-- Please do counterexample handling yourself
learn :: _ => MembershipQuery a -> State a -> (MonoidAcceptor a _, State a)
learn mq s =
case fixClosed $ closed s of
Just m -> learn mq (addRow m mq s)
Nothing ->
case fixConsistent s $ consistent s of
Just c -> learn mq (addContext c mq s)
Nothing ->
case fixAssociative $ associative s of
Just c -> learn mq (addContext c mq s)
Nothing -> (constructMonoid s, s)
learn :: (Monad m, Ord a) => MembershipQuery m a -> State a -> m (State a)
learn mq = makeClosedAndConsistentAndAssoc
where
makeClosed s = do
case fixClosed2 $ closed s of
Just m -> do
s2 <- addRow m mq s
makeClosed s2
Nothing -> return s
makeClosedAndConsistent s = do
s2 <- makeClosed s
case fixConsistent s2 $ consistent s2 of
Just c -> do
s3 <- addContext c mq s2
makeClosedAndConsistent s3
Nothing -> return s2
makeClosedAndConsistentAndAssoc s = do
s2 <- makeClosedAndConsistent s
result <- fixAssociative (associative s2) mq s2
case result of
Just a -> do
s3 <- addContext a mq s2
makeClosedAndConsistentAndAssoc s3
Nothing -> return s2

4
test/test.hs Normal file
View file

@ -0,0 +1,4 @@
import Hedgehog.Main
main :: IO ()
main = defaultMain []