From df6a2708c5c5b13d79d957ab1a84156bd2459dd6 Mon Sep 17 00:00:00 2001 From: Joshua Moerman Date: Mon, 23 Sep 2024 10:06:29 +0200 Subject: [PATCH] Added documentation and cleaned up some code --- README.md | 53 +++++++++++++++++++++++++ app/Main.hs | 16 ++++---- app/Playground.hs | 34 +--------------- src/Data/Partition.hs | 86 +++++++++++++++++++++++++++++------------ src/Data/UnionFind.hs | 13 +++++-- src/DotParser.hs | 23 +++++++++-- src/DotWriter.hs | 4 +- src/MealyRefine.hs | 90 +++---------------------------------------- src/SplittingTree.hs | 29 ++++++++++++-- 9 files changed, 185 insertions(+), 163 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..8a0e137 --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +mealy-decompose +=============== + +Tools to investigate decomposition of Mealy machines, aka finite state +machines (FSMs). Notable entry points are: + +* `app/Main.hs` is a heuristic to decompose finite state machines into + multiple components, based on their outputs. + +* `other/decompose_fsm_optimise.py` does the same, but optimally and with a + SAT solver. This can only handle state spaces of at most a few hundred + states. + +* `app/RandomGen.hs` for generating FSMs which are decomposable. + + +## How to run + +The the haskell tools (tested with ghc 9.2.8 and ghc 9.10.1): +``` +cabal run mealy-decompose -- +``` + +For the python tools (tested with python 3.12): +``` +pip3 install python-sat +python3 decompose_fsm_optimise.py -h +``` + + +## Notes on `copar` + +In the first versions of this tool, I used the `copar` library: +[CoPaR (The Coalgebraic Partition Refiner)](https://git8.cs.fau.de/software/copar). +This is a great library and in particular very fast. Despite that, I switched +to my own partition refinement implementations because of the reasons below. +The last commit to use `copar` is `2b8b79a4`. + +* I needed witnesses for some tasks. So I not only construct a partition, + but also a splitting tree, where are track all the actions needed to + separate states. + +* I prefer to use the actual strings (or other data type) for the states, + inputs and outputs as opposed to `Int`s. This makes it easier to debug, and + less error-prone, as one cannot mix these entities anymore. + +* `copar` comes with many dependencies, and it was a bit tricky to get the + versioning correct. + + +## Copyright + +(c) 2024 Joshua Moerman, Open Universiteit diff --git a/app/Main.hs b/app/Main.hs index 97f09b1..6e0a23d 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,5 +1,4 @@ {-# LANGUAGE OverloadedStrings #-} -{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} module Main where @@ -33,13 +32,16 @@ myWriteFile filename = writeFile ("results/" ++ filename) main :: IO () main = do -- Read dot file - [dotFile] <- getArgs - print dotFile + ls <- getArgs + let dotFile = case ls of + [x] -> x + _ -> error "Please provide exactly one argument (filepath of dot file)" + + putStr "reading " >> putStrLn dotFile machine <- readDotFile dotFile -- print some basic info putStrLn $ (show . length $ states machine) <> " states, " <> (show . length $ inputs machine) <> " inputs and " <> (show . length $ outputs machine) <> " outputs" - putStrLn "Small sample:" let printPartition p = putStrLn $ "number of states = " <> show (numBlocks p) @@ -50,14 +52,14 @@ main = do reverseFuns = [(i, fun) | i <- inputs machine, let mm = reverseTransitionMaps i, let fun s = Map.findWithDefault [] s mm] -- Minimise input, so we know the actual number of states - printPartition (refineMealy3 outputFuns reverseFuns (states machine)) + printPartition (refineFuns outputFuns reverseFuns (states machine)) putStrLn "" -- Then compute each projection let outs = outputs machine mappedOutputFuns o = [(i, (o ==) . f) | (i, f) <- outputFuns] - projections = [(o, refineMealy3 (mappedOutputFuns o) reverseFuns (states machine)) | o <- outs] + projections = [(o, refineFuns (mappedOutputFuns o) reverseFuns (states machine)) | o <- outs] -- Print number of states of each projection mapM_ @@ -120,7 +122,7 @@ main = do componentOutputs = Set.fromList osWithRelAndEquiv proj = projectToComponent (`Set.member` componentOutputs) machine -- Sanity check: compute partition again - partition = refineMealy2 proj + partition = refineMealy proj putStrLn "" putStrLn $ "Component " <> show os diff --git a/app/Playground.hs b/app/Playground.hs index 50466ac..eb03e15 100644 --- a/app/Playground.hs +++ b/app/Playground.hs @@ -1,7 +1,6 @@ module Main where import Bisimulation (bisimulation2) -import Data.Partition (numBlocks) import Data.Trie qualified as Trie import Data.UnionFind import DotParser (readDotFile) @@ -14,7 +13,6 @@ import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Maybe (isJust) import Data.Set qualified as Set -import Data.Text.IO qualified as T import System.Environment (getArgs) main :: IO () @@ -23,8 +21,7 @@ main = do case args of ("HSI" : ls) -> mainHSI ls ("InputDecomp" : ls) -> mainInputDecomp ls - ("Refine" : ls) -> mainRefine ls - _ -> putStrLn "Please provide one of [HSI, InputDecomp, Refine]" + _ -> putStrLn "Please provide one of [HSI, InputDecomp]" mainHSI :: [String] -> IO () mainHSI args = case args of @@ -126,32 +123,3 @@ mainInputDecomp args = case args of 0 -> putStrLn "ERROR" 1 -> putStrLn "INDECOMPOSABLE" n -> putStrLn ("MAYBE DECOMPOSABLE: " ++ show n ++ " classes") - --- Used to determine whether Copar is faster than SplittingTree (it is). --- Copar is almost twice as fast on ESM, but SplittingTree is faster on a --- BRP benchmark. I guess, theoretically, Copar should be faster generally. -mainRefine :: [String] -> IO () -mainRefine args = case args of - [dotFile, copar] -> run dotFile (read copar) - _ -> putStrLn "Please provide a dot file and Boolean" - where - run dotFile copar = do - m <- readDotFile dotFile - putStr $ "file parsed, initial state = " - T.putStrLn $ initialState m - if copar - then runCopar m - else runSplittingTree m - - runCopar _ = error "no longer supported" - -- let printPartition p = putStrLn $ "Done " <> show (numBlocks p) - -- in printPartition (refineMealy (mealyMachineToEncoding m)) - - runSplittingTree MealyMachine{..} = do - let - outputFuns = [(i, fun) | i <- inputs, let fun s = fst (behaviour s i)] - reverseTransitionMaps i = Map.fromListWith (++) [(t, [s]) | s <- states, let t = snd (behaviour s i)] - reverseFuns = [(i, fun) | i <- inputs, let mm = reverseTransitionMaps i, let fun s = Map.findWithDefault [] s mm] - - (partition, _splittingTree) <- evalStateT (refine (\_ -> pure ()) outputFuns reverseFuns) (initialPRState states) - putStrLn $ "Done" <> show (numBlocks partition) diff --git a/src/Data/Partition.hs b/src/Data/Partition.hs index 32e346d..343b92c 100644 --- a/src/Data/Partition.hs +++ b/src/Data/Partition.hs @@ -1,20 +1,35 @@ -{-# LANGUAGE PackageImports #-} +{-# LANGUAGE DerivingVia #-} module Data.Partition where import Data.Preorder import Control.Monad.Trans.State.Strict as State +import Data.Function (on) +import Data.List (groupBy, sortOn) import Data.Map.Merge.Strict qualified as Map import Data.Map.Strict qualified as Map import Data.Tuple (swap) -import Data.List (groupBy, sortOn) -import Data.Function (on) +-- * Partitions + +-- $moduleDoc +-- +-- A partition on a set of type @a@ is represented as a map @a -> `Block`@, +-- where a `Block` is a unique identifier (integer) for a set of elements. +-- +-- Partitions form a lattice (the so-called /partition lattice/), where the +-- partial order is given by the refinement relation. We put the finest +-- partition at the top, and the coarsest at the bottom. This is the opposite +-- of the convection used on wikipedia. + +-- * Type definitions + +-- | A block is a unique identifier for a set of elements. newtype Block = Block Int - deriving (Eq, Ord, Show, Enum, Num) + deriving (Eq, Ord, Show, Enum, Num) via Int --- | A partition is represented by a finite map of type 's -> Block'. Two +-- | A partition is represented by a finite map of type @s -> `Block`@. Two -- elements mapped to the same block are equivalent. Note that a permutation -- of the blocks will not change the partition, but it does change the -- underlying representation. (That is why I haven't given it an Eq instance @@ -25,10 +40,12 @@ data Partition s = Partition } deriving Show --- | Constraint for using partitions. Currently this is 'Ord'. But it might --- change to '(Eq s, Hashable s)' in the future. +-- | Constraint for using partitions. Currently this is `Ord`. But it might +-- change to @(`Eq` s, Hashable s)@ in the future. type Partitionable s = Ord s +-- * Basic functions + -- | Determines whether two elements are equivalent in the partition. sameBlock :: Partitionable s => Partition s -> s -> s -> Bool sameBlock (Partition m _) s t = m Map.! s == m Map.! t @@ -37,15 +54,20 @@ sameBlock (Partition m _) s t = m Map.! s == m Map.! t toBlocks :: Partition s -> [[s]] toBlocks = fmap (fmap snd) . groupBy ((==) `on` fst) . sortOn fst . fmap swap . Map.assocs . getPartition +-- * Lattice structure on partitions + -- | Returns the common refinement of two partitions. This is the coarsest -- partition which is finer than either input, i.e., the lowest upper bound. --- Runs in O(n), where n is the number of elements of the partition. Note --- that both partitions must be on the same set of elements. +-- Runs in O(n + k log k), where n is the number of elements of the partition +-- and k is the number of blocks. Note that both partitions must be on the +-- same set of elements. commonRefinement :: Partitionable s => Partition s -> Partition s -> Partition s commonRefinement (Partition p1 _) (Partition p2 _) = let (p, (_, b)) = State.runState (mergeFun p1 p2) (Map.empty, Block 0) in Partition p b where + -- We merge the two maps, they should contain the same keys. While merging, + -- we keep track of a `Map` of type @(`Block`, `Block`) -> `Block`@. mergeFun = Map.mergeA Map.dropMissing Map.dropMissing (Map.zipWithAMatched checkPair) checkPair _ b1 b2 = do (m, n) <- get @@ -55,21 +77,6 @@ commonRefinement (Partition p1 _) (Partition p2 _) = put (Map.insert (b1, b2) n m, succ n) pure n --- | This function checks whether one partition is a refinement of the other. --- This function already appears in the `copar` library, but the one here is --- faster. This function is the same as `>=` in the partition lattice. - --- Could be made faster by doing what commonRefinement is doing but --- stopping early. But it's fast enough for now, so I won't bother. -isRefinementOf2 :: Partitionable s => Partition s -> Partition s -> Bool -isRefinementOf2 refined original = comparePartitions refined original == GT' - --- | Checks whether two partitions are equal as partitions. Note that the `Eq` --- instance on partitions checks for equality of the state assignments, not --- whether the partitions are equal as partitions. -isEquivalent :: Partitionable s => Partition s -> Partition s -> Bool -isEquivalent p1 p2 = comparePartitions p1 p2 == EQ' - -- | Compares two partitions. Returns `EQ'` if the partitions are equal, `GT'` -- if the first partition is a refinement of the second, `LT'` if the first -- partition is a coarsening of the second, and `IC'` if the partitions are @@ -78,11 +85,40 @@ comparePartitions :: Partitionable s => Partition s -> Partition s -> PartialOrd comparePartitions p1@(Partition m1 b1) p2@(Partition m2 b2) | m1 == m2 = EQ' | otherwise = + -- We compute the common refinement... let (Partition _ n3) = commonRefinement p1 p2 n1 = b1 n2 = b2 - in case (n1 == n3, n2 == n3) of + in -- ... and if it is equal to one of the paritions to begin with, we have + -- a refinement or coarsening. For this we only need the check the + -- number of blocks. + case (n1 == n3, n2 == n3) of (True, True) -> EQ' (True, False) -> GT' (False, True) -> LT' (False, False) -> IC' + +-- | This function checks whether one partition is a refinement of the other. +-- This function is the same as `>=` in the partition lattice. +isRefinementOf :: Partitionable s => Partition s -> Partition s -> Bool +isRefinementOf refined original = comparePartitions refined original == GT' + +-- | Checks whether two partitions are equal as partitions. +isEquivalent :: Partitionable s => Partition s -> Partition s -> Bool +isEquivalent p1 p2 = comparePartitions p1 p2 == EQ' + +{- Notes +~~~~~~~~ + +- `isRefinementOf` could be made faster by doing what commonRefinement is + doing but stopping early. But it's fast enough for now, so I won't bother. + +- We could change the data type to use a `HashMap` instead of a `Map`. This + could make things faster, but there is no `mergeA` function for `HashMap`. + That is why I'm using `Map` for now. Another options would be to convert + @s@ to `Int` and use an array. (This is what the copar library does.) + +- The `Block` type currently has a `Num` instance. It probably should not + have that. But it's convenient for now. + +-} diff --git a/src/Data/UnionFind.hs b/src/Data/UnionFind.hs index befb1bd..135516a 100644 --- a/src/Data/UnionFind.hs +++ b/src/Data/UnionFind.hs @@ -44,7 +44,12 @@ equate x y (MkUnionFind m1) = Nothing -> (z, m) Just w -> Map.insert z r <$> rootCP w m r --- We zouden ook nog een rank optimalisatie kunnen (moeten?) doen. Maar dan --- moeten we meer onthouden. Verder zou ik een functie kunnen maken die --- een `equivalent` en `equate` combineert, dat kan namelijk wel met pad- --- compressie. +{- Notes +~~~~~~~~ + +We zouden ook nog een rank optimalisatie kunnen (moeten?) doen. Maar dan +moeten we meer onthouden. Verder zou ik een functie kunnen maken die +een `equivalent` en `equate` combineert, dat kan namelijk wel met pad- +compressie. + +-} diff --git a/src/DotParser.hs b/src/DotParser.hs index 9bf0820..6bcd365 100644 --- a/src/DotParser.hs +++ b/src/DotParser.hs @@ -1,6 +1,4 @@ {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE PartialTypeSignatures #-} -{-# OPTIONS_GHC -Wno-partial-type-signatures #-} module DotParser where @@ -36,7 +34,7 @@ readDotFile dotFile = convertToMealy . mapMaybe (parseMaybe lineP) . T.lines <$> -- * Internals --- | Type of tokens we accept is 'T.Text', but it could be any type which +-- | Type of tokens we accept is `T.Text`, but it could be any type which -- is compatible with Megaparsec. type Toks = T.Text @@ -45,7 +43,7 @@ type Toks = T.Text type Trans = (Toks, Toks, Toks, Toks) -- | Our parser does not have any custom error messages, and always consumes --- a stream as 'T.Text'. +-- a stream as `T.Text`. type Parser = Parsec Void Toks -- | Parse a single transition. @@ -85,3 +83,20 @@ convertToMealy l = -- We put some effort in sharing string values allStrs = Map.mapWithKey (\k _ -> k) . Set.toMap . Set.unions $ [states, ins, outs] base = Map.fromList . fmap (\(from, to, i, o) -> ((allStrs Map.! from, allStrs Map.! i), (allStrs Map.! o, allStrs Map.! to))) $ l + +{- Notes +~~~~~~~~ + +- Originally I used a `Data.Map` data structure to hold the transitions. But + it turns out that `Data.HashMap` is much faster. This is important, because + the `behaviour` function is called a lot during partition refinement. This + is also the reason for moving from `String` to `T.Text`. + +- Even better would be to move the `Int`. But that makes debugging and + experimenting with the code harder. It is now efficient enough for our + purposes. + +- I have tried @ShortText@ from the @short-text@ package, but it provided + no benefit over `T.Text`. + +-} diff --git a/src/DotWriter.hs b/src/DotWriter.hs index 780f593..8115bc0 100644 --- a/src/DotWriter.hs +++ b/src/DotWriter.hs @@ -4,7 +4,7 @@ import Data.Monoid (Endo (..)) import Data.Partition (Block (..)) import Data.Text qualified as T --- TODO: use Data.Text here instead of strings +-- TODO: use `Data.Text` here instead of strings type StringBuilder = Endo String @@ -30,7 +30,7 @@ instance ToDot a => ToDot (Maybe a) where instance ToDot Block where -- only works nicely when non-negative - toDot (Block n) = string "s" <> string (show n) + toDot b = string "s" <> string (show b) transitionToDot :: (ToDot s, ToDot i, ToDot o) => (s, i, o, s) -> StringBuilder transitionToDot (s, i, o, t) = diff --git a/src/MealyRefine.hs b/src/MealyRefine.hs index 072ff59..6bb5f38 100644 --- a/src/MealyRefine.hs +++ b/src/MealyRefine.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE PackageImports #-} - module MealyRefine where import Data.Partition (Partition) @@ -20,9 +18,6 @@ project f MealyMachine{..} = , .. } -projectToBit :: Ord o => o -> MealyMachine s i o -> MealyMachine s i Bool -projectToBit o = project (o ==) - projectToComponent :: Ord o => (o -> Bool) -> MealyMachine s i o -> MealyMachine s i (Maybe o) projectToComponent oPred = project oMaybe where @@ -30,91 +25,16 @@ projectToComponent oPred = project oMaybe | oPred o = Just o | otherwise = Nothing --- We will project to all (relevant) outputs. Since a lot of data is shared --- among those mealy machines, I do this in one function. The static parts --- are converted only once. Only "eStructure" (the state-labels) are different --- for each projection. - -{- -allProjections :: (Ord s, Ord i, Eq o) => MealyMachine s i o -> [o] -> ([Encoding (Label Polynomial) (F1 Polynomial)], Map.Map s Int) -allProjections MealyMachine{..} outs = (fmap mkEncoding outs, state2idx) - where - numStates = length states - numInputs = length inputs - idx2state = Map.fromList $ zip [0 ..] states - state2idx = Map.fromList $ zip states [0 ..] - idx2input = Map.fromList $ zip [0 ..] inputs - stateInputIndex n = n `divMod` numInputs -- inverse of \(s, i) -> s * numInputs + i - edgesFrom = Data.Vector.Unboxed.generate (numStates * numInputs) (fst . stateInputIndex) - edgesLabel = Data.Vector.generate (numStates * numInputs) (snd . stateInputIndex) - edgesTo = Data.Vector.Unboxed.generate (numStates * numInputs) ((state2idx Map.!) . snd . (\(s, i) -> behaviour (idx2state Map.! s) (idx2input Map.! i)) . stateInputIndex) - bool2Int = bool 0 1 - structure o = - Data.Vector.generate - numStates - ( \s -> - PolyF1 - { polyF1Summand = 0 -- There is only one summand - , polyF1Variables = numInputs -- This many transitions per state - , polyF1Constants = - Data.Vector.Unboxed.generate - numInputs - (\i -> bool2Int . (o ==) . fst $ behaviour (idx2state Map.! s) (idx2input Map.! i)) - } - ) - mkEncoding o = - Encoding - { eStructure = structure o - , eInitState = Nothing -- not needed - , eEdgesFrom = edgesFrom - , eEdgesLabel = edgesLabel - , eEdgesTo = edgesTo - } - --- Refine a encoded mealy machine -refineMealy :: Encoding (Label Polynomial) (F1 Polynomial) -> Copar.Partition -refineMealy machine = runST $ Copar.refine (Proxy @Polynomial) machine True - -mealyMachineToEncoding :: (Ord s, Ord i, Ord o) => MealyMachine s i o -> Encoding (Label Polynomial) (F1 Polynomial) -mealyMachineToEncoding MealyMachine{..} = - let numStates = length states - numInputs = length inputs - idx2state = Map.fromList $ zip [0 ..] states - idx2input = Map.fromList $ zip [0 ..] inputs - out2idx = Map.fromList $ zip outputs [0 ..] - eStructure = - Data.Vector.generate - numStates - ( \s -> - PolyF1 - { polyF1Summand = 0 -- There is only one summand - , polyF1Variables = numInputs -- This many transitions per state - , polyF1Constants = - Data.Vector.Unboxed.generate - numInputs - (\i -> out2idx Map.! fst (behaviour (idx2state Map.! s) (idx2input Map.! i))) - } - ) - eInitState = Nothing - state2idx = Map.fromList $ zip states [0 ..] - stateInputIndex n = n `divMod` numInputs - -- stateInputPair (s, i) = s * numInputs + i - eEdgesFrom = Data.Vector.Unboxed.generate (numStates * numInputs) (fst . stateInputIndex) - eEdgesLabel = Data.Vector.generate (numStates * numInputs) (snd . stateInputIndex) - eEdgesTo = Data.Vector.Unboxed.generate (numStates * numInputs) ((state2idx Map.!) . snd . (\(s, i) -> behaviour (idx2state Map.! s) (idx2input Map.! i)) . stateInputIndex) - in Encoding{..} --} - -refineMealy2 :: (Ord s, Ord i, Ord o) => MealyMachine s i o -> Partition s -refineMealy2 MealyMachine{..} = +refineMealy :: (Ord s, Ord i, Ord o) => MealyMachine s i o -> Partition s +refineMealy MealyMachine{..} = let outputFuns = [(i, fun) | i <- inputs, let fun s = fst (behaviour s i)] reverseTransitionMaps i = Map.fromListWith (++) [(t, [s]) | s <- states, let t = snd (behaviour s i)] reverseFuns = [(i, fun) | i <- inputs, let mm = reverseTransitionMaps i, let fun s = Map.findWithDefault [] s mm] in - refineMealy3 outputFuns reverseFuns states + refineFuns outputFuns reverseFuns states -refineMealy3 :: (Ord o, Ord s) => [(i, s -> o)] -> [(i, s -> [s])] -> [s] -> Partition s -refineMealy3 outputFuns reverseFuns states = +refineFuns :: (Ord o, Ord s) => [(i, s -> o)] -> [(i, s -> [s])] -> [s] -> Partition s +refineFuns outputFuns reverseFuns states = let (partition, _splittingTree) = evalState (ST.refine (\_ -> Identity ()) outputFuns reverseFuns) (ST.initialPRState states) in partition diff --git a/src/SplittingTree.hs b/src/SplittingTree.hs index c2814fb..33cfeb1 100644 --- a/src/SplittingTree.hs +++ b/src/SplittingTree.hs @@ -1,6 +1,3 @@ -{-# LANGUAGE PartialTypeSignatures #-} -{-# OPTIONS_GHC -Wno-partial-type-signatures #-} - module SplittingTree where import Data.Partition (Block (..), Partition (..)) @@ -283,3 +280,29 @@ cleanupP (BarePartition m) = Nothing -> do put (Map.insert v n m2, succ n) pure n + +{- Notes on performance: +~~~~~~~~~~~~~~~~~~~~~~~~ + +- The above algorithm mimics the "process the smaller half" algorithm by + Hopcroft. However, this requires a slightly more sophisticated data + structure (sometimes called a /refinable partition/). This introduces a + second indirection, and is a bit more bookkeeping. Instead of processing + the smaller half, we process a half (not necessarily the smallest). When + lucky, this could be the smallest, and we get the O(n log n) bound. But + when unlucky, we get the O(n^2) bound. + +- Usually these algorithms are implemented using vectors. This would require + the mealy machine to be encoded with integers. This is possible and would + make the algorithm run faster. However, I like to have actual state names, + actual outputs and inputs. This makes it easier to debug and less + error-prone. (For instance, it is impossible to mix up the types.) + +- I do not know whether `cleanupP` is needed. I think it is possible that + gaps are introduced in the block numbering. This is not a problem for the + partitions refinement. But it is a problem later on, when I need to know + the exact number of blocks (for instance when comparing partitions in the + /partition lattice/). The function `cleanupP` does not really show in the + profiling results. + +-}