Added documentation and cleaned up some code

2025-07-03 23:57:46 +02:00 · 2024-09-23 10:06:29 +02:00 · 2024-09-23 10:06:29 +02:00 · df6a2708c5
commit df6a2708c5
parent ffca2592fc
9 changed files with 185 additions and 163 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,53 @@
 mealy-decompose
 ===============
 Tools to investigate decomposition of Mealy machines, aka finite state
 machines (FSMs). Notable entry points are:
 * `app/Main.hs` is a heuristic to decompose finite state machines into
  multiple components, based on their outputs.
 * `other/decompose_fsm_optimise.py` does the same, but optimally and with a
  SAT solver. This can only handle state spaces of at most a few hundred
  states.
 * `app/RandomGen.hs` for generating FSMs which are decomposable.
 ## How to run
 The the haskell tools (tested with ghc 9.2.8 and ghc 9.10.1):
 ```
 cabal run mealy-decompose -- <path-to-fsm>
 ```
 For the python tools (tested with python 3.12):
 ```
 pip3 install python-sat
 python3 decompose_fsm_optimise.py -h
 ```
 ## Notes on `copar`
 In the first versions of this tool, I used the `copar` library:
 [CoPaR (The Coalgebraic Partition Refiner)](https://git8.cs.fau.de/software/copar).
 This is a great library and in particular very fast. Despite that, I switched
 to my own partition refinement implementations because of the reasons below.
 The last commit to use `copar` is `2b8b79a4`.
 * I needed witnesses for some tasks. So I not only construct a partition,
  but also a splitting tree, where are track all the actions needed to
  separate states.
 * I prefer to use the actual strings (or other data type) for the states,
  inputs and outputs as opposed to `Int`s. This makes it easier to debug, and
  less error-prone, as one cannot mix these entities anymore.
 * `copar` comes with many dependencies, and it was a bit tricky to get the
  versioning correct.
 ## Copyright
 (c) 2024 Joshua Moerman, Open Universiteit
--- a/app/Main.hs
+++ b/app/Main.hs
@ -1,5 +1,4 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
 module Main where
@ -33,13 +32,16 @@ myWriteFile filename = writeFile ("results/" ++ filename)
 main :: IO ()
 main = do
  -- Read dot file
-  [dotFile] <- getArgs
+  ls <- getArgs
-  print dotFile
+  let dotFile = case ls of
        [x] -> x
        _ -> error "Please provide exactly one argument (filepath of dot file)"
  putStr "reading " >> putStrLn dotFile
  machine <- readDotFile dotFile
  -- print some basic info
  putStrLn $ (show . length $ states machine) <> " states, " <> (show . length $ inputs machine) <> " inputs and " <> (show . length $ outputs machine) <> " outputs"
  putStrLn "Small sample:"
  let
    printPartition p = putStrLn $ "number of states = " <> show (numBlocks p)
@ -50,14 +52,14 @@ main = do
    reverseFuns = [(i, fun) | i <- inputs machine, let mm = reverseTransitionMaps i, let fun s = Map.findWithDefault [] s mm]
  -- Minimise input, so we know the actual number of states
-  printPartition (refineMealy3 outputFuns reverseFuns (states machine))
+  printPartition (refineFuns outputFuns reverseFuns (states machine))
  putStrLn ""
  -- Then compute each projection
  let
    outs = outputs machine
    mappedOutputFuns o = [(i, (o ==) . f) | (i, f) <- outputFuns]
-    projections = [(o, refineMealy3 (mappedOutputFuns o) reverseFuns (states machine)) | o <- outs]
+    projections = [(o, refineFuns (mappedOutputFuns o) reverseFuns (states machine)) | o <- outs]
  -- Print number of states of each projection
  mapM_
@ -120,7 +122,7 @@ main = do
        componentOutputs = Set.fromList osWithRelAndEquiv
        proj = projectToComponent (`Set.member` componentOutputs) machine
        -- Sanity check: compute partition again
-        partition = refineMealy2 proj
+        partition = refineMealy proj
      putStrLn ""
      putStrLn $ "Component " <> show os
--- a/app/Playground.hs
+++ b/app/Playground.hs
@ -1,7 +1,6 @@
 module Main where
 import Bisimulation (bisimulation2)
 import Data.Partition (numBlocks)
 import Data.Trie qualified as Trie
 import Data.UnionFind
 import DotParser (readDotFile)
@ -14,7 +13,6 @@ import Data.List qualified as List
 import Data.Map.Strict qualified as Map
 import Data.Maybe (isJust)
 import Data.Set qualified as Set
 import Data.Text.IO qualified as T
 import System.Environment (getArgs)
 main :: IO ()
@ -23,8 +21,7 @@ main = do
  case args of
    ("HSI" : ls) -> mainHSI ls
    ("InputDecomp" : ls) -> mainInputDecomp ls
-    ("Refine" : ls) -> mainRefine ls
+    _ -> putStrLn "Please provide one of [HSI, InputDecomp]"
    _ -> putStrLn "Please provide one of [HSI, InputDecomp, Refine]"
 mainHSI :: [String] -> IO ()
 mainHSI args = case args of
@ -126,32 +123,3 @@ mainInputDecomp args = case args of
      0 -> putStrLn "ERROR"
      1 -> putStrLn "INDECOMPOSABLE"
      n -> putStrLn ("MAYBE DECOMPOSABLE: " ++ show n ++ " classes")
 -- Used to determine whether Copar is faster than SplittingTree (it is).
 -- Copar is almost twice as fast on ESM, but SplittingTree is faster on a
 -- BRP benchmark. I guess, theoretically, Copar should be faster generally.
 mainRefine :: [String] -> IO ()
 mainRefine args = case args of
  [dotFile, copar] -> run dotFile (read copar)
  _ -> putStrLn "Please provide a dot file and Boolean"
 where
  run dotFile copar = do
    m <- readDotFile dotFile
    putStr $ "file parsed, initial state = "
    T.putStrLn $ initialState m
    if copar
      then runCopar m
      else runSplittingTree m
  runCopar _ = error "no longer supported"
  --  let printPartition p = putStrLn $ "Done " <> show (numBlocks p)
  --   in printPartition (refineMealy (mealyMachineToEncoding m))
  runSplittingTree MealyMachine{..} = do
    let
      outputFuns = [(i, fun) | i <- inputs, let fun s = fst (behaviour s i)]
      reverseTransitionMaps i = Map.fromListWith (++) [(t, [s]) | s <- states, let t = snd (behaviour s i)]
      reverseFuns = [(i, fun) | i <- inputs, let mm = reverseTransitionMaps i, let fun s = Map.findWithDefault [] s mm]
    (partition, _splittingTree) <- evalStateT (refine (\_ -> pure ()) outputFuns reverseFuns) (initialPRState states)
    putStrLn $ "Done" <> show (numBlocks partition)
--- a/src/Data/Partition.hs
+++ b/src/Data/Partition.hs
@ -1,20 +1,35 @@
-{-# LANGUAGE PackageImports #-}
+{-# LANGUAGE DerivingVia #-}
 module Data.Partition where
 import Data.Preorder
 import Control.Monad.Trans.State.Strict as State
 import Data.Function (on)
 import Data.List (groupBy, sortOn)
 import Data.Map.Merge.Strict qualified as Map
 import Data.Map.Strict qualified as Map
 import Data.Tuple (swap)
 import Data.List (groupBy, sortOn)
 import Data.Function (on)
 -- * Partitions
 -- $moduleDoc
 --
 -- A partition on a set of type @a@ is represented as a map @a -> `Block`@,
 -- where a `Block` is a unique identifier (integer) for a set of elements.
 --
 -- Partitions form a lattice (the so-called /partition lattice/), where the
 -- partial order is given by the refinement relation. We put the finest
 -- partition at the top, and the coarsest at the bottom. This is the opposite
 -- of the convection used on wikipedia.
 -- * Type definitions
 -- | A block is a unique identifier for a set of elements.
 newtype Block = Block Int
-  deriving (Eq, Ord, Show, Enum, Num)
+  deriving (Eq, Ord, Show, Enum, Num) via Int
-- | A partition is represented by a finite map of type 's -> Block'. Two
+-- | A partition is represented by a finite map of type @s -> `Block`@. Two
 -- elements mapped to the same block are equivalent. Note that a permutation
 -- of the blocks will not change the partition, but it does change the
 -- underlying representation. (That is why I haven't given it an Eq instance
@ -25,10 +40,12 @@ data Partition s = Partition
  }
  deriving Show
-- | Constraint for using partitions. Currently this is 'Ord'. But it might
+-- | Constraint for using partitions. Currently this is `Ord`. But it might
-- change to '(Eq s, Hashable s)' in the future.
+-- change to @(`Eq` s, Hashable s)@ in the future.
 type Partitionable s = Ord s
 -- * Basic functions
 -- | Determines whether two elements are equivalent in the partition.
 sameBlock :: Partitionable s => Partition s -> s -> s -> Bool
 sameBlock (Partition m _) s t = m Map.! s == m Map.! t
@ -37,15 +54,20 @@ sameBlock (Partition m _) s t = m Map.! s == m Map.! t
 toBlocks :: Partition s -> [[s]]
 toBlocks = fmap (fmap snd) . groupBy ((==) `on` fst) . sortOn fst . fmap swap . Map.assocs . getPartition
 -- * Lattice structure on partitions
 -- | Returns the common refinement of two partitions. This is the coarsest
 -- partition which is finer than either input, i.e., the lowest upper bound.
-- Runs in O(n), where n is the number of elements of the partition. Note
+-- Runs in O(n + k log k), where n is the number of elements of the partition
-- that both partitions must be on the same set of elements.
+-- and k is the number of blocks. Note that both partitions must be on the
 -- same set of elements.
 commonRefinement :: Partitionable s => Partition s -> Partition s -> Partition s
 commonRefinement (Partition p1 _) (Partition p2 _) =
  let (p, (_, b)) = State.runState (mergeFun p1 p2) (Map.empty, Block 0)
   in Partition p b
 where
  -- We merge the two maps, they should contain the same keys. While merging,
  -- we keep track of a `Map` of type @(`Block`, `Block`) -> `Block`@.
  mergeFun = Map.mergeA Map.dropMissing Map.dropMissing (Map.zipWithAMatched checkPair)
  checkPair _ b1 b2 = do
    (m, n) <- get
@ -55,21 +77,6 @@ commonRefinement (Partition p1 _) (Partition p2 _) =
        put (Map.insert (b1, b2) n m, succ n)
        pure n
 -- | This function checks whether one partition is a refinement of the other.
 -- This function already appears in the `copar` library, but the one here is
 -- faster. This function is the same as `>=` in the partition lattice.
 -- Could be made faster by doing what commonRefinement is doing but
 -- stopping early. But it's fast enough for now, so I won't bother.
 isRefinementOf2 :: Partitionable s => Partition s -> Partition s -> Bool
 isRefinementOf2 refined original = comparePartitions refined original == GT'
 -- | Checks whether two partitions are equal as partitions. Note that the `Eq`
 -- instance on partitions checks for equality of the state assignments, not
 -- whether the partitions are equal as partitions.
 isEquivalent :: Partitionable s => Partition s -> Partition s -> Bool
 isEquivalent p1 p2 = comparePartitions p1 p2 == EQ'
 -- | Compares two partitions. Returns `EQ'` if the partitions are equal, `GT'`
 -- if the first partition is a refinement of the second, `LT'` if the first
 -- partition is a coarsening of the second, and `IC'` if the partitions are
@ -78,11 +85,40 @@ comparePartitions :: Partitionable s => Partition s -> Partition s -> PartialOrd
 comparePartitions p1@(Partition m1 b1) p2@(Partition m2 b2)
  | m1 == m2 = EQ'
  | otherwise =
      -- We compute the common refinement...
      let (Partition _ n3) = commonRefinement p1 p2
          n1 = b1
          n2 = b2
-       in case (n1 == n3, n2 == n3) of
+       in -- ... and if it is equal to one of the paritions to begin with, we have
          -- a refinement or coarsening. For this we only need the check the
          -- number of blocks.
          case (n1 == n3, n2 == n3) of
            (True, True) -> EQ'
            (True, False) -> GT'
            (False, True) -> LT'
            (False, False) -> IC'
 -- | This function checks whether one partition is a refinement of the other.
 -- This function is the same as `>=` in the partition lattice.
 isRefinementOf :: Partitionable s => Partition s -> Partition s -> Bool
 isRefinementOf refined original = comparePartitions refined original == GT'
 -- | Checks whether two partitions are equal as partitions.
 isEquivalent :: Partitionable s => Partition s -> Partition s -> Bool
 isEquivalent p1 p2 = comparePartitions p1 p2 == EQ'
 {- Notes
 ~~~~~~~~
 - `isRefinementOf` could be made faster by doing what commonRefinement is
  doing but stopping early. But it's fast enough for now, so I won't bother.
 - We could change the data type to use a `HashMap` instead of a `Map`. This
  could make things faster, but there is no `mergeA` function for `HashMap`.
  That is why I'm using `Map` for now. Another options would be to convert
  @s@ to `Int` and use an array. (This is what the copar library does.)
 - The `Block` type currently has a `Num` instance. It probably should not
  have that. But it's convenient for now.
 -}
--- a/src/Data/UnionFind.hs
+++ b/src/Data/UnionFind.hs
@ -44,7 +44,12 @@ equate x y (MkUnionFind m1) =
    Nothing -> (z, m)
    Just w -> Map.insert z r <$> rootCP w m r
-- We zouden ook nog een rank optimalisatie kunnen (moeten?) doen. Maar dan
+{- Notes
-- moeten we meer onthouden. Verder zou ik een functie kunnen maken die
+~~~~~~~~
-- een `equivalent` en `equate` combineert, dat kan namelijk wel met pad-
+
-- compressie.
+We zouden ook nog een rank optimalisatie kunnen (moeten?) doen. Maar dan
 moeten we meer onthouden. Verder zou ik een functie kunnen maken die
 een `equivalent` en `equate` combineert, dat kan namelijk wel met pad-
 compressie.
 -}
--- a/src/DotParser.hs
+++ b/src/DotParser.hs
@ -1,6 +1,4 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE PartialTypeSignatures #-}
 {-# OPTIONS_GHC -Wno-partial-type-signatures #-}
 module DotParser where
@ -36,7 +34,7 @@ readDotFile dotFile = convertToMealy . mapMaybe (parseMaybe lineP) . T.lines <$>
 -- * Internals
-- | Type of tokens we accept is 'T.Text', but it could be any type which
+-- | Type of tokens we accept is `T.Text`, but it could be any type which
 -- is compatible with Megaparsec.
 type Toks = T.Text
@ -45,7 +43,7 @@ type Toks = T.Text
 type Trans = (Toks, Toks, Toks, Toks)
 -- | Our parser does not have any custom error messages, and always consumes
-- a stream as 'T.Text'.
+-- a stream as `T.Text`.
 type Parser = Parsec Void Toks
 -- | Parse a single transition.
@ -85,3 +83,20 @@ convertToMealy l =
  -- We put some effort in sharing string values
  allStrs = Map.mapWithKey (\k _ -> k) . Set.toMap . Set.unions $ [states, ins, outs]
  base = Map.fromList . fmap (\(from, to, i, o) -> ((allStrs Map.! from, allStrs Map.! i), (allStrs Map.! o, allStrs Map.! to))) $ l
 {- Notes
 ~~~~~~~~
 - Originally I used a `Data.Map` data structure to hold the transitions. But
  it turns out that `Data.HashMap` is much faster. This is important, because
  the `behaviour` function is called a lot during partition refinement. This
  is also the reason for moving from `String` to `T.Text`.
 - Even better would be to move the `Int`. But that makes debugging and
  experimenting with the code harder. It is now efficient enough for our
  purposes.
 - I have tried @ShortText@ from the @short-text@ package, but it provided
  no benefit over `T.Text`.
 -}
--- a/src/DotWriter.hs
+++ b/src/DotWriter.hs
@ -4,7 +4,7 @@ import Data.Monoid (Endo (..))
 import Data.Partition (Block (..))
 import Data.Text qualified as T
-- TODO: use Data.Text here instead of strings
+-- TODO: use `Data.Text` here instead of strings
 type StringBuilder = Endo String
@ -30,7 +30,7 @@ instance ToDot a => ToDot (Maybe a) where
 instance ToDot Block where
  -- only works nicely when non-negative
-  toDot (Block n) = string "s" <> string (show n)
+  toDot b = string "s" <> string (show b)
 transitionToDot :: (ToDot s, ToDot i, ToDot o) => (s, i, o, s) -> StringBuilder
 transitionToDot (s, i, o, t) =
--- a/src/MealyRefine.hs
+++ b/src/MealyRefine.hs
@ -1,5 +1,3 @@
 {-# LANGUAGE PackageImports #-}
 module MealyRefine where
 import Data.Partition (Partition)
@ -20,9 +18,6 @@ project f MealyMachine{..} =
    , ..
    }
 projectToBit :: Ord o => o -> MealyMachine s i o -> MealyMachine s i Bool
 projectToBit o = project (o ==)
 projectToComponent :: Ord o => (o -> Bool) -> MealyMachine s i o -> MealyMachine s i (Maybe o)
 projectToComponent oPred = project oMaybe
 where
@ -30,91 +25,16 @@ projectToComponent oPred = project oMaybe
    | oPred o = Just o
    | otherwise = Nothing
-- We will project to all (relevant) outputs. Since a lot of data is shared
+refineMealy :: (Ord s, Ord i, Ord o) => MealyMachine s i o -> Partition s
-- among those mealy machines, I do this in one function. The static parts
+refineMealy MealyMachine{..} =
 -- are converted only once. Only "eStructure" (the state-labels) are different
 -- for each projection.
 {-
 allProjections :: (Ord s, Ord i, Eq o) => MealyMachine s i o -> [o] -> ([Encoding (Label Polynomial) (F1 Polynomial)], Map.Map s Int)
 allProjections MealyMachine{..} outs = (fmap mkEncoding outs, state2idx)
 where
  numStates = length states
  numInputs = length inputs
  idx2state = Map.fromList $ zip [0 ..] states
  state2idx = Map.fromList $ zip states [0 ..]
  idx2input = Map.fromList $ zip [0 ..] inputs
  stateInputIndex n = n `divMod` numInputs -- inverse of \(s, i) -> s * numInputs + i
  edgesFrom = Data.Vector.Unboxed.generate (numStates * numInputs) (fst . stateInputIndex)
  edgesLabel = Data.Vector.generate (numStates * numInputs) (snd . stateInputIndex)
  edgesTo = Data.Vector.Unboxed.generate (numStates * numInputs) ((state2idx Map.!) . snd . (\(s, i) -> behaviour (idx2state Map.! s) (idx2input Map.! i)) . stateInputIndex)
  bool2Int = bool 0 1
  structure o =
    Data.Vector.generate
      numStates
      ( \s ->
          PolyF1
            { polyF1Summand = 0 -- There is only one summand
            , polyF1Variables = numInputs -- This many transitions per state
            , polyF1Constants =
                Data.Vector.Unboxed.generate
                  numInputs
                  (\i -> bool2Int . (o ==) . fst $ behaviour (idx2state Map.! s) (idx2input Map.! i))
            }
      )
  mkEncoding o =
    Encoding
      { eStructure = structure o
      , eInitState = Nothing -- not needed
      , eEdgesFrom = edgesFrom
      , eEdgesLabel = edgesLabel
      , eEdgesTo = edgesTo
      }
 -- Refine a encoded mealy machine
 refineMealy :: Encoding (Label Polynomial) (F1 Polynomial) -> Copar.Partition
 refineMealy machine = runST $ Copar.refine (Proxy @Polynomial) machine True
 mealyMachineToEncoding :: (Ord s, Ord i, Ord o) => MealyMachine s i o -> Encoding (Label Polynomial) (F1 Polynomial)
 mealyMachineToEncoding MealyMachine{..} =
  let numStates = length states
      numInputs = length inputs
      idx2state = Map.fromList $ zip [0 ..] states
      idx2input = Map.fromList $ zip [0 ..] inputs
      out2idx = Map.fromList $ zip outputs [0 ..]
      eStructure =
        Data.Vector.generate
          numStates
          ( \s ->
              PolyF1
                { polyF1Summand = 0 -- There is only one summand
                , polyF1Variables = numInputs -- This many transitions per state
                , polyF1Constants =
                    Data.Vector.Unboxed.generate
                      numInputs
                      (\i -> out2idx Map.! fst (behaviour (idx2state Map.! s) (idx2input Map.! i)))
                }
          )
      eInitState = Nothing
      state2idx = Map.fromList $ zip states [0 ..]
      stateInputIndex n = n `divMod` numInputs
      -- stateInputPair (s, i) = s * numInputs + i
      eEdgesFrom = Data.Vector.Unboxed.generate (numStates * numInputs) (fst . stateInputIndex)
      eEdgesLabel = Data.Vector.generate (numStates * numInputs) (snd . stateInputIndex)
      eEdgesTo = Data.Vector.Unboxed.generate (numStates * numInputs) ((state2idx Map.!) . snd . (\(s, i) -> behaviour (idx2state Map.! s) (idx2input Map.! i)) . stateInputIndex)
   in Encoding{..}
 -}
 refineMealy2 :: (Ord s, Ord i, Ord o) => MealyMachine s i o -> Partition s
 refineMealy2 MealyMachine{..} =
  let
    outputFuns = [(i, fun) | i <- inputs, let fun s = fst (behaviour s i)]
    reverseTransitionMaps i = Map.fromListWith (++) [(t, [s]) | s <- states, let t = snd (behaviour s i)]
    reverseFuns = [(i, fun) | i <- inputs, let mm = reverseTransitionMaps i, let fun s = Map.findWithDefault [] s mm]
   in
-    refineMealy3 outputFuns reverseFuns states
+    refineFuns outputFuns reverseFuns states
-refineMealy3 :: (Ord o, Ord s) => [(i, s -> o)] -> [(i, s -> [s])] -> [s] -> Partition s
+refineFuns :: (Ord o, Ord s) => [(i, s -> o)] -> [(i, s -> [s])] -> [s] -> Partition s
-refineMealy3 outputFuns reverseFuns states =
+refineFuns outputFuns reverseFuns states =
  let (partition, _splittingTree) = evalState (ST.refine (\_ -> Identity ()) outputFuns reverseFuns) (ST.initialPRState states)
   in partition
--- a/src/SplittingTree.hs
+++ b/src/SplittingTree.hs
@ -1,6 +1,3 @@
 {-# LANGUAGE PartialTypeSignatures #-}
 {-# OPTIONS_GHC -Wno-partial-type-signatures #-}
 module SplittingTree where
 import Data.Partition (Block (..), Partition (..))
@ -283,3 +280,29 @@ cleanupP (BarePartition m) =
      Nothing -> do
        put (Map.insert v n m2, succ n)
        pure n
 {- Notes on performance:
 ~~~~~~~~~~~~~~~~~~~~~~~~
 - The above algorithm mimics the "process the smaller half" algorithm by
  Hopcroft. However, this requires a slightly more sophisticated data
  structure (sometimes called a /refinable partition/). This introduces a
  second indirection, and is a bit more bookkeeping. Instead of processing
  the smaller half, we process a half (not necessarily the smallest). When
  lucky, this could be the smallest, and we get the O(n log n) bound. But
  when unlucky, we get the O(n^2) bound.
 - Usually these algorithms are implemented using vectors. This would require
  the mealy machine to be encoded with integers. This is possible and would
  make the algorithm run faster. However, I like to have actual state names,
  actual outputs and inputs. This makes it easier to debug and less
  error-prone. (For instance, it is impossible to mix up the types.)
 - I do not know whether `cleanupP` is needed. I think it is possible that
  gaps are introduced in the block numbering. This is not a problem for the
  partitions refinement. But it is a problem later on, when I need to know
  the exact number of blocks (for instance when comparing partitions in the
  /partition lattice/). The function `cleanupP` does not really show in the
  profiling results.
 -}