mirror of
https://git.cs.ou.nl/joshua.moerman/mealy-decompose.git
synced 2025-04-30 02:07:44 +02:00
102 lines
3.9 KiB
Haskell
102 lines
3.9 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module DotParser where
|
|
|
|
import Mealy
|
|
|
|
import Data.Char (isAlphaNum)
|
|
import Data.HashMap.Strict qualified as Map
|
|
import Data.HashSet qualified as Set
|
|
import Data.Maybe (mapMaybe)
|
|
import Data.Text qualified as T
|
|
import Data.Text.IO qualified as T
|
|
import Data.Void (Void)
|
|
import Text.Megaparsec
|
|
import Text.Megaparsec.Char (hspace, hspace1, space)
|
|
import Text.Megaparsec.Char.Lexer qualified as L
|
|
|
|
-- * Main function(s)
|
|
|
|
-- $moduleDoc
|
|
--
|
|
-- Parser for Dot files generated by LearnLib (and other tools). This is not
|
|
-- a fully fledged parser for dot files. It is specific to our encoding of
|
|
-- Mealy machines. The parser works line-based, so transitions need to be
|
|
-- on a single line. It assumes the first transitions belongs to the initial
|
|
-- state. The parser ignores any line which is not a transition.
|
|
|
|
-- | Read a dot file and convert it to a Mealy machine.
|
|
readDotFile :: FilePath -> IO (MealyMachine T.Text T.Text T.Text)
|
|
readDotFile dotFile = convertToMealy . mapMaybe (parseMaybe lineP) . T.lines <$> T.readFile dotFile
|
|
where
|
|
-- Parses a full line, ignoring whitespace, until the end of the line
|
|
lineP = hspace *> parseTrans <* space <* eof
|
|
|
|
-- * Internals
|
|
|
|
-- | Type of tokens we accept is `T.Text`, but it could be any type which
|
|
-- is compatible with Megaparsec.
|
|
type Toks = T.Text
|
|
|
|
-- | A transition is a tuple of state, successor state, input label, and
|
|
-- output label
|
|
type Trans = (Toks, Toks, Toks, Toks)
|
|
|
|
-- | Our parser does not have any custom error messages, and always consumes
|
|
-- a stream as `T.Text`.
|
|
type Parser = Parsec Void Toks
|
|
|
|
-- | Parse a single transition.
|
|
parseTrans :: Parser Trans
|
|
parseTrans = assoc <$> identifierQ <* symbol "->" <*> identifierQ <*> brackets parseLabel <* optional (symbol ";")
|
|
where
|
|
-- defines whitespace and lexemes
|
|
sc = L.space hspace1 empty empty
|
|
lexeme = L.lexeme sc
|
|
symbol = L.symbol sc
|
|
-- state, input, output is any string of alphaNumChar's (and some additional characters)
|
|
isAlphaNumExtra c = isAlphaNum c || ('(' <= c && c <= '.') || c == '_'
|
|
alphaNumCharExtra = takeWhile1P (Just "alphanumeric character or extra") isAlphaNumExtra
|
|
identifier = lexeme alphaNumCharExtra
|
|
identifierQ = identifier <|> between (symbol "\"") (symbol "\"") identifier
|
|
-- The label has the shape [label="i/o"]
|
|
brackets = between (symbol "[") (symbol "]")
|
|
parseLabel = (,) <$> (symbol "label=\"" *> identifier <* symbol "/") <*> (identifier <* symbol "\"")
|
|
-- re-associate different parts of the parser
|
|
assoc from to (i, o) = (from, to, i, o)
|
|
|
|
-- | Convert a list of transitions to a Mealy machine, assumes the first
|
|
-- transition belongs to the initial state.
|
|
convertToMealy :: [Trans] -> MealyMachine T.Text T.Text T.Text
|
|
convertToMealy l =
|
|
MealyMachine
|
|
{ states = fmap (allStrs Map.!) . Set.toList $ states
|
|
, inputs = Set.toList $ ins
|
|
, outputs = Set.toList $ outs
|
|
, behaviour = curry (base Map.!)
|
|
, initialState = (allStrs Map.!) . (\(a, _, _, _) -> a) . head $ l
|
|
}
|
|
where
|
|
states = Set.fromList . concatMap (\(a, b, _, _) -> [a, b]) $ l
|
|
ins = Set.fromList . fmap (\(_, _, i, _) -> i) $ l
|
|
outs = Set.fromList . fmap (\(_, _, _, o) -> o) $ l
|
|
-- We put some effort in sharing string values
|
|
allStrs = Map.mapWithKey (\k _ -> k) . Set.toMap . Set.unions $ [states, ins, outs]
|
|
base = Map.fromList . fmap (\(from, to, i, o) -> ((allStrs Map.! from, allStrs Map.! i), (allStrs Map.! o, allStrs Map.! to))) $ l
|
|
|
|
{- Notes
|
|
~~~~~~~~
|
|
|
|
- Originally I used a `Data.Map` data structure to hold the transitions. But
|
|
it turns out that `Data.HashMap` is much faster. This is important, because
|
|
the `behaviour` function is called a lot during partition refinement. This
|
|
is also the reason for moving from `String` to `T.Text`.
|
|
|
|
- Even better would be to move the `Int`. But that makes debugging and
|
|
experimenting with the code harder. It is now efficient enough for our
|
|
purposes.
|
|
|
|
- I have tried @ShortText@ from the @short-text@ package, but it provided
|
|
no benefit over `T.Text`.
|
|
|
|
-}
|