1
Fork 0
mirror of https://git.cs.ou.nl/joshua.moerman/mealy-decompose.git synced 2025-04-30 02:07:44 +02:00
mealy-decompose/src/DotParser.hs
2024-09-23 10:06:29 +02:00

102 lines
3.9 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
module DotParser where
import Mealy
import Data.Char (isAlphaNum)
import Data.HashMap.Strict qualified as Map
import Data.HashSet qualified as Set
import Data.Maybe (mapMaybe)
import Data.Text qualified as T
import Data.Text.IO qualified as T
import Data.Void (Void)
import Text.Megaparsec
import Text.Megaparsec.Char (hspace, hspace1, space)
import Text.Megaparsec.Char.Lexer qualified as L
-- * Main function(s)
-- $moduleDoc
--
-- Parser for Dot files generated by LearnLib (and other tools). This is not
-- a fully fledged parser for dot files. It is specific to our encoding of
-- Mealy machines. The parser works line-based, so transitions need to be
-- on a single line. It assumes the first transitions belongs to the initial
-- state. The parser ignores any line which is not a transition.
-- | Read a dot file and convert it to a Mealy machine.
readDotFile :: FilePath -> IO (MealyMachine T.Text T.Text T.Text)
readDotFile dotFile = convertToMealy . mapMaybe (parseMaybe lineP) . T.lines <$> T.readFile dotFile
where
-- Parses a full line, ignoring whitespace, until the end of the line
lineP = hspace *> parseTrans <* space <* eof
-- * Internals
-- | Type of tokens we accept is `T.Text`, but it could be any type which
-- is compatible with Megaparsec.
type Toks = T.Text
-- | A transition is a tuple of state, successor state, input label, and
-- output label
type Trans = (Toks, Toks, Toks, Toks)
-- | Our parser does not have any custom error messages, and always consumes
-- a stream as `T.Text`.
type Parser = Parsec Void Toks
-- | Parse a single transition.
parseTrans :: Parser Trans
parseTrans = assoc <$> identifierQ <* symbol "->" <*> identifierQ <*> brackets parseLabel <* optional (symbol ";")
where
-- defines whitespace and lexemes
sc = L.space hspace1 empty empty
lexeme = L.lexeme sc
symbol = L.symbol sc
-- state, input, output is any string of alphaNumChar's (and some additional characters)
isAlphaNumExtra c = isAlphaNum c || ('(' <= c && c <= '.') || c == '_'
alphaNumCharExtra = takeWhile1P (Just "alphanumeric character or extra") isAlphaNumExtra
identifier = lexeme alphaNumCharExtra
identifierQ = identifier <|> between (symbol "\"") (symbol "\"") identifier
-- The label has the shape [label="i/o"]
brackets = between (symbol "[") (symbol "]")
parseLabel = (,) <$> (symbol "label=\"" *> identifier <* symbol "/") <*> (identifier <* symbol "\"")
-- re-associate different parts of the parser
assoc from to (i, o) = (from, to, i, o)
-- | Convert a list of transitions to a Mealy machine, assumes the first
-- transition belongs to the initial state.
convertToMealy :: [Trans] -> MealyMachine T.Text T.Text T.Text
convertToMealy l =
MealyMachine
{ states = fmap (allStrs Map.!) . Set.toList $ states
, inputs = Set.toList $ ins
, outputs = Set.toList $ outs
, behaviour = curry (base Map.!)
, initialState = (allStrs Map.!) . (\(a, _, _, _) -> a) . head $ l
}
where
states = Set.fromList . concatMap (\(a, b, _, _) -> [a, b]) $ l
ins = Set.fromList . fmap (\(_, _, i, _) -> i) $ l
outs = Set.fromList . fmap (\(_, _, _, o) -> o) $ l
-- We put some effort in sharing string values
allStrs = Map.mapWithKey (\k _ -> k) . Set.toMap . Set.unions $ [states, ins, outs]
base = Map.fromList . fmap (\(from, to, i, o) -> ((allStrs Map.! from, allStrs Map.! i), (allStrs Map.! o, allStrs Map.! to))) $ l
{- Notes
~~~~~~~~
- Originally I used a `Data.Map` data structure to hold the transitions. But
it turns out that `Data.HashMap` is much faster. This is important, because
the `behaviour` function is called a lot during partition refinement. This
is also the reason for moving from `String` to `T.Text`.
- Even better would be to move the `Int`. But that makes debugging and
experimenting with the code harder. It is now efficient enough for our
purposes.
- I have tried @ShortText@ from the @short-text@ package, but it provided
no benefit over `T.Text`.
-}