Made the ADS encoding (and moved the parser elsewhere)

2025-07-05 19:47:44 +02:00 · 2022-01-19 13:56:25 +01:00 · 2022-01-19 13:56:25 +01:00 · 921a261c47
commit 921a261c47
parent ddb65c050e
3 changed files with 345 additions and 29 deletions
--- a/satuio/ads.py
+++ b/satuio/ads.py
@ -0,0 +1,302 @@
 """
 Script for finding AD sequences in a Mealy machine. Uses SAT solvers
 (in pysat) to search efficiently. The length of the sequence is fixed,
 but you can specify multiple states for which the ADS is supposed to
 work. When an ADS does not exist, the solver returns UNSAT. For the
 usage, please run
  python3 ads.py --help
 © Joshua Moerman, Open Universiteit, 2022
 """
 # Import the solvers and utilities
 from pysat.solvers import Solver
 from pysat.formula import IDPool
 from pysat.card import CardEnc, EncType
 from argparse import ArgumentParser # Command line options
 from rich.console import Console    # Import colorized output
 from time import time               # Time for rough timing measurements
 from tqdm import tqdm               # Import fancy progress bars
 from parser import read_machine
 # Rough wall-clock timing, used to log how long each phase took.
 # `start` is the moment the previous phase ended; `start_total` keeps
 # the moment the script started, for the final report.
 start = time()
 start_total = start
 def measure_time(*parts):
  """Print the given message parts followed by the elapsed time.

  The elapsed time is measured since the previous call (or since
  start-up for the first call). Afterwards the timer is restarted, so
  consecutive calls report the duration of consecutive phases.
  """
  global start
  now = time()
  elapsed = now - start
  print('***', *parts, "in %.3f seconds" % elapsed)
  start = now
 # *****************
 # Reading the input
 # *****************
 # command line options
 parser = ArgumentParser()
 parser.add_argument('filename', help='File of the mealy machine (dot format)')
 parser.add_argument('length', help='Length of the ADS', type=int)
 parser.add_argument('-v', '--verbose', help="Show more output", action='store_true')
 parser.add_argument('--solver', help='Which solver to use (default g3)', default='g3')
 parser.add_argument('--states', help='For which states to compute an ADS', nargs='+')
 args = parser.parse_args()
 # Distinguishing states only makes sense for two or more of them.
 # (args.states is None when --states is not given at all.)
 if args.states is None or len(args.states) <= 1:
  raise ValueError('Should specify at least 2 states')
 # reading the automaton
 (alphabet, outputs, all_states, delta, labda) = read_machine(args.filename)
 states = args.states
 length = args.length
 # Reject states that do not occur in the machine right away; otherwise
 # they only surface later as a KeyError while constructing the CNF.
 unknown_states = [s for s in states if s not in all_states]
 if unknown_states:
  raise ValueError('Unknown states: ' + ', '.join(unknown_states))
 measure_time('Constructed automaton with', len(all_states), 'states and', len(alphabet), 'symbols')
 # ********************
 # Setting up the solver
 #  And the variables
 # ********************
 vpool = IDPool()
 solver = Solver(name=args.solver)
 # The solver only understands plain numbered variables x_i, so every
 # structured variable name has to be mapped to such a number:
 # x_whatever  ->  x_i.
 # The IDPool of pysat does this for us and generates the variables
 # on the fly.
 def var(x):
  """Return the solver variable belonging to the structured name x."""
  key = ('uio', x)
  return vpool.id(key)
 # Every state gets a path of its own. Along that path we encode
 # the states visited, the inputs chosen, and the outputs produced.
 def avar(s, i, a):
  """Variable meaning: on path s, at place i, the input symbol is a."""
  name = ('a', s, i, a)
  return var(name)
 def svar(s, i, t):
  """Variable meaning: on path s, at place i, we are in state t."""
  name = ('s', s, i, t)
  return var(name)
 def ovar(s, i, o):
  """Variable meaning: on path s, at place i, the output is o."""
  name = ('o', s, i, o)
  return var(name)
 # Extra variables record that two paths produce a different output
 # (in the spirit of a Tseytin transformation).
 def dvar(s, t, i):
  """Variable meaning: the paths of s and t differ at place i."""
  name = ('d1', s, t, i)
  return var(name)
 # We are after an *adaptive* distinguishing sequence, so two paths
 # have to use the same inputs for as long as no difference has been
 # observed. These additional variables keep track of that.
 def d2var(s, t, i):
  """Variable meaning: the paths of s and t differ at place i or earlier."""
  name = ('d2', s, t, i)
  return var(name)
 # Many constraints state that exactly one variable in a list holds.
 # pysat's cardinality encoding handles that. It might introduce
 # additional variables, but that does not matter for us.
 def unique(lits):
  """Add clauses that force exactly one of the literals in lits to hold."""
  exactly_one = CardEnc.equals(lits, 1, vpool=vpool, encoding=EncType.seqcounter)
  solver.append_formula(exactly_one.clauses)
 measure_time('Setup solver', args.solver)
 # ********************
 # Constructing the CNF
 # ********************
 # For each state s, we construct a path of possible successor states,
 # following the guessed words. This path should be consistent with delta,
 # and we also record the outputs along this path. The outputs are later
 # used to decide whether we found a different output.
 # possible_outputs maps (s, i) to the set of outputs that can occur
 # on path s at place i.
 possible_outputs = {}
 for s in tqdm(states, desc="CNF paths"):
  # current set of possible states we're in
  current_set = set([s])
  # set of successors for the next iteration of i
  next_set = set()
  for i in range(length):
    # Only one input at this position
    unique([avar(s, i, a) for a in alphabet])
    # Only one successor state should be enabled.
    # For i == 0, this is a single state (s).
    unique([svar(s, i, t) for t in current_set])
    # We keep track of the possible outputs
    possible_outputs[(s, i)] = set()
    for t in current_set:
      for a in alphabet:
        output = labda[(t, a)]
        possible_outputs[(s, i)].add(output)
        # Constraint: on path s, when in state t and input a, we output o
        # x_('s', s, i, t) /\ x_('a', s, i, a) => x_('o', s, i, labda(t, a))
        # == -x_('s', s, i, t) \/ -x_('a', s, i, a) \/ x_('o', s, i, labda(t, a))
        solver.add_clause([-svar(s, i, t), -avar(s, i, a), ovar(s, i, output)])
        # when i == length-1 we don't need to consider successors
        if i < length-1:
          next_t = delta[(t, a)]
          next_set.add(next_t)
          # Constraint: on path s, when in state t and input a, we go to next_t
          # x_('s', s, i, t) /\ x_('a', s, i, a) => x_('s', s, i+1, delta(t, a))
          # == -x_('s', s, i, t) \/ -x_('a', s, i, a) \/ x_('s', s, i+1, delta(t, a))
          solver.add_clause([-svar(s, i, t), -avar(s, i, a), svar(s, i+1, next_t)])
    # Only one output should be enabled
    unique([ovar(s, i, o) for o in possible_outputs[(s, i)]])
    # Next iteration with successor states
    current_set = next_set
    next_set = set()
 # For every ordered pair (s, t) of distinct states we encode that
 # their paths must show a difference in output somewhere, and that
 # the inputs on both paths agree until such a difference has occurred.
 for s in tqdm(states, desc="CNF diffs"):
  for t in states:
    # We skip s == t, since a state cannot be distinguished from itself.
    # I am not sure whether we can skip s <= t, since our construction
    # below is not symmetrical.
    if s == t:
      continue
    # First, we require that there is a difference on the paths of s and t
    solver.add_clause([dvar(s, t, i) for i in range(length)])
    for i in range(length):
      # The difference variables are symmetric in the sense that
      # x_('d', s, t, i) <=> x_('d', t, s, i)
      # We do only one direction here, the other direction is handled
      # with s and t swapped. I don't know whether this is needed though.
      solver.add_clause([-dvar(s, t, i), dvar(t, s, i)])
      solver.add_clause([-d2var(s, t, i), d2var(t, s, i)])
      # First we encode that d2var is the closure of dvar.
      # Note that we only do one direction. Setting d2var to true helps the
      # solver, as it means that the inputs may be chosen differently.
      # So if the solver sets a d2var to true, it must mean there is
      # a difference, or an earlier difference.
      if i == 0:
        # d2var(s, t, 0) => dvar(s, t, 0) (there is no "earlier")
        solver.add_clause([-d2var(s, t, i), dvar(s, t, i)])
      else:
        # d2var(s, t, i) => (dvar(s, t, i) \/ d2var(s, t, i-1))
        solver.add_clause([-d2var(s, t, i), dvar(s, t, i), d2var(s, t, i-1)])
      # Now we encode that, if there is no difference yet, the
      # guessed inputs must be the same for both states.
      # -d2var(s, t, i-1) => (avar(s, i, a) <=> avar(t, i, a))
      # (The converse implication comes from the iteration with s and t
      # swapped.)
      for a in alphabet:
        # for i == 0, the inputs have to be the same
        if i == 0:
          # avar(s, i, a) => avar(t, i, a)
          solver.add_clause([-avar(s, i, a), avar(t, i, a)])
        else:
          # We do one direction -d2var(s, t, i-1) /\ avar(s, i, a) => avar(t, i, a)
          solver.add_clause([d2var(s, t, i-1), -avar(s, i, a), avar(t, i, a)])
      # We encode: if there is a difference, then the outputs should
      # actually differ. (We do not have to encode the other implication!)
      # x_('d', s, t, i) /\ x_('o', s, i, o) => -x_('o', t, i, o)
      # Note: when o is not possible for state t, then the clause already holds
      outputs_s = possible_outputs[(s, i)]
      outputs_t = possible_outputs[(t, i)]
      for o in outputs_s:
        if o in outputs_t:
          solver.add_clause([-dvar(s, t, i), -ovar(s, i, o), -ovar(t, i, o)])
 measure_time('Constructed CNF with', solver.nof_clauses(), 'clauses and', solver.nof_vars(), 'variables')
 # ******************
 # Solving and output
 # ******************
 # We set up some things for nice output
 console = Console(markup=False, highlight=False)
 # Width of the longest state name, used to right-align the path listing
 max_state_length = max([len(str) for str in states])
 # Solve it!
 solution = solver.solve()
 measure_time('Solver finished')
 if solution:
  # We get the model, and store all true variables
  # in a set, for easy lookup.
  m = solver.get_model()
  truth = set()
  for l in m:
    if l > 0:
      truth.add(l)
  console.print('! words:')
  for s in states:
    console.print('for', s, end=': ', style='bold black')
    # We print the word: for each place, the input symbol(s) whose
    # variable is true in the model
    for i in range(length):
      for a in alphabet:
        if avar(s, i, a) in truth:
          console.print(a, end=' ', style='bold green')
    console.print('')
  # (If verbose) For each state, we print the paths and output.
  # We mark the differences red (there can be differences not
  # marked, these are the differences decided in the solving).
  if args.verbose:
    console.print('! paths')
    for s in states:
      console.print(s.rjust(max_state_length, ' '), ' ==> ', end=' ', style='bold black')
      # Per place: current state (blue), input (green), output (red)
      for i in range(length):
        for t in all_states:
          if svar(s, i, t) in truth:
            console.print(t, end=' ', style='blue')
        for a in alphabet:
          if avar(s, i, a) in truth:
            console.print(a, end=' ', style='green')
        for o in possible_outputs[(s, i)]:
          if ovar(s, i, o) in truth:
            console.print(o, end=' ', style='red')
      console.print('')
    console.print('! differences')
    # Per pair and place: 'X' if dvar is true ('.' otherwise), followed
    # by '-' if d2var is true ('_' otherwise).
    # Note: pairs with s == t are printed as well, although the CNF
    # construction skips them and adds no constraints for them.
    for s in states:
      for t in states:
        console.print(s, 'vs', t, end=': ')
        for i in range(length):
          if dvar(s, t, i) in truth:
            console.print('X', end='')
          else:
            console.print('.', end='')
          if d2var(s, t, i) in truth:
            console.print('-', end=' ')
          else:
            console.print('_', end=' ')
        console.print('')
 else:
  console.print('! no ADS of length', length, style='bold red')
  # The core returned by the solver is not interesting:
  # It is only the assumption (i.e. bvar).
  # NOTE(review): solve() is called without assumptions above and no
  # bvar is visible in this script; this remark was presumably carried
  # over from a sibling script -- confirm and remove if so.
 # Report some final stats
 # Rewind the timer to the start of the script, so that the next
 # measurement reports the total running time.
 start = start_total
 print('')
 measure_time("Done with total time")
--- a/satuio/parser.py
+++ b/satuio/parser.py
@ -0,0 +1,36 @@
 """
 Function to read a Mealy machine in .dot format (as outputted by
 LearnLib). This is not a battle-tested parser, but a quick and
 dirty parser. Good enough for now. All states, inputs and output
 are stored as strings.
 © Joshua Moerman, Open Universiteit, 2022
 """
 def read_machine(filename):
  """Read a Mealy machine from a .dot file.

  Only lines consisting of more than three whitespace-separated tokens
  whose second token is '->' are treated as transitions; all other
  lines are ignored. The transition label is the text between the
  first pair of double quotes (with whitespace removed), and must have
  the form input/output.

  Returns the tuple (alphabet, outputs, states, delta, labda), where
  alphabet, outputs and states are sets of strings, and delta and
  labda map (state, input) to the successor state and to the output.
  """
  alphabet, outputs, states = set(), set(), set()
  delta, labda = {}, {}
  with open(filename) as dot_file:
    for raw_line in dot_file:
      tokens = raw_line.split()
      # Anything that is not a labelled edge is skipped
      if len(tokens) <= 3 or tokens[1] != '->':
        continue
      source, target = tokens[0], tokens[2]
      # Glue the remaining tokens back together (this drops the
      # whitespace) and take what is between the first two quotes
      attributes = ''.join(tokens[3:])
      label = attributes.split('"')[1]
      symbol, output = label.split('/')
      states.update((source, target))
      alphabet.add(symbol)
      outputs.add(output)
      transition = (source, symbol)
      delta[transition] = target
      labda[transition] = output
  return (alphabet, outputs, states, delta, labda)
--- a/satuio/uio.py
+++ b/satuio/uio.py
@ -10,7 +10,6 @@ returns UNSAT. For the usage, please run
 © Joshua Moerman, Open Universiteit, 2022
 """
 # Import the solvers and utilities
 from pysat.solvers import Solver
 from pysat.formula import IDPool
@ -21,6 +20,8 @@ from rich.console import Console    # Import colorized output
 from time import time               # Time for rough timing measurements
 from tqdm import tqdm               # Import fancy progress bars
 from parser import read_machine
 # function for some time logging
 start = time()
 start_total = start
@ -38,37 +39,14 @@ def measure_time(*str):
 # command line options
 parser = ArgumentParser()
 parser.add_argument('filename', help='File of the mealy machine (dot format)')
-parser.add_argument('length', help='Length of the uio', type=int)
+parser.add_argument('length', help='Length of the UIO', type=int)
 parser.add_argument('-v', '--verbose', help="Show more output", action='store_true')
 parser.add_argument('--solver', help='Which solver to use (default g3)', default='g3')
 parser.add_argument('--bases', help='For which states to compute an UIO (leave empty for all states)', nargs='*')
 args = parser.parse_args()
-# reading the automaton with a hacky .dot parser
+# reading the automaton
-alphabet = set()
+(alphabet, outputs, states, delta, labda) = read_machine(args.filename)
 outputs = set()
 states = set()
 delta = {}
 labda = {}
 with open(args.filename) as file:
  for line in file.readlines():
    asdf = line.split()
    if len(asdf) > 3 and asdf[1] == '->':
      s = asdf[0]
      t = asdf[2]
      rest = ''.join(asdf[3:])
      label = rest.split('"')[1]
      [i, o] = label.split('/')
      states.add(s)
      states.add(t)
      alphabet.add(i)
      outputs.add(o)
      delta[(s, i)] = t
      labda[(s, i)] = o
 # if the base states are not specified, take all
 if args.bases == None:
@ -163,8 +141,8 @@ for s in tqdm(states, desc="CNF paths"):
  next_set = set()
  for i in range(length):
-    # Only one successor state should be enabled (this clause is
+    # Only one successor state should be enabled.
-    # probably redundant). For i == 0, this is a single state (s).
+    # For i == 0, this is a single state (s).
    unique([svar(s, i, t) for t in current_set])
    # We keep track of the possible outputs