From 9e0132e2e81b96fea3c4febef7ed37ecce24b33b Mon Sep 17 00:00:00 2001
From: Joshua Moerman <lakseru@gmail.com>
Date: Tue, 18 Jan 2022 17:00:32 +0100
Subject: [PATCH] Add more comments and clean up a bit

---
 uio.py | 91 +++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 58 insertions(+), 33 deletions(-)

diff --git a/uio.py b/uio.py
index 844768b..aeff0a4 100644
--- a/uio.py
+++ b/uio.py
@@ -8,7 +8,6 @@ from tqdm import tqdm               # Import fancy progress bars
 from rich.console import Console    # Import colorized output
 
 solver_name = 'g3'
-verbose = True
 
 start = time.time()
 start_total = start
@@ -23,10 +22,13 @@ def measure_time(*str):
 # Reading the input
 # *****************
 
+# command line options
+# TODO: find a way to read the base states
 parser = argparse.ArgumentParser()
 parser.add_argument('filename', help='File of the mealy machine (dot format)')
 parser.add_argument('length', help="Length of the uio", type=int)
 parser.add_argument('-v', '--verbose', help="Show more output", action="store_true")
+parser.add_argument('--solver', help="Which solver to use (default g3)", default='g3')
 args = parser.parse_args()
 
 length = args.length
@@ -65,30 +67,39 @@ measure_time('Constructed automaton with', len(states), 'states and', len(alphab
 
 # ********************
 # Seting up the solver
+#  And the variables
 # ********************
 
 vpool = IDPool()
-solver = Solver(name=solver_name)
+solver = Solver(name=args.solver)
 
-# mapping van variabeles: x_... ->  x_i
+# Since the solver can only deal with variables x_i, we need
+# a mapping of variabeles: x_whatever  ->  x_i.
+# We use the IDPool of pysat for this. It generates variables
+# on the fly.
 def var(x):
   return(vpool.id(('uio', x)))
 
-# On place i we have symbol a
+# Variables for the guessed word
+# avar(i, a) means: on place i there is symbol a
 def avar(i, a):
   return var(('a', i, a))
 
-# Each state has its own path
-# On path s, on place i, there is output o
-def ovar(s, i, o):
-  return var(('o', s, i, o))
-
-# On path s, we are in state t on place i
+# Each state has its own path, and on this path we encode
+# states and outputs
+# svar(s, i, t) means: on path s, at place i, we are in state t
 def svar(s, i, t):
   return var(('s', s, i, t))
 
-# Extra variable (a la Tseytin transformation)
-# On path s, there is a difference on place i
+# ovar(s, i, o) means: on path s, on place i, there is output o
+def ovar(s, i, o):
+  return var(('o', s, i, o))
+
+# We use extra variables to encode the fact that there is
+# a difference in output (a la Tseytin transformation)
+# evar(s, i) means: on path s, on place i, there is a difference
+# in output. Note: the converse (if there is a difference
+# evar(s, i) is true) does not hold!
 def evar(s, i):
   return var(('e', s, i))
 
@@ -97,18 +108,18 @@ def evar(s, i):
 # we want to compute an UIO. By changing these variables only, we
 # can keep most of the formula the same and incrementally solve it.
 # The fixed state is called the "base".
+# bvar(s) means: s is the base.
 def bvar(s):
   return var(('base', s))
 
-# maakt de constraint dat precies een van de literals waar moet zijn
+# We often need to assert that exacly one variable in a list holds.
+# For that we use pysat's cardinality encoding. This might introduce
+# additional variables. But that does not matter for us.
 def unique(lits):
-  # deze werken goed: pairwise, seqcounter, bitwise, mtotalizer, kmtotalizer
-  # anderen geven groter aantal oplossingen
-  # alles behalve pairwise introduceert meer variabelen
   cnf = CardEnc.equals(lits, 1, vpool=vpool, encoding=EncType.seqcounter)
   solver.append_formula(cnf.clauses)
 
-measure_time('Setup solver')
+measure_time('Setup solver', args.solver)
 
 
 # ********************
@@ -116,16 +127,17 @@ measure_time('Setup solver')
 # ********************
 
 # Guessing the word:
-# variable x_('in', i, a) says: on place i there is an input a
 for i in range(length):
   unique([avar(i, a) for a in alphabet])
 
-# We should only enable one base state (this allows for an optimisation later)
+# We should only enable one base state.
+# (This allows for an optimisation later.)
 unique([bvar(base) for base in bases])
 
+
 # For each state s, we construct a path of possible successor states,
 # following the guessed word. This path should be consistent with delta,
-# and we also record the outputs along this path. The output are later
+# and we also record the outputs along this path. The outputs are later
 # used to decide whether we found a different output.
 possible_outputs = {}
 for s in tqdm(states, desc="CNF paths"):
@@ -135,8 +147,8 @@ for s in tqdm(states, desc="CNF paths"):
   next_set = set()
 
   for i in range(length):
-    # Only one successor state should be enable (probably redundant)
-    # For i == 0, this is a single state (namely s)
+    # Only one successor state should be enabled (this clause is
+    # probably redundant). For i == 0, this is a single state (s).
     unique([svar(s, i, t) for t in current_set])
 
     # We keep track of the possible outputs
@@ -151,9 +163,9 @@ for s in tqdm(states, desc="CNF paths"):
         output = labda[(t, a)]
         possible_outputs[(s, i)].add(output)
 
-        # Constraint: when in state t and input a, we output o
-        # x_('s', state, i, t) /\ x_('in', i, a) => x_('o', i, labda(t, a))
-        # == -x_('s', state, i, t) \/ -x_('in', i, a) \/ x_('o', i, labda(t, a))
+        # Constraint: on path s, when in state t and input a, we output o
+        # x_('s', s, i, t) /\ x_('in', i, a) => x_('o', i, labda(t, a))
+        # == -x_('s', s, i, t) \/ -x_('in', i, a) \/ x_('o', i, labda(t, a))
         solver.add_clause([-sv, -av, ovar(s, i, output)])
 
         # when i == length-1 we don't need to consider successors
@@ -161,13 +173,12 @@ for s in tqdm(states, desc="CNF paths"):
           next_t = delta[(t, a)]
           next_set.add(next_t)
 
-          # Constraint: when in state t and input a, we go to next_t
+          # Constraint: on path s, when in state t and input a, we go to next_t
           # x_('s', s, i, t) /\ x_('in', i, a) => x_('s', s, i+1, delta(t, a))
           # == -x_('s', s, i, t) \/ -x_('in', i, a) \/ x_('s', s, i+1, delta(t, a))
           solver.add_clause([-sv, -av, svar(s, i+1, next_t)])
     
     # Only one output should be enabled
-    # variable x_('out', s, i, a) says: on place i there is an output o of the path s
     unique([ovar(s, i, o) for o in possible_outputs[(s, i)]])
 
     # Next iteration with successor states
@@ -175,13 +186,17 @@ for s in tqdm(states, desc="CNF paths"):
     next_set = set()
 
 
-# If(f) the output of a state is different than the one from our base state,
+# If the output of a state is different than the one from our base state,
 # then, we encode that in a new variable. This is only needed when the base
 # state is active, so the first literal in these clauses is -bvar(base).
+# Also note, we only encode the converse: if there is a difference claimed
+# and base has a certain output, than the state should not have that output.
+# This means that the solver doesn't report all differences, but at least one.
 for s in tqdm(states, desc="CNF diffs"):
   # Constraint: there is a place, such that there is a difference in output
   # \/_i x_('e', s, i)
-  # If s is our base, we don't care
+  # If s is our base, we don't care (this can be done, because only
+  # a single bvar is true).
   if s in bases:
     solver.add_clause([bvar(s)] + [evar(s, i) for i in range(length)])
   else:
@@ -198,7 +213,7 @@ for s in tqdm(states, desc="CNF diffs"):
       outputs_base = possible_outputs[(base, i)]
       outputs_s = possible_outputs[(s, i)]
 
-      # We encode, if the base is enabled and there is a difference,
+      # We encode: if the base is enabled and there is a difference,
       # then the outputs should actually differ. (We do not have to
       # encode the other implication!)
       # x_('b', base) /\ x_('e', s, i) /\ x_('o', base, i, o) => -x_('o', s, i, o)
@@ -207,7 +222,6 @@ for s in tqdm(states, desc="CNF diffs"):
         if o in outputs_s:
           solver.add_clause([-bv, -evar(s, i), -ovar(base, i, o), -ovar(s, i, o)])
 
-
 measure_time('Constructed CNF with', solver.nof_clauses(), 'clauses and', solver.nof_vars(), 'variables')
 
 
@@ -215,25 +229,35 @@ measure_time('Constructed CNF with', solver.nof_clauses(), 'clauses and', solver
 # Solving and output
 # ******************
 
+# We set up some things for nice output
 console = Console(markup=False, highlight=False)
 max_state_length = max([len(str) for str in states])
 
+# We count the number of uios
 num_uios = {True: 0, False: 0}
 
+# We want to find an UIO for each base. We have already constructed
+# the CNF. So it remains to add assumptions to the solver, this is
+# called "incremental solving" in SAT literature.
 for base in bases:
   console.print('')
   console.print('*** UIO for state', base, style='bold blue')
+
+  # Solve with bvar(base) being true
   b = solver.solve(assumptions=[bvar(base)])
   num_uios[b] = num_uios[b] + 1
   measure_time('Solver finished')
 
   if b:
+    # We get the model, and store all true variables
+    # in a set, for easy lookup.
     m = solver.get_model()
     truth = set()
     for l in m:
       if l > 0:
         truth.add(l)
     
+    # We print the word
     console.print('! UIO of length', length, style='bold green')
     for i in range(length):
       for a in alphabet:
@@ -241,7 +265,7 @@ for base in bases:
           console.print(a, end=' ', style='bold green')
     console.print('')
 
-    # For each state, we print the paths and output.
+    # (If verbose) For each state, we print the paths and output.
     # We mark the differences red (there can be differences not
     # marked, these are the differences decided in the solving).
     if args.verbose:
@@ -266,10 +290,11 @@ for base in bases:
 
   else:
     console.print('! no UIO of length', length, style='bold red')
-    core = solver.get_core()
     # The core returned by the solver is not interesting:
     # It is only the assumption (i.e. bvar).
 
+
+# Report some final stats
 start = start_total
 print('')
 measure_time("Done with total time")