Mirror of https://git.cs.ou.nl/joshua.moerman/utf8-learner.git (synced 2025-07-01 14:17:45 +02:00)

Commit: already did all the things :-)
This commit is contained in: parent 9a09a24df3, commit 65f891e731

36 changed files with 7873 additions and 0 deletions
5  dfa-decompose/.ruff.toml  Normal file
@@ -0,0 +1,5 @@
indent-width = 2
line-length = 320

[format]
quote-style = "single"
20  dfa-decompose/README.md  Normal file
@@ -0,0 +1,20 @@
dfa-decompose
=============

Given a DFA, this tool tries to find smaller DFAs whose intersection accepts
exactly the language of the input DFA.

```
pip install -r requirements.txt
python main.py
```

## Copyright notice

(c) 2025 Joshua Moerman, Open Universiteit, licensed under the EUPL (European
Union Public License). If you want to use this code and find the license not
suitable for you, then please do get in touch.

```
SPDX-License-Identifier: EUPL-1.2
```
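Concretely, "intersection" here is the product construction: run all component DFAs in parallel and accept only when every component accepts. A minimal sketch of that idea, using the same dict-based DFA representation as main.py below (the helpers intersect and accepts are mine, not functions from the repository):

```
# Illustration only: product construction for complete DFAs given as
# {'initial': ..., 'final': set(...), 'trans': {(state, input): state}} dicts.

def intersect(d1, d2, alphabet):
  """Product DFA: states are pairs, a pair is accepting iff both parts are."""
  init = (d1['initial'], d2['initial'])
  trans, todo, seen = {}, [init], {init}
  while todo:
    (s1, s2) = todo.pop()
    for a in alphabet:
      t = (d1['trans'][(s1, a)], d2['trans'][(s2, a)])
      trans[((s1, s2), a)] = t
      if t not in seen:
        seen.add(t)
        todo.append(t)
  final = {(s1, s2) for (s1, s2) in seen if s1 in d1['final'] and s2 in d2['final']}
  return {'initial': init, 'final': final, 'trans': trans}

def accepts(dfa, word):
  state = dfa['initial']
  for a in word:
    state = dfa['trans'][(state, a)]
  return state in dfa['final']
```

The decompositions reported below (for example 6+4 with two components) are exactly the cases where this product accepts the same language as the original 9-state UTF-8 DFA.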
195  dfa-decompose/main.py  Normal file
@@ -0,0 +1,195 @@
# Copyright 2024-2025 Joshua Moerman, Open Universiteit. All rights reserved
# SPDX-License-Identifier: EUPL-1.2

import itertools
from pysat.solvers import Solver
from pysat.card import CardEnc
from pysat.formula import IDPool

# Script to decompose a DFA as the intersection of smaller DFAs.
# As an example this is applied to the UTF-8 automaton, see
# https://joshuamoerman.nl/2025/6/The-UTF-8-Automaton.html

# Regular language of UTF8 sequences (9 states including a sink state).
# The model used here is a complete DFA, bytes are mapped to classes
# as explained here: https://bjoern.hoehrmann.de/utf-8/decoder/dfa/

# Possible improvements:
# - Allow partial models. The current smallest decomposition is as 6+4.
#   Coincidentally, both components have a sink state and we get a
#   decomposition of size 5+3 which is quite good. Encoding partiality
#   directly will always find a minimal decomposition.
# - Get rid of input "8", since it is never accepted in a UTF8 string.
# - Only use reduced_sizes and not size. This will reduce the number of
#   variables and constraints.
# - Write a loop to search for smallest decompositions, this is now
#   done by hand.

# With 2 components, decomposable as:
# 5+5 NO
# 6+4 YES
# 7+3 YES
# 8+2 NO
# and everything below: NO

# With 3 components, decomposable as:
# 4+4+4 NO
# 5+4+3 YES
# 5+5+2 ???
# 6+3+3 YES
# 6+4+2 YES (see above)
# 7+3+2 YES (see above)
# 8+2+2 ???
# any x+y+z such that the sum is 11 or less: NO
# any x+y+z such that the sum is 13 or more: YES
def exampleDfa():
  initial = 0
  final = set([0])
  alphabet = list(range(12))
  states = list(range(9))
  trans = {}

  trans[(0, 0)] = 0
  trans[(0, 2)] = 2
  trans[(0, 3)] = 3
  trans[(0, 4)] = 5
  trans[(0, 5)] = 8
  trans[(0, 6)] = 7
  trans[(0, 10)] = 4
  trans[(0, 11)] = 6
  trans[(2, 1)] = 0
  trans[(2, 7)] = 0
  trans[(2, 9)] = 0
  trans[(3, 1)] = 2
  trans[(3, 7)] = 2
  trans[(3, 9)] = 2
  trans[(4, 7)] = 2
  trans[(5, 1)] = 2
  trans[(5, 9)] = 2
  trans[(6, 7)] = 3
  trans[(6, 9)] = 3
  trans[(7, 1)] = 3
  trans[(7, 7)] = 3
  trans[(7, 9)] = 3
  trans[(8, 1)] = 3

  assert len(trans) == 23

  for s in states:
    for a in alphabet:
      if (s, a) not in trans:
        trans[(s, a)] = 1
  return {'initial': initial, 'final': final, 'trans': trans}


class DfaEncode:
  def __init__(self):
    self.components = 3
    self.reduced_sizes = {0: 7, 1: 2, 2: 2}
    print(f'SIZES = {self.reduced_sizes}')
    self.size = max(self.reduced_sizes.values())
    self.alphabet = list(range(12))
    self.dfa = exampleDfa()
    self.dfa_size = 9

    self.vpool = IDPool()
    self.solver = Solver()

  def var_bool(self, b):
    return self.vpool.id(('bool', b))

  def var_trans(self, m, s, a, t):
    assert 0 <= m and m < self.components
    assert 0 <= s and s < self.size
    assert a in self.alphabet
    assert 0 <= t and t < self.size

    return self.vpool.id(('trans', m, s, a, t))

  def var_final(self, m, s):
    assert 0 <= m and m < self.components
    assert 0 <= s and s < self.size

    return self.vpool.id(('final', m, s))

  def var_bisim(self, ss, org):
    assert len(ss) == self.components

    return self.vpool.id(('bisim', tuple(ss), org))

  def constrain_component(self, m):
    for s in range(self.size):
      for a in self.alphabet:
        lits = [self.var_trans(m, s, a, t) for t in range(self.reduced_sizes[m])]
        cnf = CardEnc.equals(lits, 1, vpool=self.vpool)
        self.solver.append_formula(cnf.clauses)

  def constrain_bisim(self):
    ss_init = [0 for m in range(self.components)]
    org_init = self.dfa['initial']

    # we require initial states to be bisimilar
    self.solver.add_clause([self.var_bisim(ss_init, org_init)])

    for org in range(self.dfa_size):
      for ss in itertools.product(range(self.size), repeat=self.components):
        # require the intersection of components to have the right acceptance
        # if bisim and all components accept => then original dfa accepts
        clause1 = [-self.var_bisim(ss, org)] + [-self.var_final(m, ss[m]) for m in range(self.components)] + [self.var_bool(org in self.dfa['final'])]
        self.solver.add_clause(clause1)
        # if bisim and original dfa accepts => then each component accepts
        for m in range(self.components):
          clause2 = [-self.var_bisim(ss, org)] + [-self.var_bool(org in self.dfa['final'])] + [self.var_final(m, ss[m])]
          self.solver.add_clause(clause2)

        # require transitions to bisimilar states
        for a in self.alphabet:
          org2 = self.dfa['trans'][(org, a)]

          for tt in itertools.product(range(self.size), repeat=self.components):
            clause = [-self.var_bisim(ss, org)] + [-self.var_trans(m, ss[m], a, tt[m]) for m in range(self.components)] + [self.var_bisim(tt, org2)]
            self.solver.add_clause(clause)

  def constraint(self):
    self.solver.add_clause([-self.var_bool(False)])
    self.solver.add_clause([self.var_bool(True)])

    for m in range(self.components):
      self.constrain_component(m)

    self.constrain_bisim()

  def solve(self):
    self.solver.solve()

  def get_model(self):
    lits = self.solver.get_model()
    model = set([lit for lit in lits if lit > 0])

    components = {}
    for m in range(self.components):
      dfa = {'trans': {}, 'final': set(), 'initial': 0}
      for s in range(self.size):
        if self.var_final(m, s) in model:
          dfa['final'].add(s)

        for a in self.alphabet:
          for t in range(self.size):
            if self.var_trans(m, s, a, t) in model:
              dfa['trans'][(s, a)] = t

      components[m] = dfa

    return components


def main():
  encoder = DfaEncode()
  encoder.constraint()
  encoder.solver.solve()
  m = encoder.get_model()
  print(m)


if __name__ == '__main__':
  main()
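The SAT model only asserts bisimilarity between the encoded product and the input DFA; a quick, independent sanity check of get_model()'s output is to run the original DFA and all components in lockstep on every word up to some bound. The following is a sketch of such a check (check_decomposition is my name, not a function in main.py):

```
import itertools

def check_decomposition(original, components, alphabet, max_len=5):
  """Brute-force: the intersection of the component DFAs must agree with the
  original complete DFA on every word of length <= max_len."""
  def run(dfa, word):
    state = dfa['initial']
    for a in word:
      state = dfa['trans'][(state, a)]
    return state in dfa['final']

  for n in range(max_len + 1):
    for word in itertools.product(alphabet, repeat=n):
      want = run(original, word)
      got = all(run(c, word) for c in components.values())
      if want != got:
        return word  # first counterexample found
  return None  # agreement up to the bound
```

For a satisfiable size assignment, check_decomposition(exampleDfa(), encoder.get_model(), list(range(12))) should return None.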
1  dfa-decompose/requirements.txt  Normal file
@@ -0,0 +1 @@
python-sat
16  dfa-decompose/results-2/comp2-6-4.txt  Normal file
@@ -0,0 +1,16 @@
{0:
{'trans':
{(0, 0): 0, (0, 1): 4, (0, 2): 2, (0, 3): 5, (0, 4): 5, (0, 5): 3, (0, 6): 1, (0, 7): 4, (0, 8): 4, (0, 9): 4, (0, 10): 3, (0, 11): 1, (1, 0): 4, (1, 1): 5, (1, 2): 4, (1, 3): 4, (1, 4): 4, (1, 5): 4, (1, 6): 4, (1, 7): 5, (1, 8): 4, (1, 9): 5, (1, 10): 4, (1, 11): 4, (2, 0): 4, (2, 1): 0, (2, 2): 4, (2, 3): 4, (2, 4): 4, (2, 5): 4, (2, 6): 4, (2, 7): 0, (2, 8): 1, (2, 9): 0, (2, 10): 4, (2, 11): 4, (3, 0): 4, (3, 1): 5, (3, 2): 4, (3, 3): 4, (3, 4): 4, (3, 5): 4, (3, 6): 4, (3, 7): 2, (3, 8): 4, (3, 9): 4, (3, 10): 4, (3, 11): 4, (4, 0): 4, (4, 1): 4, (4, 2): 4, (4, 3): 4, (4, 4): 4, (4, 5): 4, (4, 6): 4, (4, 7): 4, (4, 8): 4, (4, 9): 4, (4, 10): 4, (4, 11): 4, (5, 0): 4, (5, 1): 2, (5, 2): 4, (5, 3): 4, (5, 4): 4, (5, 5): 4, (5, 6): 4, (5, 7): 2, (5, 8): 4, (5, 9): 2, (5, 10): 4, (5, 11): 4},
'final':
{0},
'initial':
0},

1:
{'trans':
{(0, 0): 1, (0, 1): 1, (0, 2): 1, (0, 3): 1, (0, 4): 0, (0, 5): 0, (0, 6): 1, (0, 7): 2, (0, 8): 1, (0, 9): 1, (0, 10): 3, (0, 11): 3, (1, 0): 1, (1, 1): 1, (1, 2): 1, (1, 3): 1, (1, 4): 0, (1, 5): 0, (1, 6): 1, (1, 7): 1, (1, 8): 2, (1, 9): 1, (1, 10): 3, (1, 11): 3, (2, 0): 2, (2, 1): 2, (2, 2): 2, (2, 3): 2, (2, 4): 2, (2, 5): 2, (2, 6): 2, (2, 7): 2, (2, 8): 2, (2, 9): 2, (2, 10): 2, (2, 11): 2, (3, 0): 3, (3, 1): 2, (3, 2): 0, (3, 3): 2, (3, 4): 2, (3, 5): 3, (3, 6): 1, (3, 7): 1, (3, 8): 3, (3, 9): 1, (3, 10): 0, (3, 11): 0, (4, 0): 3, (4, 1): 3, (4, 2): 3, (4, 3): 3, (4, 4): 3, (4, 5): 3, (4, 6): 3, (4, 7): 3, (4, 8): 3, (4, 9): 3, (4, 10): 3, (4, 11): 3, (5, 0): 3, (5, 1): 3, (5, 2): 3, (5, 3): 3, (5, 4): 3, (5, 5): 3, (5, 6): 3, (5, 7): 3, (5, 8): 3, (5, 9): 3, (5, 10): 3, (5, 11): 3},
'final':
{0, 1},
'initial':
0}
}
10  dfa-decompose/results-2/comp2-7-3.txt  Normal file
@@ -0,0 +1,10 @@
{
0: {
'trans': {(0, 0): 0, (0, 1): 1, (0, 2): 5, (0, 3): 6, (0, 4): 6, (0, 5): 4, (0, 6): 2, (0, 7): 1, (0, 8): 6, (0, 9): 1, (0, 10): 4, (0, 11): 3, (1, 0): 1, (1, 1): 1, (1, 2): 1, (1, 3): 1, (1, 4): 1, (1, 5): 1, (1, 6): 1, (1, 7): 1, (1, 8): 6, (1, 9): 1, (1, 10): 1, (1, 11): 1, (2, 0): 1, (2, 1): 6, (2, 2): 1, (2, 3): 1, (2, 4): 1, (2, 5): 1, (2, 6): 1, (2, 7): 6, (2, 8): 6, (2, 9): 6, (2, 10): 1, (2, 11): 1, (3, 0): 1, (3, 1): 1, (3, 2): 1, (3, 3): 1, (3, 4): 1, (3, 5): 1, (3, 6): 1, (3, 7): 6, (3, 8): 6, (3, 9): 6, (3, 10): 1, (3, 11): 1, (4, 0): 1, (4, 1): 6, (4, 2): 1, (4, 3): 1, (4, 4): 0, (4, 5): 1, (4, 6): 1, (4, 7): 5, (4, 8): 1, (4, 9): 1, (4, 10): 1, (4, 11): 1, (5, 0): 1, (5, 1): 0, (5, 2): 1, (5, 3): 1, (5, 4): 1, (5, 5): 1, (5, 6): 1, (5, 7): 0, (5, 8): 6, (5, 9): 0, (5, 10): 1, (5, 11): 1, (6, 0): 1, (6, 1): 5, (6, 2): 1, (6, 3): 1, (6, 4): 1, (6, 5): 1, (6, 6): 1, (6, 7): 5, (6, 8): 6, (6, 9): 5, (6, 10): 1, (6, 11): 1},
'final': {0},
'initial': 0},
1: {
'trans': {(0, 0): 0, (0, 1): 2, (0, 2): 2, (0, 3): 0, (0, 4): 1, (0, 5): 2, (0, 6): 2, (0, 7): 2, (0, 8): 2, (0, 9): 2, (0, 10): 0, (0, 11): 2, (1, 0): 1, (1, 1): 2, (1, 2): 0, (1, 3): 2, (1, 4): 2, (1, 5): 1, (1, 6): 0, (1, 7): 0, (1, 8): 2, (1, 9): 2, (1, 10): 1, (1, 11): 0, (2, 0): 2, (2, 1): 0, (2, 2): 0, (2, 3): 2, (2, 4): 2, (2, 5): 1, (2, 6): 0, (2, 7): 0, (2, 8): 2, (2, 9): 0, (2, 10): 1, (2, 11): 0, (3, 0): 2, (3, 1): 2, (3, 2): 2, (3, 3): 2, (3, 4): 2, (3, 5): 2, (3, 6): 2, (3, 7): 2, (3, 8): 2, (3, 9): 2, (3, 10): 2, (3, 11): 2, (4, 0): 2, (4, 1): 2, (4, 2): 2, (4, 3): 2, (4, 4): 2, (4, 5): 2, (4, 6): 2, (4, 7): 2, (4, 8): 2, (4, 9): 2, (4, 10): 2, (4, 11): 2, (5, 0): 2, (5, 1): 2, (5, 2): 2, (5, 3): 2, (5, 4): 2, (5, 5): 2, (5, 6): 2, (5, 7): 2, (5, 8): 2, (5, 9): 2, (5, 10): 2, (5, 11): 2, (6, 0): 2, (6, 1): 2, (6, 2): 2, (6, 3): 2, (6, 4): 2, (6, 5): 2, (6, 6): 2, (6, 7): 2, (6, 8): 2, (6, 9): 2, (6, 10): 2, (6, 11): 2},
'final': {0},
'initial': 0}
}
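The .dot files in results-3 further down appear to be renderings of dicts like the ones above, with all input classes that share an edge grouped into a single label. A small sketch of such a conversion (write_dot and its optional sink argument are my additions, not code from this commit):

```
from collections import defaultdict

def write_dot(dfa, sink=None):
  """Render a dict-based DFA as Graphviz text, grouping the input classes
  per (source, target) edge; transitions touching a designated sink are dropped."""
  edges = defaultdict(list)
  for (s, a), t in sorted(dfa['trans'].items()):
    if s != sink and t != sink:
      edges[(s, t)].append(a)
  lines = ['digraph g{']
  for (s, t), labels in sorted(edges.items()):
    lines.append(f'{s} -> {t} [label="{labels}"]')
  lines.append('}')
  return '\n'.join(lines)
```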
16  dfa-decompose/results-2/comp2-diagram.txt  Normal file
@@ -0,0 +1,16 @@

                            8,5+ YES
                           /        \
                   7,5 YES            8,4 YES
                  /        \         /        \
          6,5 YES            7,4 YES            8,3 YES
         /        \         /        \         /        \
 5,5 no            6,4 YES            7,3 YES            8,2 no
         \        /        \         /        \         /
           5,4 no            6,3 no             7,2 no
          /       \         /       \          /
 4,4 no            5,3 no             6,2 no
         \        /        \         /
           4,3 no            5,2 no
                  \         /
                    4-,2 no
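This lattice was explored by hand. The "loop to search for smallest decompositions" mentioned in main.py's comments could enumerate size vectors in order of total state count, for instance with a generator like the sketch below (candidate_sizes is mine; actually feeding each candidate into the SAT encoding would still require generalizing DfaEncode, whose sizes are currently hardcoded):

```
import itertools

def candidate_sizes(components, min_size=2, max_size=8):
  """Yield size vectors (largest component first), ordered by total number of
  states, so the first satisfiable candidate is a smallest decomposition."""
  for total in range(components * min_size, components * max_size + 1):
    for sizes in itertools.combinations_with_replacement(range(min_size, max_size + 1), components):
      if sum(sizes) == total:
        yield tuple(sorted(sizes, reverse=True))
```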
15  dfa-decompose/results-2/comp2_1.dot  Normal file
@@ -0,0 +1,15 @@
digraph g {
s0 [shape="doublecircle" label="s0"];
s0 -> s0 [label="0x00-0x7F"]
s0 -> s2 [label="0xC2-0xDF"]
s0 -> s5 [label="0xE1-0xEF"]
s0 -> s3 [label="0xE0, 0xF4"]
s0 -> s1 [label="0xF0-0xF3"]
s1 -> s5 [label="0x80-0xBF"]
s2 -> s0 [label="0x80-0xBF"]
s3 -> s5 [label="0x80-0x8F"]
s3 -> s2 [label="0xA0-0xBF"]
s5 -> s2 [label="0x80-0xBF"]
__start0 [label="" shape="none" width="0" height="0"];
__start0 -> s0;
}
BIN  dfa-decompose/results-2/comp2_1.pdf  Normal file
Binary file not shown.
15  dfa-decompose/results-2/comp2_2.dot  Normal file
@@ -0,0 +1,15 @@
digraph g {
s0 [shape="doublecircle" label="s0"];
s1 [shape="doublecircle" label="s1"];
s0 -> s1 [label="0x00-0x9F,\n0xC2-0xDF,\n0xE1-0xEC,\n0xEE-0xEF,\n0xF1-0xF3"]
s0 -> s0 [label="0xED, 0xF4"]
s0 -> s3 [label="0xE0, 0xF0"]
s1 -> s1 [label="0x00-0xBF, 0xC2-0xDF, 0xE1-0xEC, 0xEE-0xEF, 0xF1-0xF3"]
s1 -> s0 [label="0xED, 0xF4"]
s1 -> s3 [label="0xE0, 0xF0"]
s3 -> s3 [label="0x00-0x7F, 0xF4"]
s3 -> s0 [label="0xC2-0xE0, 0xF0"]
s3 -> s1 [label="0x90-0xBF, 0xF1-0xF3"]
__start0 [label="" shape="none" width="0" height="0"];
__start0 -> s0;
}
BIN  dfa-decompose/results-2/comp2_2.pdf  Normal file
Binary file not shown.
2  dfa-decompose/results-3/comp3-5-4-3.txt  Normal file
@@ -0,0 +1,2 @@
SIZES = {0: 5, 1: 4, 2: 3}
{0: {'trans': {(0, 0): 0, (0, 1): 3, (0, 2): 2, (0, 3): 4, (0, 4): 4, (0, 5): 1, (0, 6): 1, (0, 7): 3, (0, 8): 2, (0, 9): 3, (0, 10): 4, (0, 11): 1, (1, 0): 3, (1, 1): 4, (1, 2): 3, (1, 3): 3, (1, 4): 3, (1, 5): 3, (1, 6): 3, (1, 7): 4, (1, 8): 2, (1, 9): 4, (1, 10): 0, (1, 11): 3, (2, 0): 3, (2, 1): 0, (2, 2): 3, (2, 3): 3, (2, 4): 3, (2, 5): 3, (2, 6): 3, (2, 7): 0, (2, 8): 2, (2, 9): 0, (2, 10): 0, (2, 11): 3, (3, 0): 3, (3, 1): 3, (3, 2): 3, (3, 3): 3, (3, 4): 3, (3, 5): 3, (3, 6): 3, (3, 7): 3, (3, 8): 2, (3, 9): 3, (3, 10): 0, (3, 11): 3, (4, 0): 3, (4, 1): 2, (4, 2): 3, (4, 3): 3, (4, 4): 3, (4, 5): 3, (4, 6): 3, (4, 7): 2, (4, 8): 2, (4, 9): 2, (4, 10): 0, (4, 11): 3}, 'final': {0}, 'initial': 0}, 1: {'trans': {(0, 0): 1, (0, 1): 3, (0, 2): 3, (0, 3): 3, (0, 4): 0, (0, 5): 0, (0, 6): 3, (0, 7): 2, (0, 8): 3, (0, 9): 3, (0, 10): 1, (0, 11): 1, (1, 0): 1, (1, 1): 2, (1, 2): 3, (1, 3): 3, (1, 4): 0, (1, 5): 0, (1, 6): 3, (1, 7): 3, (1, 8): 3, (1, 9): 3, (1, 10): 1, (1, 11): 1, (2, 0): 2, (2, 1): 2, (2, 2): 2, (2, 3): 2, (2, 4): 2, (2, 5): 2, (2, 6): 2, (2, 7): 2, (2, 8): 3, (2, 9): 2, (2, 10): 2, (2, 11): 2, (3, 0): 0, (3, 1): 3, (3, 2): 3, (3, 3): 3, (3, 4): 0, (3, 5): 0, (3, 6): 3, (3, 7): 3, (3, 8): 3, (3, 9): 3, (3, 10): 1, (3, 11): 1, (4, 0): 3, (4, 1): 3, (4, 2): 3, (4, 3): 3, (4, 4): 3, (4, 5): 3, (4, 6): 3, (4, 7): 3, (4, 8): 3, (4, 9): 3, (4, 10): 3, (4, 11): 3}, 'final': {0, 1, 3}, 'initial': 0}, 2: {'trans': {(0, 0): 0, (0, 1): 0, (0, 2): 0, (0, 3): 0, (0, 4): 0, (0, 5): 1, (0, 6): 0, (0, 7): 0, (0, 8): 2, (0, 9): 0, (0, 10): 1, (0, 11): 0, (1, 0): 2, (1, 1): 0, (1, 2): 2, (1, 3): 2, (1, 4): 2, (1, 5): 2, (1, 6): 2, (1, 7): 0, (1, 8): 2, (1, 9): 2, (1, 10): 2, (1, 11): 2, (2, 0): 2, (2, 1): 2, (2, 2): 2, (2, 3): 2, (2, 4): 2, (2, 5): 2, (2, 6): 2, (2, 7): 2, (2, 8): 2, (2, 9): 2, (2, 10): 2, (2, 11): 2, (3, 0): 2, (3, 1): 2, (3, 2): 2, (3, 3): 2, (3, 4): 2, (3, 5): 2, (3, 6): 2, (3, 7): 2, (3, 8): 2, (3, 9): 2, (3, 10): 2, (3, 11): 2, (4, 0): 2, (4, 1): 2, (4, 2): 2, (4, 3): 2, (4, 4): 2, (4, 5): 2, (4, 6): 2, (4, 7): 2, (4, 8): 2, (4, 9): 2, (4, 10): 2, (4, 11): 2}, 'final': {0}, 'initial': 0}}
2  dfa-decompose/results-3/comp3-6-3-3.txt  Normal file
@@ -0,0 +1,2 @@
SIZES = {0: 6, 1: 3, 2: 3}
{0: {'trans': {(0, 0): 0, (0, 1): 1, (0, 2): 2, (0, 3): 4, (0, 4): 4, (0, 5): 5, (0, 6): 3, (0, 7): 1, (0, 8): 5, (0, 9): 1, (0, 10): 5, (0, 11): 3, (1, 0): 1, (1, 1): 1, (1, 2): 1, (1, 3): 1, (1, 4): 1, (1, 5): 1, (1, 6): 1, (1, 7): 1, (1, 8): 5, (1, 9): 1, (1, 10): 1, (1, 11): 1, (2, 0): 1, (2, 1): 0, (2, 2): 1, (2, 3): 1, (2, 4): 1, (2, 5): 1, (2, 6): 1, (2, 7): 0, (2, 8): 5, (2, 9): 0, (2, 10): 1, (2, 11): 1, (3, 0): 1, (3, 1): 4, (3, 2): 1, (3, 3): 1, (3, 4): 1, (3, 5): 1, (3, 6): 1, (3, 7): 4, (3, 8): 5, (3, 9): 4, (3, 10): 1, (3, 11): 1, (4, 0): 1, (4, 1): 2, (4, 2): 1, (4, 3): 1, (4, 4): 1, (4, 5): 1, (4, 6): 1, (4, 7): 2, (4, 8): 5, (4, 9): 2, (4, 10): 1, (4, 11): 1, (5, 0): 1, (5, 1): 4, (5, 2): 1, (5, 3): 1, (5, 4): 1, (5, 5): 1, (5, 6): 1, (5, 7): 2, (5, 8): 5, (5, 9): 1, (5, 10): 1, (5, 11): 1}, 'final': {0}, 'initial': 0}, 1: {'trans': {(0, 0): 0, (0, 1): 1, (0, 2): 1, (0, 3): 2, (0, 4): 0, (0, 5): 1, (0, 6): 1, (0, 7): 2, (0, 8): 0, (0, 9): 1, (0, 10): 2, (0, 11): 1, (1, 0): 1, (1, 1): 2, (1, 2): 2, (1, 3): 1, (1, 4): 1, (1, 5): 0, (1, 6): 2, (1, 7): 2, (1, 8): 0, (1, 9): 2, (1, 10): 0, (1, 11): 2, (2, 0): 2, (2, 1): 1, (2, 2): 1, (2, 3): 2, (2, 4): 0, (2, 5): 1, (2, 6): 1, (2, 7): 1, (2, 8): 0, (2, 9): 1, (2, 10): 2, (2, 11): 1, (3, 0): 2, (3, 1): 2, (3, 2): 2, (3, 3): 2, (3, 4): 2, (3, 5): 2, (3, 6): 2, (3, 7): 2, (3, 8): 2, (3, 9): 2, (3, 10): 2, (3, 11): 2, (4, 0): 2, (4, 1): 2, (4, 2): 2, (4, 3): 2, (4, 4): 2, (4, 5): 2, (4, 6): 2, (4, 7): 2, (4, 8): 2, (4, 9): 2, (4, 10): 2, (4, 11): 2, (5, 0): 2, (5, 1): 2, (5, 2): 2, (5, 3): 2, (5, 4): 2, (5, 5): 2, (5, 6): 2, (5, 7): 2, (5, 8): 2, (5, 9): 2, (5, 10): 2, (5, 11): 2}, 'final': {0, 2}, 'initial': 0}, 2: {'trans': {(0, 0): 0, (0, 1): 2, (0, 2): 1, (0, 3): 1, (0, 4): 1, (0, 5): 1, (0, 6): 1, (0, 7): 1, (0, 8): 1, (0, 9): 1, (0, 10): 0, (0, 11): 0, (1, 0): 1, (1, 1): 1, (1, 2): 1, (1, 3): 1, (1, 4): 1, (1, 5): 1, (1, 6): 1, (1, 7): 1, (1, 8): 1, (1, 9): 1, (1, 10): 0, (1, 11): 0, (2, 0): 2, (2, 1): 2, (2, 2): 2, (2, 3): 2, (2, 4): 2, (2, 5): 2, (2, 6): 2, (2, 7): 2, (2, 8): 1, (2, 9): 2, (2, 10): 2, (2, 11): 2, (3, 0): 2, (3, 1): 2, (3, 2): 2, (3, 3): 2, (3, 4): 2, (3, 5): 2, (3, 6): 2, (3, 7): 2, (3, 8): 2, (3, 9): 2, (3, 10): 2, (3, 11): 2, (4, 0): 2, (4, 1): 2, (4, 2): 2, (4, 3): 2, (4, 4): 2, (4, 5): 2, (4, 6): 2, (4, 7): 2, (4, 8): 2, (4, 9): 2, (4, 10): 2, (4, 11): 2, (5, 0): 2, (5, 1): 2, (5, 2): 2, (5, 3): 2, (5, 4): 2, (5, 5): 2, (5, 6): 2, (5, 7): 2, (5, 8): 2, (5, 9): 2, (5, 10): 2, (5, 11): 2}, 'final': {0, 1}, 'initial': 0}}
17  dfa-decompose/results-3/comp3_1.dot  Normal file
@@ -0,0 +1,17 @@
digraph g{
0 -> 0 [label="[0]"]
0 -> 1 [label="[5, 6, 11]"]
0 -> 2 [label="[2]"]
0 -> 3 [label="[1, 7, 9]"]
0 -> 4 [label="[3, 4, 10]"]
1 -> 0 [label="[10]"]
1 -> 3 [label="[0, 2, 3, 4, 5, 6, 11]"]
1 -> 4 [label="[1, 7, 9]"]
2 -> 0 [label="[1, 7, 9, 10]"]
2 -> 3 [label="[0, 2, 3, 4, 5, 6, 11]"]
3 -> 0 [label="[10]"]
3 -> 3 [label="[0, 1, 2, 3, 4, 5, 6, 7, 9, 11]"]
4 -> 0 [label="[10]"]
4 -> 2 [label="[1, 7, 9]"]
4 -> 3 [label="[0, 2, 3, 4, 5, 6, 11]"]
}
BIN  dfa-decompose/results-3/comp3_1.pdf  Normal file
Binary file not shown.
11  dfa-decompose/results-3/comp3_2.dot  Normal file
@@ -0,0 +1,11 @@
digraph g{
0 -> 0 [label="[4, 5]"]
0 -> 1 [label="[0, 10, 11]"]
0 -> 3 [label="[1, 2, 3, 6, 9]"]
1 -> 0 [label="[4, 5]"]
1 -> 1 [label="[0, 10, 11]"]
1 -> 3 [label="[2, 3, 6, 7, 9]"]
3 -> 0 [label="[0, 4, 5]"]
3 -> 1 [label="[10, 11]"]
3 -> 3 [label="[1, 2, 3, 6, 7, 9]"]
}
BIN  dfa-decompose/results-3/comp3_2.pdf  Normal file
Binary file not shown.
5  dfa-decompose/results-3/comp3_3.dot  Normal file
@@ -0,0 +1,5 @@
digraph g{
0 -> 0 [label="[0, 1, 2, 3, 4, 6, 7, 9, 11]"]
0 -> 1 [label="[5, 10]"]
1 -> 0 [label="[1, 7]"]
}
BIN  dfa-decompose/results-3/comp3_3.pdf  Normal file
Binary file not shown.
170  dfa-decompose/results-3/triples.dot  Normal file
@@ -0,0 +1,170 @@
digraph g {
t5x4x4 -> t6x4x4
t4x3x2 -> t4x4x2
t7x5x2 -> t7x5x3
t8x4x4 -> t8x5x4
t7x7x4 -> t7x7x5
t6x5x2 -> t6x6x2
t7x6x3 -> t7x6x4
t8x6x5 -> t8x7x5
t5x3x2 -> t6x3x2
t6x5x3 -> t6x5x4
t7x3x3 -> t8x3x3
t5x4x2 -> t5x5x2
t8x2x2 -> t8x3x2
t6x5x4 -> t7x5x4
t7x2x2 -> t7x3x2
t7x6x6 -> t7x7x6
t7x5x3 -> t7x6x3
t5x3x2 -> t5x3x3
t7x4x3 -> t8x4x3
t6x4x2 -> t6x4x3
t4x4x2 -> t5x4x2
t5x5x2 -> t6x5x2
t4x3x2 -> t4x3x3
t6x3x2 -> t6x4x2
t7x5x2 -> t8x5x2
t8x4x2 -> t8x5x2
t7x5x5 -> t7x6x5
t8x6x3 -> t8x7x3
t7x6x4 -> t7x6x5
t7x7x7 -> t8x7x7
t7x3x2 -> t8x3x2
t2x2x2 -> t3x2x2
t4x4x4 -> t5x4x4
t7x4x3 -> t7x4x4
t7x5x3 -> t8x5x3
t8x8x4 -> t8x8x5
t5x2x2 -> t5x3x2
t4x4x3 -> t4x4x4
t8x7x6 -> t8x8x6
t3x3x2 -> t4x3x2
t6x6x3 -> t7x6x3
t8x5x5 -> t8x6x5
t6x6x5 -> t6x6x6
t8x4x2 -> t8x4x3
t5x5x4 -> t5x5x5
t8x6x6 -> t8x7x6
t6x5x5 -> t6x6x5
t8x6x2 -> t8x6x3
t7x5x4 -> t8x5x4
t7x6x5 -> t8x6x5
t8x8x6 -> t8x8x7
t7x5x3 -> t7x5x4
t6x5x4 -> t6x5x5
t8x5x4 -> t8x5x5
t7x6x2 -> t7x6x3
t8x7x4 -> t8x8x4
t5x5x3 -> t5x5x4
t5x4x3 -> t5x5x3
t6x4x3 -> t6x4x4
t7x6x4 -> t8x6x4
t7x7x3 -> t7x7x4
t6x6x6 -> t7x6x6
t4x2x2 -> t5x2x2
t6x5x3 -> t6x6x3
t7x4x2 -> t7x4x3
t4x3x3 -> t4x4x3
t6x4x4 -> t6x5x4
t6x5x5 -> t7x5x5
t6x5x4 -> t6x6x4
t8x7x2 -> t8x8x2
t6x4x3 -> t6x5x3
t8x7x4 -> t8x7x5
t7x7x5 -> t7x7x6
t8x8x7 -> t8x8x8
t7x4x4 -> t7x5x4
t6x6x4 -> t7x6x4
t8x5x3 -> t8x5x4
t7x7x2 -> t8x7x2
t7x6x5 -> t7x7x5
t3x3x2 -> t3x3x3
t7x3x3 -> t7x4x3
t8x7x2 -> t8x7x3
t6x6x5 -> t7x6x5
t4x2x2 -> t4x3x2
t5x5x3 -> t6x5x3
t5x4x3 -> t5x4x4
t4x3x3 -> t5x3x3
t6x3x2 -> t6x3x3
t7x3x2 -> t7x3x3
t6x5x3 -> t7x5x3
t7x7x3 -> t8x7x3
t7x7x6 -> t8x7x6
t4x4x3 -> t5x4x3
t6x6x2 -> t7x6x2
t8x5x2 -> t8x5x3
t7x4x4 -> t8x4x4
t8x6x3 -> t8x6x4
t8x4x3 -> t8x5x3
t5x4x2 -> t6x4x2
t6x2x2 -> t7x2x2
t7x4x2 -> t8x4x2
t3x2x2 -> t4x2x2
t8x3x2 -> t8x4x2
t6x3x3 -> t6x4x3
t7x4x3 -> t7x5x3
t8x7x5 -> t8x7x6
t7x2x2 -> t8x2x2
t8x6x4 -> t8x6x5
t7x7x6 -> t7x7x7
t5x3x3 -> t6x3x3
t6x2x2 -> t6x3x2
t8x6x2 -> t8x7x2
t6x5x2 -> t6x5x3
t7x3x2 -> t7x4x2
t6x4x2 -> t7x4x2
t4x4x2 -> t4x4x3
t8x6x5 -> t8x6x6
t6x6x4 -> t6x6x5
t8x7x3 -> t8x8x3
t5x5x4 -> t6x5x4
t5x4x2 -> t5x4x3
t8x5x4 -> t8x6x4
t5x5x2 -> t5x5x3
t6x3x3 -> t7x3x3
t3x3x3 -> t4x3x3
t8x6x4 -> t8x7x4
t6x5x2 -> t7x5x2
t5x2x2 -> t6x2x2
t8x4x3 -> t8x4x4
t5x3x2 -> t5x4x2
t7x5x4 -> t7x6x4
t7x6x2 -> t8x6x2
t6x6x3 -> t6x6x4
t6x4x4 -> t7x4x4
t8x7x3 -> t8x7x4
t7x6x2 -> t7x7x2
t7x7x5 -> t8x7x5
t7x4x2 -> t7x5x2
t7x5x2 -> t7x6x2
t7x7x2 -> t7x7x3
t7x6x4 -> t7x7x4
t7x6x6 -> t8x6x6
t7x6x5 -> t7x6x6
t7x5x4 -> t7x5x5
t4x3x2 -> t5x3x2
t8x8x5 -> t8x8x6
t6x6x2 -> t6x6x3
t6x4x3 -> t7x4x3
t7x6x3 -> t7x7x3
t8x7x7 -> t8x8x7
t8x5x3 -> t8x6x3
t6x3x2 -> t7x3x2
t5x4x4 -> t5x5x4
t8x8x3 -> t8x8x4
t8x7x6 -> t8x7x7
t8x5x2 -> t8x6x2
t3x2x2 -> t3x3x2
t7x6x3 -> t8x6x3
t8x3x3 -> t8x4x3
t5x3x3 -> t5x4x3
t7x5x5 -> t8x5x5
t7x7x4 -> t8x7x4
t8x7x5 -> t8x8x5
t8x3x2 -> t8x3x3
t8x8x2 -> t8x8x3
t6x4x2 -> t6x5x2
t5x4x3 -> t6x4x3
t5x5x5 -> t6x5x5
}
1  dfa-decompose/results-3/triples.svg  Normal file
File diff suppressed because one or more lines are too long (SVG image, 48 KiB)
71  pom.xml  Normal file
@@ -0,0 +1,71 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>nl.ou.utf8learner</groupId>
  <artifactId>utf8learner</artifactId>
  <version>1.0.0-SNAPSHOT</version>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.release>11</maven.compiler.release>
  </properties>

  <dependencies>
    <dependency>
      <groupId>net.automatalib.distribution</groupId>
      <artifactId>automata-distribution</artifactId>
      <version>0.13.0-SNAPSHOT</version>
      <type>pom</type>
    </dependency>
    <dependency>
      <groupId>de.learnlib.distribution</groupId>
      <artifactId>learnlib-distribution</artifactId>
      <version>0.19.0-SNAPSHOT</version>
      <type>pom</type>
    </dependency>
    <dependency>
      <groupId>commons-codec</groupId>
      <artifactId>commons-codec</artifactId>
      <version>1.18.0</version>
    </dependency>
    <dependency>
      <groupId>com.ibm.icu</groupId>
      <artifactId>icu4j</artifactId>
      <version>77.1</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.6.0</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>nl.ou.utf8learner.Main</mainClass>
                </transformer>
              </transformers>
              <filters>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>module-info.class</exclude>
                  </excludes>
                </filter>
              </filters>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
2320  results/apache.dot  Normal file
File diff suppressed because it is too large
1  results/full-partial-optim.svg  Normal file
File diff suppressed because one or more lines are too long (SVG image, 5.9 KiB)
30  results/full-partial.dot  Normal file
@@ -0,0 +1,30 @@
digraph full {

s0 [shape="doublecircle" label="s0"];
s2 [shape="circle" label="s2"];
s3 [shape="circle" label="s3"];
s4 [shape="circle" label="s4"];
s5 [shape="circle" label="s5"];
s6 [shape="circle" label="s6"];
s7 [shape="circle" label="s7"];
s8 [shape="circle" label="s8"];
s0 -> s0 [label="0x00-0x7F"];
s0 -> s2 [label="0xC2-0xDF"];
s0 -> s4 [label="0xE0"];
s0 -> s3 [label="0xE1-0xEC, 0xEE-0xEF"];
s0 -> s5 [label="0xED"];
s0 -> s7 [label="0xF0"];
s0 -> s6 [label="0xF1-0xF3"];
s0 -> s8 [label="0xF4"];
s2 -> s0 [label="0x80-0xBF"];
s3 -> s2 [label="0x80-0xBF"];
s4 -> s2 [label="0xA0-0xBF"];
s5 -> s2 [label="0x80-0x9F"];
s6 -> s3 [label="0x80-0xBF"];
s7 -> s3 [label="0x90-0xBF"];
s8 -> s3 [label="0x80-0x8F"];

__start0 [label="" shape="none" width="0" height="0"];
__start0 -> s0;

}
BIN  results/full-partial.pdf  Normal file
Binary file not shown.
49  results/full.dot  Normal file
@@ -0,0 +1,49 @@
digraph full {

s0 [shape="doublecircle" label="s0"];
s1 [shape="circle" label="s1"];
s2 [shape="circle" label="s2"];
s3 [shape="circle" label="s3"];
s4 [shape="circle" label="s4"];
s5 [shape="circle" label="s5"];
s6 [shape="circle" label="s6"];
s7 [shape="circle" label="s7"];
s8 [shape="circle" label="s8"];
s0 -> s0 [label="0 -- 127"];
s0 -> s1 [label="-128 -- -63"];
s0 -> s2 [label="-62 -- -33"];
s0 -> s4 [label="-32"];
s0 -> s3 [label="-31 -- -20"];
s0 -> s5 [label="-19"];
s0 -> s3 [label="-18 -- -17"];
s0 -> s7 [label="-16"];
s0 -> s6 [label="-15 -- -13"];
s0 -> s8 [label="-12"];
s0 -> s1 [label="-11 -- -1"];
s1 -> s1 [label="0 -- -1"];
s2 -> s1 [label="0 -- 127"];
s2 -> s0 [label="-128 -- -65"];
s2 -> s1 [label="-64 -- -1"];
s3 -> s1 [label="0 -- 127"];
s3 -> s2 [label="-128 -- -65"];
s3 -> s1 [label="-64 -- -1"];
s4 -> s1 [label="0 -- -97"];
s4 -> s2 [label="-96 -- -65"];
s4 -> s1 [label="-64 -- -1"];
s5 -> s1 [label="0 -- 127"];
s5 -> s2 [label="-128 -- -97"];
s5 -> s1 [label="-96 -- -1"];
s6 -> s1 [label="0 -- 127"];
s6 -> s3 [label="-128 -- -65"];
s6 -> s1 [label="-64 -- -1"];
s7 -> s1 [label="0 -- -113"];
s7 -> s3 [label="-112 -- -65"];
s7 -> s1 [label="-64 -- -1"];
s8 -> s1 [label="0 -- 127"];
s8 -> s3 [label="-128 -- -113"];
s8 -> s1 [label="-112 -- -1"];

__start0 [label="" shape="none" width="0" height="0"];
__start0 -> s0;

}
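The edge labels in full.dot come straight from the learner and use Java's signed byte values, whereas the other diagrams use unsigned hex. The correspondence is simply masking with 0xFF; a tiny illustration (not part of the repository):

```
def signed_byte_to_hex(b):
  """Map a signed Java byte (-128..127) to the usual unsigned hex notation."""
  return f'0x{b & 0xFF:02X}'

# The label "-62 -- -33" on the s0 -> s2 edge is the range 0xC2-0xDF.
assert signed_byte_to_hex(-62) == '0xC2' and signed_byte_to_hex(-33) == '0xDF'
```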
BIN  results/full.pdf  Normal file
Binary file not shown.
2320  results/guava.dot  Normal file
File diff suppressed because it is too large
2320  results/java.dot  Normal file
File diff suppressed because it is too large
7  run.sh  Executable file
@@ -0,0 +1,7 @@
#!/usr/bin/env bash

set -euxo pipefail

mvn package

time java -cp target/utf8learner-1.0.0-SNAPSHOT.jar nl.ou.utf8learner.Main
163  src/main/java/nl/ou/utf8learner/Main.java  Normal file
@@ -0,0 +1,163 @@
/*
 * Copyright (c) 2025 Joshua Moerman, Open Universiteit
 * SPDX-License-Identifier: EUPL-1.2
 */

package nl.ou.utf8learner;

import java.io.IOException;
import java.util.Collection;
import org.checkerframework.checker.nullness.qual.Nullable;
import com.google.common.collect.Lists;
import de.learnlib.acex.AcexAnalyzers;
import de.learnlib.algorithm.LearningAlgorithm.DFALearner;
import de.learnlib.algorithm.ttt.dfa.TTTLearnerDFA;
import de.learnlib.oracle.EquivalenceOracle;
import de.learnlib.oracle.EquivalenceOracle.DFAEquivalenceOracle;
import de.learnlib.oracle.MembershipOracle.DFAMembershipOracle;
import de.learnlib.oracle.equivalence.DFAEQOracleChain;
import de.learnlib.oracle.equivalence.DFARandomWpMethodEQOracle;
import de.learnlib.oracle.equivalence.DFAWMethodEQOracle;
import de.learnlib.query.DefaultQuery;
import de.learnlib.query.Query;
import de.learnlib.util.Experiment.DFAExperiment;
import net.automatalib.alphabet.impl.AbstractAlphabet;
import net.automatalib.automaton.fsa.DFA;
import net.automatalib.serialization.dot.GraphDOT;
import net.automatalib.word.Word;

public class Main {
    public static void main(String[] args) throws IOException {
        // Basic set-up. Normally one would use a Cache, but in this case
        // the queries are much faster than the lookup. So no cache here.
        ByteAlphabet alph = new ByteAlphabet();
        DFAMembershipOracle<Byte> mqOracle = new UTF8MembershipOracle();

        // We run three equivalence oracles in sequence: from fast to exhaustive.
        // It is really only the W-method in the end which makes a lot of queries.
        // But we want at least some guarantee of completeness :-).
        DFAEquivalenceOracle<Byte> fixedTestSuite = new DFASampleSetEQOracle<>(getTestSuite(), mqOracle);
        DFAEquivalenceOracle<Byte> randomWpMethod = new DFARandomWpMethodEQOracle<>(mqOracle, 0, 8, 5000);
        DFAEquivalenceOracle<Byte> wmethod = new DFAWMethodEQOracle<>(mqOracle, 1);
        DFAEquivalenceOracle<Byte> eqOracle = new DFAEQOracleChain<>(fixedTestSuite, randomWpMethod, wmethod);

        // TTT is always a good default
        DFALearner<Byte> learner = new TTTLearnerDFA<>(alph, mqOracle, AcexAnalyzers.BINARY_SEARCH_BWD);
        DFAExperiment<Byte> experiment = new DFAExperiment<>(learner, eqOracle, alph);
        experiment.run();

        // Output result!
        System.err.println("");
        System.out.println(experiment.getRounds().getSummary());
        GraphDOT.write(experiment.getFinalHypothesis(), alph, System.out);
    }

    // Implementing the membership oracle directly. We obtain a word of bytes,
    // and assemble them into a byte array, which is then passed to the
    // underlying implementation.
    public static class UTF8MembershipOracle implements DFAMembershipOracle<Byte> {
        private long count = 0;

        @Override
        public void processQuery(Query<Byte, Boolean> query) {
            count++;
            System.err.print("\r" + count);

            Word<Byte> input = query.getInput();
            byte[] bytearray = new byte[input.length()];

            int i = 0;
            for (Byte b : input) {
                bytearray[i] = b;
                i++;
            }

            boolean acc = UTF8SUL.accepts(bytearray);
            query.answer(acc);
        }

        @Override
        public void processQueries(Collection<? extends Query<Byte, Boolean>> queries) {
            for (Query<Byte, Boolean> query : queries) {
                processQuery(query);
            }
        }
    }

    // Alphabet of all 256 bytes. Implemented directly for efficiency.
    public static class ByteAlphabet extends AbstractAlphabet<Byte> {
        @Override
        public Byte getSymbol(int index) {
            return (byte) index;
        }

        @Override
        public int getSymbolIndex(Byte symbol) {
            if (symbol >= 0) {
                return (int) symbol;
            } else {
                return (int) (symbol + 256);
            }
        }

        @Override
        public boolean containsSymbol(Byte symbol) {
            return true;
        }

        @Override
        public int size() {
            return 256;
        }
    }

    // For some reason the SampleSetEQOracle of LearnLib was not easy to
    // use with DFAs. So I made my own.
    public static class DFASampleSetEQOracle<I> implements EquivalenceOracle.DFAEquivalenceOracle<I> {
        final private Collection<Word<I>> testSuite;
        final private DFAMembershipOracle<I> truth;

        public DFASampleSetEQOracle(Collection<Word<I>> testSuite, DFAMembershipOracle<I> truth) {
            this.testSuite = testSuite;
            this.truth = truth;
        }

        @Override
        public @Nullable DefaultQuery<I, Boolean> findCounterExample(DFA<?, I> hypothesis,
                Collection<? extends I> alphabet) {
            for (Word<I> test : testSuite) {
                DefaultQuery<I, Boolean> query = new DefaultQuery<>(test);
                truth.processQuery(query);

                if (hypothesis.accepts(test) != query.getOutput()) {
                    return query;
                }
            }

            return null;
        }
    }

    // This should cover all states, I think.
    public static Collection<Word<Byte>> getTestSuite() {
        return Lists.newArrayList(
                Word.fromSymbols((byte) -15, (byte) 64),
                Word.fromSymbols((byte) -62, (byte) -65),
                Word.fromSymbols((byte) -33, (byte) 127),
                Word.fromSymbols((byte) -32, (byte) 0),
                Word.fromSymbols((byte) -12, (byte) -112),
                Word.fromSymbols((byte) -18, (byte) -100, (byte) 0),
                Word.fromSymbols((byte) -32, (byte) -70, (byte) -120),
                Word.fromSymbols((byte) -19, (byte) -97, (byte) -1),
                Word.fromSymbols((byte) -31, (byte) -128, (byte) -100),
                Word.fromSymbols((byte) -31, (byte) -128, (byte) 127),
                Word.fromSymbols((byte) -16, (byte) -112, (byte) -64),
                Word.fromSymbols((byte) -18, (byte) -100, (byte) -100),
                Word.fromSymbols((byte) -19, (byte) -97, (byte) -90),
                Word.fromSymbols((byte) 0xEF, (byte) 0xBB, (byte) 0xBF),
                Word.fromSymbols((byte) -16, (byte) -112, (byte) -128, (byte) -128),
                Word.fromSymbols((byte) -12, (byte) -120, (byte) -65, (byte) -80),
                Word.fromSymbols((byte) -15, (byte) -65, (byte) -65, (byte) -65),
                Word.fromSymbols((byte) -15, (byte) -128, (byte) -128, (byte) -128));
    }
}
27  src/main/java/nl/ou/utf8learner/UTF8SUL.java  Normal file
@@ -0,0 +1,27 @@
/*
 * Copyright (c) 2025 Joshua Moerman, Open Universiteit
 * SPDX-License-Identifier: EUPL-1.2
 */

package nl.ou.utf8learner;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.CharacterCodingException;

public class UTF8SUL {
    public static boolean accepts(byte[] data) {
        CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
        decoder.onMalformedInput(CodingErrorAction.REPORT);
        decoder.onUnmappableCharacter(CodingErrorAction.REPORT);

        try {
            decoder.decode(ByteBuffer.wrap(data));
            return true;
        } catch (CharacterCodingException e) {
            return false;
        }
    }
}
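The membership question answered by UTF8SUL (and the alternative SULs below) is simply whether a byte sequence is well-formed UTF-8. For comparison, the same predicate in Python is a strict decode; this snippet is an illustration, not code from this repository:

```
def accepts(data: bytes) -> bool:
  """True iff data is a well-formed UTF-8 byte sequence."""
  try:
    data.decode('utf-8', errors='strict')
    return True
  except UnicodeDecodeError:
    return False

# The BOM is valid UTF-8; an overlong encoding of '/' is not.
assert accepts(b'\xef\xbb\xbf') and not accepts(b'\xc0\xaf')
```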
20  src/main/java/nl/ou/utf8learner/UTF8SULApache.java  Normal file
@@ -0,0 +1,20 @@
/*
 * Copyright (c) 2025 Joshua Moerman, Open Universiteit
 * SPDX-License-Identifier: EUPL-1.2
 */

package nl.ou.utf8learner;

import java.util.Arrays;
import org.apache.commons.codec.binary.StringUtils;

public class UTF8SULApache {
    public static boolean accepts(byte[] data) {
        try {
            String s = StringUtils.newStringUtf8(data);
            return Arrays.equals(data, StringUtils.getBytesUtf8(s));
        } catch (Exception e) {
            return false;
        }
    }
}
14  src/main/java/nl/ou/utf8learner/UTF8SULGuava.java  Normal file
@@ -0,0 +1,14 @@
/*
 * Copyright (c) 2025 Joshua Moerman, Open Universiteit
 * SPDX-License-Identifier: EUPL-1.2
 */

package nl.ou.utf8learner;

import com.google.common.base.Utf8;

public class UTF8SULGuava {
    public static boolean accepts(byte[] data) {
        return Utf8.isWellFormed(data);
    }
}
30  src/main/java/nl/ou/utf8learner/UTF8SULICU4J.java  Normal file
@@ -0,0 +1,30 @@
/*
 * Copyright (c) 2025 Joshua Moerman, Open Universiteit
 * SPDX-License-Identifier: EUPL-1.2
 */

package nl.ou.utf8learner;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;

public class UTF8SULICU4J {
    public static boolean accepts(byte[] data) {
        // The CharsetDetector is not a good validator, it accepts a certain
        // amount of errors. And it doesn't always report short strings as
        // valid UTF-8.
        CharsetDetector detector = new CharsetDetector();
        detector.setDeclaredEncoding("UTF-8");
        detector.setText(data);

        CharsetMatch[] matches = detector.detectAll();
        for (CharsetMatch match : matches) {
            // The confidence can be either 15, 25, 80 or 100.
            if ("UTF-8".equalsIgnoreCase(match.getName()) && match.getConfidence() >= 100) {
                return true;
            }
        }

        return false;
    }
}