# HG changeset patch
# User cin
# Date 2016-03-13 22:19:38
# Node ID 92d5278d1b107bd6648fbe66f7d4e202196bb92e
# Parent  0f70905b46522ae15bf2de1c4b30123912eeab9e
Working on text scanner
diff --git a/Implab/Automaton/DFAStateDescriptor.cs b/Implab/Automaton/DFAStateDescriptor.cs
--- a/Implab/Automaton/DFAStateDescriptor.cs
+++ b/Implab/Automaton/DFAStateDescriptor.cs
@@ -1,18 +1,18 @@
 namespace Implab.Automaton {
-    public struct DFAStateDescriptior {
+    public struct DFAStateDescriptor {
         public readonly bool final;
         public readonly int[] transitions;
 
 
-        public DFAStateDescriptior(int[] transitions, bool final) {
+        public DFAStateDescriptor(int[] transitions, bool final) {
             this.transitions = transitions;
             this.final = final;
         }
 
-        public DFAStateDescriptior(int[] transitions) : this(transitions, false) {
+        public DFAStateDescriptor(int[] transitions) : this(transitions, false) {
         }
 
-        public DFAStateDescriptior(int size, bool final) {
+        public DFAStateDescriptor(int size, bool final) {
             Safe.ArgumentInRange(size, 0, int.MaxValue, "size");
 
             this.final = final;
diff --git a/Implab/Automaton/DFATable.cs b/Implab/Automaton/DFATable.cs
--- a/Implab/Automaton/DFATable.cs
+++ b/Implab/Automaton/DFATable.cs
@@ -100,6 +100,20 @@ namespace Implab.Automaton {
             return GetEnumerator();
         }
 
+        public DFAStateDescriptor[] CreateTransitionTable() {
+            var table = new DFAStateDescriptor[StateCount];
+
+            foreach (var t in this) {
+                if (table[t.s1].transitions == null)
+                    table[t.s1] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s1));
+                if (table[t.s2].transitions == null)
+                    table[t.s2] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s2));
+                table[t.s1].transitions[t.edge] = t.s2;
+            }
+
+            return table;
+        }
+
         /// Формирует множества конечных состояний перед началом работы алгоритма минимизации.
         /// 
         /// В процессе построения минимального автомата требуется разделить множество состояний,
diff --git a/Implab/Automaton/IndexedAlphabetBase.cs b/Implab/Automaton/IndexedAlphabetBase.cs
--- a/Implab/Automaton/IndexedAlphabetBase.cs
+++ b/Implab/Automaton/IndexedAlphabetBase.cs
@@ -71,8 +71,16 @@ namespace Implab.Automaton {
             return true;
         }
 
+        public IEnumerable GetSymbols(int cls) {
+            for (var i = 0; i < m_map.Length; i++)
+                if (m_map[i] == cls)
+                    yield return GetSymbolByIndex(i);
+        }
+
         public abstract int GetSymbolIndex(T symbol);
 
+        public abstract T GetSymbolByIndex(int index);
+
         public abstract IEnumerable InputSymbols { get; }
 
         /// 
diff --git a/Implab/Automaton/MapAlphabet.cs b/Implab/Automaton/MapAlphabet.cs
--- a/Implab/Automaton/MapAlphabet.cs
+++ b/Implab/Automaton/MapAlphabet.cs
@@ -38,7 +38,6 @@ namespace Implab.Automaton {
             Safe.ArgumentNotNull(symbols, "symbols");
 
             m_nextCls = Math.Max(cls + 1, m_nextCls);
-            symbols = symbols.Distinct();
 
             foreach (var symbol in symbols)
                 m_map[symbol] = cls;
@@ -68,6 +67,10 @@ namespace Implab.Automaton {
             return m_supportUnclassified || m_map.ContainsKey(symbol);
         }
 
+
+        public IEnumerable GetSymbols(int cls) {
+            return m_map.Where(p => p.Value == cls).Select(p => p.Key);
+        }
         #endregion
     }
 }
diff --git a/Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs b/Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs
--- a/Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs
+++ b/Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs
@@ -1,14 +1,14 @@
 using System;
 
 namespace Implab.Automaton.RegularExpressions {
-    public struct DFAStateDescriptorT {
+    public struct DFAStateDescriptor {
         public readonly bool final;
 
         public readonly int[] transitions;
 
         public readonly T[] tags;
 
-        public DFAStateDescriptorT(int size, bool final, T[] tags) {
+        public DFAStateDescriptor(int size, bool final, T[] tags) {
             Safe.ArgumentAssert(size >= 0, "size");
             this.final = final;
             this.tags = tags;
diff --git a/Implab/Automaton/RegularExpressions/Grammar.cs b/Implab/Automaton/RegularExpressions/Grammar.cs
--- a/Implab/Automaton/RegularExpressions/Grammar.cs
+++ b/Implab/Automaton/RegularExpressions/Grammar.cs
@@ -66,23 +66,21 @@ namespace Implab.Automaton.RegularExpres
             return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
         }
 
-        protected void BuildDFA(Token lang, IDFATableBuilder dfaTable, IAlphabetBuilder dfaAlphabet) {
-            Safe.ArgumentNotNull(lang, "lang");
-            Safe.ArgumentNotNull(dfaAlphabet, "dfaAlphabet");
-
-            var dfa = new RegularDFADefinition(AlphabetBuilder);
+        protected abstract IAlphabetBuilder CreateAlphabet();
 
-            var builder = new RegularDFABuilder();
+        protected RegularDFA BuildDFA(Token regexp) {
+            
+            var dfa = new RegularDFA(AlphabetBuilder);
 
-            lang.Accept( builder );
+            var visitor = new RegularExpressionVisitor();
+            regexp.Accept( visitor );
 
-            builder.BuildDFA(dfa);
+            visitor.BuildDFA(dfa);
 
             if (dfa.IsFinalState(dfa.InitialState))
                 throw new ApplicationException("The specified language contains empty token");
 
-            dfa.Optimize(dfaTable, dfaAlphabet);
-
+            return dfa.Optimize(CreateAlphabet());
         }
 
     }
diff --git a/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs b/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs
new file mode 100644
--- /dev/null
+++ b/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs
@@ -0,0 +1,8 @@
+using System;
+
+namespace Implab.Automaton.RegularExpressions {
+    public interface ITaggedDFABuilder : IDFATableBuilder {
+        void SetStateTag(int s, TTag[] tags);
+    }
+}
+
diff --git a/Implab/Automaton/RegularExpressions/RegularDFA.cs b/Implab/Automaton/RegularExpressions/RegularDFA.cs
new file mode 100644
--- /dev/null
+++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs
@@ -0,0 +1,89 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Implab.Automaton.RegularExpressions {
+    public class RegularDFA : DFATable, ITaggedDFABuilder {
+
+        readonly Dictionary m_tags = new Dictionary();
+        readonly IAlphabet m_alphabet;
+
+        public RegularDFA(IAlphabet alphabet) {
+            Safe.ArgumentNotNull(alphabet, "aplhabet");
+
+            m_alphabet = alphabet;
+        }
+
+
+        public IAlphabet InputAlphabet {
+            get {
+                return m_alphabet;
+            }
+        }
+
+        public void MarkFinalState(int s, TTag[] tags) {
+            MarkFinalState(s);
+            SetStateTag(s, tags);
+        }
+
+        public void SetStateTag(int s, TTag[] tags) {
+            Safe.ArgumentNotNull(tags, "tags");
+            m_tags[s] = tags;
+        }
+
+        public TTag[] GetStateTag(int s) {
+            TTag[] tags;
+            return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
+        }
+
+        public new DFAStateDescriptor[] CreateTransitionTable() {
+            var table = new DFAStateDescriptor[StateCount];
+
+            foreach (var t in this) {
+                if (table[t.s1].transitions == null)
+                    table[t.s1] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s1), GetStateTag(t.s1));
+                if (table[t.s2].transitions == null)
+                    table[t.s2] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s2), GetStateTag(t.s2));
+                table[t.s1].transitions[t.edge] = t.s2;
+            }
+
+            return table;
+        }
+
+        /// 
+        /// Optimize the specified alphabet.
+        /// 
+        /// Пустой алфавит, который будет зполнен в процессе оптимизации.
+        public RegularDFA Optimize(IAlphabetBuilder alphabet) {
+            Safe.ArgumentNotNull(alphabet, "alphabet");
+
+            var dfa = new RegularDFA(alphabet);
+
+            var states = new DummyAlphabet(StateCount);
+            var alphaMap = new Dictionary();
+            var stateMap = new Dictionary();
+
+            Optimize(dfa, alphaMap, stateMap); 
+
+            // mark tags in the new DFA
+            foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
+                dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
+
+            // make the alphabet for the new DFA
+            foreach (var pair in alphaMap)
+                alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
+            
+            return dfa;
+        }
+
+        protected override IEnumerable> GroupFinalStates() {
+            var arrayComparer = new CustomEqualityComparer(
+                (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
+                x => x.Sum(it => x.GetHashCode())
+            );
+            return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet(g));
+        }
+
+    }
+}
+
diff --git a/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs b/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs
deleted file mode 100644
--- a/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs
+++ /dev/null
@@ -1,180 +0,0 @@
-using Implab;
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Linq;
-
-namespace Implab.Automaton.RegularExpressions {
-    /// 
-    /// Используется для построения ДКА по регулярному выражению, сначала обходит
-    /// регулярное выражение и вычисляет followpos, затем используется метод
-    ///  для построения автомата.
-    /// 
-    public class RegularDFABuilder : IVisitor {
-        int m_idx;
-        Token m_root;
-        HashSet m_firstpos;
-        HashSet m_lastpos;
-
-        readonly Dictionary> m_followpos = new Dictionary>();
-        readonly Dictionary m_indexes = new Dictionary();
-        readonly Dictionary m_ends = new Dictionary();
-
-        public Dictionary> FollowposMap {
-            get { return m_followpos; }
-        }
-
-        public HashSet Followpos(int pos) {
-            HashSet set;
-            return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet();
-        }
-
-        bool Nullable(object n) {
-            if (n is EmptyToken || n is StarToken)
-                return true;
-            var altToken = n as AltToken;
-            if (altToken != null)
-                return Nullable(altToken.Left) || Nullable(altToken.Right);
-            var catToken = n as CatToken;
-            if (catToken != null)
-                return Nullable(catToken.Left) && Nullable(catToken.Right);
-            return false;
-        }
-
-
-        public void Visit(AltToken token) {
-            if (m_root == null)
-                m_root = token;
-            var firtspos = new HashSet();
-            var lastpos = new HashSet();
-
-            token.Left.Accept(this);
-            firtspos.UnionWith(m_firstpos);
-            lastpos.UnionWith(m_lastpos);
-
-            token.Right.Accept(this);
-            firtspos.UnionWith(m_firstpos);
-            lastpos.UnionWith(m_lastpos);
-
-            m_firstpos = firtspos;
-            m_lastpos = lastpos;
-        }
-
-        public void Visit(StarToken token) {
-            if (m_root == null)
-                m_root = token;
-            token.Token.Accept(this);
-
-            foreach (var i in m_lastpos)
-                Followpos(i).UnionWith(m_firstpos);
-        }
-
-        public void Visit(CatToken token) {
-            if (m_root == null)
-                m_root = token;
-
-            var firtspos = new HashSet();
-            var lastpos = new HashSet();
-            token.Left.Accept(this);
-            firtspos.UnionWith(m_firstpos);
-            var leftLastpos = m_lastpos;
-
-            token.Right.Accept(this);
-            lastpos.UnionWith(m_lastpos);
-            var rightFirstpos = m_firstpos;
-
-            if (Nullable(token.Left))
-                firtspos.UnionWith(rightFirstpos);
-
-            if (Nullable(token.Right))
-                lastpos.UnionWith(leftLastpos);
-
-            m_firstpos = firtspos;
-            m_lastpos = lastpos;
-
-            foreach (var i in leftLastpos)
-                Followpos(i).UnionWith(rightFirstpos);
-
-        }
-
-        public void Visit(EmptyToken token) {
-            if (m_root == null)
-                m_root = token;
-        }
-
-        public void Visit(SymbolToken token) {
-            if (m_root == null)
-                m_root = token;
-            m_idx++;
-            m_indexes[m_idx] = token.Value;
-            m_firstpos = new HashSet(new[] { m_idx });
-            m_lastpos = new HashSet(new[] { m_idx });
-        }
-
-        public void Visit(EndToken token) {
-            if (m_root == null)
-                m_root = token;
-            m_idx++;
-            m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
-            m_firstpos = new HashSet(new[] { m_idx });
-            m_lastpos = new HashSet(new[] { m_idx });
-            Followpos(m_idx);
-            m_ends.Add(m_idx, token.Tag);
-        }
-
-        public void BuildDFA(IDFATableBuilder dfa) {
-            Safe.ArgumentNotNull(dfa,"dfa");
-
-            var states = new MapAlphabet>(new CustomEqualityComparer>(
-                             (x, y) => x.SetEquals(y),
-                             x => x.Sum(n => n.GetHashCode())
-                         ));
-
-            var initialState = states.DefineSymbol(m_firstpos);
-            dfa.SetInitialState(initialState);
-
-            var tags = GetStateTags(m_firstpos);
-            if (tags != null && tags.Length > 0)
-                dfa.MarkFinalState(initialState, tags);
-
-            var inputMax = m_indexes.Values.Max();
-            var queue = new Queue>();
-
-            queue.Enqueue(m_firstpos);
-
-            while (queue.Count > 0) {
-                var state = queue.Dequeue();
-                var s1 = states.Translate(state);
-                Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT);
-
-                for (int a = 0; a <= inputMax; a++) {
-                    var next = new HashSet();
-                    foreach (var p in state) {
-                        if (m_indexes[p] == a) {
-                            next.UnionWith(Followpos(p));
-                        }
-                    }
-                    if (next.Count > 0) {
-                        int s2 = states.Translate(next);
-                        if (s2 == DFAConst.UNCLASSIFIED_INPUT) {
-                            s2 = states.DefineSymbol(next);
-
-                            tags = GetStateTags(next);
-                            if (tags != null && tags.Length > 0)
-                                dfa.MarkFinalState(s2, tags);
-                            
-                            queue.Enqueue(next);
-                        }
-                        dfa.Add(new AutomatonTransition(s1, s2, a));
-                    }
-                }
-            }
-        }
-
-        TTag[] GetStateTags(IEnumerable state) {
-            Debug.Assert(state != null);
-            return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray();
-        }
-
-    }
-}
diff --git a/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs b/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs
deleted file mode 100644
--- a/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs
+++ /dev/null
@@ -1,69 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-
-namespace Implab.Automaton.RegularExpressions {
-    public class RegularDFADefinition : DFATable {
-
-        readonly Dictionary m_tags = new Dictionary();
-        readonly IAlphabet m_alphabet;
-
-        public RegularDFADefinition(IAlphabet alphabet) {
-            Safe.ArgumentNotNull(alphabet, "aplhabet");
-
-            m_alphabet = alphabet;
-        }
-
-
-        public IAlphabet InputAlphabet {
-            get {
-                return m_alphabet;
-            }
-        }
-
-        public void MarkFinalState(int s, TTag[] tags) {
-            MarkFinalState(s);
-            SetStateTag(s, tags);
-        }
-
-        public void SetStateTag(int s, TTag[] tags) {
-            Safe.ArgumentNotNull(tags, "tags");
-            m_tags[s] = tags;
-        }
-
-        public TTag[] GetStateTag(int s) {
-            TTag[] tags;
-            return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
-        }
-
-        /// 
-        /// Optimize the specified alphabet.
-        /// 
-        /// Пустой алфавит, который будет зполнен в процессе оптимизации.
-        public RegularDFADefinition Optimize(IAlphabetBuilder alphabet) {
-            Safe.ArgumentNotNull(alphabet, "alphabet");
-
-            var dfaTable = new RegularDFADefinition(alphabet);
-
-            var states = new DummyAlphabet(StateCount);
-            var alphaMap = new Dictionary();
-            var stateMap = new Dictionary();
-            Optimize(dfaTable, alphaMap, stateMap); 
-
-            foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
-                dfaTable.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
-
-            return dfaTable;
-        }
-
-        protected override IEnumerable> GroupFinalStates() {
-            var arrayComparer = new CustomEqualityComparer(
-                (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
-                x => x.Sum(it => x.GetHashCode())
-            );
-            return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet(g));
-        }
-
-    }
-}
-
diff --git a/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs
new file mode 100644
--- /dev/null
+++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs
@@ -0,0 +1,184 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace Implab.Automaton.RegularExpressions {
+    /// 
+    /// Используется для построения ДКА по регулярному выражению, сначала обходит
+    /// регулярное выражение и вычисляет followpos, затем используется метод
+    ///  для построения автомата.
+    /// 
+    public class RegularExpressionVisitor : IVisitor {
+        int m_idx;
+        Token m_root;
+        HashSet m_firstpos;
+        HashSet m_lastpos;
+
+        readonly Dictionary> m_followpos = new Dictionary>();
+        readonly Dictionary m_indexes = new Dictionary();
+        readonly Dictionary m_ends = new Dictionary();
+
+        public Dictionary> FollowposMap {
+            get { return m_followpos; }
+        }
+
+        public HashSet Followpos(int pos) {
+            HashSet set;
+            return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet();
+        }
+
+        bool Nullable(object n) {
+            if (n is EmptyToken || n is StarToken)
+                return true;
+            var altToken = n as AltToken;
+            if (altToken != null)
+                return Nullable(altToken.Left) || Nullable(altToken.Right);
+            var catToken = n as CatToken;
+            if (catToken != null)
+                return Nullable(catToken.Left) && Nullable(catToken.Right);
+            return false;
+        }
+
+
+        public void Visit(AltToken token) {
+            if (m_root == null)
+                m_root = token;
+            var firtspos = new HashSet();
+            var lastpos = new HashSet();
+
+            token.Left.Accept(this);
+            firtspos.UnionWith(m_firstpos);
+            lastpos.UnionWith(m_lastpos);
+
+            token.Right.Accept(this);
+            firtspos.UnionWith(m_firstpos);
+            lastpos.UnionWith(m_lastpos);
+
+            m_firstpos = firtspos;
+            m_lastpos = lastpos;
+        }
+
+        public void Visit(StarToken token) {
+            if (m_root == null)
+                m_root = token;
+            token.Token.Accept(this);
+
+            foreach (var i in m_lastpos)
+                Followpos(i).UnionWith(m_firstpos);
+        }
+
+        public void Visit(CatToken token) {
+            if (m_root == null)
+                m_root = token;
+
+            var firtspos = new HashSet();
+            var lastpos = new HashSet();
+            token.Left.Accept(this);
+            firtspos.UnionWith(m_firstpos);
+            var leftLastpos = m_lastpos;
+
+            token.Right.Accept(this);
+            lastpos.UnionWith(m_lastpos);
+            var rightFirstpos = m_firstpos;
+
+            if (Nullable(token.Left))
+                firtspos.UnionWith(rightFirstpos);
+
+            if (Nullable(token.Right))
+                lastpos.UnionWith(leftLastpos);
+
+            m_firstpos = firtspos;
+            m_lastpos = lastpos;
+
+            foreach (var i in leftLastpos)
+                Followpos(i).UnionWith(rightFirstpos);
+
+        }
+
+        public void Visit(EmptyToken token) {
+            if (m_root == null)
+                m_root = token;
+        }
+
+        public void Visit(SymbolToken token) {
+            if (m_root == null)
+                m_root = token;
+            m_idx++;
+            m_indexes[m_idx] = token.Value;
+            m_firstpos = new HashSet(new[] { m_idx });
+            m_lastpos = new HashSet(new[] { m_idx });
+        }
+
+        public void Visit(EndToken token) {
+            if (m_root == null)
+                m_root = token;
+            m_idx++;
+            m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
+            m_firstpos = new HashSet(new[] { m_idx });
+            m_lastpos = new HashSet(new[] { m_idx });
+            Followpos(m_idx);
+            m_ends.Add(m_idx, token.Tag);
+        }
+
+        public void BuildDFA(ITaggedDFABuilder dfa) {
+            Safe.ArgumentNotNull(dfa,"dfa");
+
+            var states = new MapAlphabet>(
+                             false,
+                             new CustomEqualityComparer>(
+                                 (x, y) => x.SetEquals(y),
+                                 x => x.Sum(n => n.GetHashCode())
+                             ));
+
+            var initialState = states.DefineSymbol(m_firstpos);
+            dfa.SetInitialState(initialState);
+
+            var tags = GetStateTags(m_firstpos);
+            if (tags != null && tags.Length > 0)
+                dfa.MarkFinalState(initialState, tags);
+
+            var inputMax = m_indexes.Values.Max();
+            var queue = new Queue>();
+
+            queue.Enqueue(m_firstpos);
+
+            while (queue.Count > 0) {
+                var state = queue.Dequeue();
+                var s1 = states.Translate(state);
+                Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT);
+
+                for (int a = 0; a <= inputMax; a++) {
+                    var next = new HashSet();
+                    foreach (var p in state) {
+                        if (m_indexes[p] == a) {
+                            next.UnionWith(Followpos(p));
+                        }
+                    }
+                    if (next.Count > 0) {
+                        int s2 = states.Translate(next);
+                        if (s2 == DFAConst.UNCLASSIFIED_INPUT) {
+                            s2 = states.DefineSymbol(next);
+
+                            tags = GetStateTags(next);
+                            if (tags != null && tags.Length > 0) {
+                                dfa.MarkFinalState(s2);
+                                dfa.SetStateTag(s2, tags);
+                            }
+                            
+                            queue.Enqueue(next);
+                        }
+                        dfa.Add(new AutomatonTransition(s1, s2, a));
+                    }
+                }
+            }
+        }
+
+        TTag[] GetStateTags(IEnumerable state) {
+            Debug.Assert(state != null);
+            return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray();
+        }
+
+    }
+}
diff --git a/Implab/Automaton/Scanner.cs b/Implab/Automaton/Scanner.cs
--- a/Implab/Automaton/Scanner.cs
+++ b/Implab/Automaton/Scanner.cs
@@ -3,6 +3,7 @@ using System;
 using System.Collections.Generic;
 using System.IO;
 using Implab.Components;
+using Implab.Automaton.RegularExpressions;
 
 namespace Implab.Automaton {
     /// 
@@ -14,19 +15,23 @@ namespace Implab.Automaton {
     /// конца токена и допустимости текущего символа.
     /// 
     public abstract class Scanner : Disposable {
-        struct ScannerConfig {
-            public DFAStateDescriptior[] states;
-            public int[] alphabetMap;
-            public int initialState;
+        protected struct ScannerConfig {
+            public readonly DFAStateDescriptor[] states;
+            public readonly int[] alphabet;
+            public readonly int initialState;
+
+            public ScannerConfig(DFAStateDescriptor[] states, int[] alphabet, int initialState) {
+                this.initialState = initialState;
+                this.alphabet = alphabet;
+                this.states = states;
+            }
         }
 
         Stack m_defs = new Stack();
 
-        DFAStateDescriptior[] m_states;
-        int[] m_alphabetMap;
-        int m_initialState;
+        ScannerConfig m_config;
 
-        protected DFAStateDescriptior m_currentState;
+        protected DFAStateDescriptor m_currentState;
         int m_previewCode;
 
         protected int m_tokenLen;
@@ -41,15 +46,11 @@ namespace Implab.Automaton {
         int m_chunkSize = 1024; // 1k
         int m_limit = 10 * 1024 * 1024; // 10Mb
 
-        protected Scanner(DFAStateDescriptior[] states, int[] alphabet, int initialState) {
-            Safe.ArgumentNotEmpty(states, "states");
-            Safe.ArgumentNotNull(alphabet, "alphabet");
+        protected Scanner(ScannerConfig config) {
+            Safe.ArgumentNotEmpty(config.states, "config.states");
+            Safe.ArgumentNotNull(config.alphabet, "config.alphabet");
 
-            m_states = states;
-            m_alphabetMap = alphabet;
-            m_initialState = initialState;
-
-            Feed(new char[0]);
+            m_config = config;
         }
 
         /// 
@@ -110,7 +111,7 @@ namespace Implab.Automaton {
         /// 
         protected TTag[] TokenTags {
             get {
-                return m_currentState.tag;
+                return m_currentState.tags;
             }
         }
 
@@ -133,7 +134,7 @@ namespace Implab.Automaton {
             if (m_pointer >= m_bufferSize)
                 return false;
 
-            m_currentState = m_states[m_initialState];
+            m_currentState = m_config.states[m_config.initialState];
             m_tokenLen = 0;
             m_tokenOffset = m_pointer;
             int nextState;
@@ -151,7 +152,7 @@ namespace Implab.Automaton {
                         )
                     );
                 }
-                m_currentState = m_states[nextState];
+                m_currentState = m_config.states[nextState];
                 m_tokenLen++;
 
             } while (Shift());
@@ -172,7 +173,7 @@ namespace Implab.Automaton {
                     return false;
             }
 
-            m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
+            m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
 
             return true;
         }
@@ -217,23 +218,14 @@ namespace Implab.Automaton {
         /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей
         /// группировки.
         /// 
-        /// Таблица состояний нового ДКА
-        /// Таблица входных символов для нового ДКА
-        /// 
-        protected void Switch(DFAStateDescriptior[] states, int[] alphabet, int initialState) {
-            Safe.ArgumentNotNull(states, "dfa");
+        /// 
+        protected void Switch(ScannerConfig config) {
+            Safe.ArgumentNotNull(config.states, "config.states");
 
-            m_defs.Push(new ScannerConfig {
-                states = m_states,
-                alphabetMap = m_alphabetMap,
-                initialState = m_initialState
-            });
+            m_defs.Push(m_config);
+            m_config = config;
 
-            m_states = states;
-            m_alphabetMap = alphabet;
-            m_initialState = initialState;
-
-            m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
+            m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
         }
 
         /// 
@@ -242,11 +234,9 @@ namespace Implab.Automaton {
         protected void Restore() {
             if (m_defs.Count == 0)
                 throw new InvalidOperationException();
-            var prev = m_defs.Pop();
-            m_states = prev.states;
-            m_alphabetMap = prev.alphabetMap;
-            m_initialState = prev.initialState;
-            m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
+            m_config = m_defs.Pop();
+
+            m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
         }
 
         protected override void Dispose(bool disposing) {
diff --git a/Implab/Formats/ByteAlphabet.cs b/Implab/Formats/ByteAlphabet.cs
--- a/Implab/Formats/ByteAlphabet.cs
+++ b/Implab/Formats/ByteAlphabet.cs
@@ -13,6 +13,10 @@ namespace Implab.Formats {
             return (int)symbol;
         }
 
+        public override byte GetSymbolByIndex(int index) {
+            return (byte)index;
+        }
+
         public IEnumerable InputSymbols {
             get {
                 return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast();
diff --git a/Implab/Formats/CharAlphabet.cs b/Implab/Formats/CharAlphabet.cs
--- a/Implab/Formats/CharAlphabet.cs
+++ b/Implab/Formats/CharAlphabet.cs
@@ -13,6 +13,10 @@ namespace Implab.Formats {
             return symbol;
         }
 
+        public override char GetSymbolByIndex(int index) {
+            return (char)index;
+        }
+
         public override IEnumerable InputSymbols {
             get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast(); }
         }
diff --git a/Implab/Formats/JSON/JSONGrammar.cs b/Implab/Formats/JSON/JSONGrammar.cs
--- a/Implab/Formats/JSON/JSONGrammar.cs
+++ b/Implab/Formats/JSON/JSONGrammar.cs
@@ -1,6 +1,7 @@
 using System.Linq;
 using Implab.Automaton.RegularExpressions;
 using System;
+using Implab.Automaton;
 
 namespace Implab.Formats.JSON {
     class JSONGrammar : Grammar {
@@ -35,8 +36,8 @@ namespace Implab.Formats.JSON {
             get { return _instance.Value; }
         }
 
-        readonly RegularCharDFADefinition m_jsonDFA;
-        readonly RegularCharDFADefinition m_stringDFA;
+        readonly RegularDFA m_jsonDFA;
+        readonly RegularDFA m_stringDFA;
 
         public JSONGrammar() {
             DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
@@ -87,21 +88,17 @@ namespace Implab.Formats.JSON {
                 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
                     
 
-            m_jsonDFA = new RegularCharDFADefinition(new CharAlphabet()); 
-            BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet);
-
-
-            m_stringDFA = new RegularCharDFADefinition(new CharAlphabet());
-            BuildDFA(jsonStringExpression, m_jsonDFA, m_jsonDFA.InputAlphabet);
+            m_jsonDFA = BuildDFA(jsonExpression);
+            m_stringDFA = BuildDFA(jsonStringExpression);
         }
 
-        public RegularCharDFADefinition JsonDFA {
+        public RegularDFA JsonDFA {
             get {
                 return m_jsonDFA;
             }
         }
 
-        public RegularDFADefinition JsonStringDFA {
+        public RegularDFA JsonStringDFA {
             get {
                 return m_stringDFA;
             }
@@ -110,6 +107,10 @@ namespace Implab.Formats.JSON {
         Token SymbolRangeToken(char start, char stop) {
             return SymbolToken(Enumerable.Range(start,stop - start).Cast());
         }
+
+        protected override IAlphabetBuilder CreateAlphabet() {
+            return new CharAlphabet();
+        }
                 
     }
 }
diff --git a/Implab/Formats/JSON/JSONParser.cs b/Implab/Formats/JSON/JSONParser.cs
--- a/Implab/Formats/JSON/JSONParser.cs
+++ b/Implab/Formats/JSON/JSONParser.cs
@@ -86,9 +86,9 @@ namespace Implab.Formats.JSON {
             return Token