| @@ -0,0 +1,8 | |||||
|  | 1 | using System; | |||
|  | 2 | ||||
|  | 3 | namespace Implab.Automaton.RegularExpressions { | |||
|  | 4 | public interface ITaggedDFABuilder<TTag> : IDFATableBuilder { | |||
|  | 5 | void SetStateTag(int s, TTag[] tags); | |||
|  | 6 | } | |||
|  | 7 | } | |||
|  | 8 | ||||
| @@ -0,0 +1,89 | |||||
|  | 1 | using System; | |||
|  | 2 | using System.Collections.Generic; | |||
|  | 3 | using System.Linq; | |||
|  | 4 | ||||
|  | 5 | namespace Implab.Automaton.RegularExpressions { | |||
|  | 6 | public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { | |||
|  | 7 | ||||
|  | 8 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); | |||
|  | 9 | readonly IAlphabet<TInput> m_alphabet; | |||
|  | 10 | ||||
|  | 11 | public RegularDFA(IAlphabet<TInput> alphabet) { | |||
|  | 12 | Safe.ArgumentNotNull(alphabet, "aplhabet"); | |||
|  | 13 | ||||
|  | 14 | m_alphabet = alphabet; | |||
|  | 15 | } | |||
|  | 16 | ||||
|  | 17 | ||||
|  | 18 | public IAlphabet<TInput> InputAlphabet { | |||
|  | 19 | get { | |||
|  | 20 | return m_alphabet; | |||
|  | 21 | } | |||
|  | 22 | } | |||
|  | 23 | ||||
|  | 24 | public void MarkFinalState(int s, TTag[] tags) { | |||
|  | 25 | MarkFinalState(s); | |||
|  | 26 | SetStateTag(s, tags); | |||
|  | 27 | } | |||
|  | 28 | ||||
|  | 29 | public void SetStateTag(int s, TTag[] tags) { | |||
|  | 30 | Safe.ArgumentNotNull(tags, "tags"); | |||
|  | 31 | m_tags[s] = tags; | |||
|  | 32 | } | |||
|  | 33 | ||||
|  | 34 | public TTag[] GetStateTag(int s) { | |||
|  | 35 | TTag[] tags; | |||
|  | 36 | return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; | |||
|  | 37 | } | |||
|  | 38 | ||||
|  | 39 | public new DFAStateDescriptor<TTag>[] CreateTransitionTable() { | |||
|  | 40 | var table = new DFAStateDescriptor<TTag>[StateCount]; | |||
|  | 41 | ||||
|  | 42 | foreach (var t in this) { | |||
|  | 43 | if (table[t.s1].transitions == null) | |||
|  | 44 | table[t.s1] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s1), GetStateTag(t.s1)); | |||
|  | 45 | if (table[t.s2].transitions == null) | |||
|  | 46 | table[t.s2] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s2), GetStateTag(t.s2)); | |||
|  | 47 | table[t.s1].transitions[t.edge] = t.s2; | |||
|  | 48 | } | |||
|  | 49 | ||||
|  | 50 | return table; | |||
|  | 51 | } | |||
|  | 52 | ||||
|  | 53 | /// <summary> | |||
|  | 54 | /// Optimize the specified alphabet. | |||
|  | 55 | /// </summary> | |||
|  | 56 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> | |||
|  | 57 | public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { | |||
|  | 58 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |||
|  | 59 | ||||
|  | 60 | var dfa = new RegularDFA<TInput, TTag>(alphabet); | |||
|  | 61 | ||||
|  | 62 | var states = new DummyAlphabet(StateCount); | |||
|  | 63 | var alphaMap = new Dictionary<int,int>(); | |||
|  | 64 | var stateMap = new Dictionary<int,int>(); | |||
|  | 65 | ||||
|  | 66 | Optimize(dfa, alphaMap, stateMap); | |||
|  | 67 | ||||
|  | 68 | // mark tags in the new DFA | |||
|  | 69 | foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value )) | |||
|  | 70 | dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray()); | |||
|  | 71 | ||||
|  | 72 | // make the alphabet for the new DFA | |||
|  | 73 | foreach (var pair in alphaMap) | |||
|  | 74 | alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value); | |||
|  | 75 | ||||
|  | 76 | return dfa; | |||
|  | 77 | } | |||
|  | 78 | ||||
|  | 79 | protected override IEnumerable<HashSet<int>> GroupFinalStates() { | |||
|  | 80 | var arrayComparer = new CustomEqualityComparer<TTag[]>( | |||
|  | 81 | (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)), | |||
|  | 82 | x => x.Sum(it => x.GetHashCode()) | |||
|  | 83 | ); | |||
|  | 84 | return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g)); | |||
|  | 85 | } | |||
|  | 86 | ||||
|  | 87 | } | |||
|  | 88 | } | |||
|  | 89 | ||||
| @@ -0,0 +1,184 | |||||
|  | 1 | using Implab; | |||
|  | 2 | using System; | |||
|  | 3 | using System.Collections.Generic; | |||
|  | 4 | using System.Diagnostics; | |||
|  | 5 | using System.Linq; | |||
|  | 6 | ||||
|  | 7 | namespace Implab.Automaton.RegularExpressions { | |||
|  | 8 | /// <summary> | |||
|  | 9 | /// Используется для построения ДКА по регулярному выражению, сначала обходит | |||
|  | 10 | /// регулярное выражение и вычисляет followpos, затем используется метод | |||
|  | 11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |||
|  | 12 | /// </summary> | |||
|  | 13 | public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { | |||
|  | 14 | int m_idx; | |||
|  | 15 | Token<TTag> m_root; | |||
|  | 16 | HashSet<int> m_firstpos; | |||
|  | 17 | HashSet<int> m_lastpos; | |||
|  | 18 | ||||
|  | 19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |||
|  | 20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |||
|  | 21 | readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); | |||
|  | 22 | ||||
|  | 23 | public Dictionary<int, HashSet<int>> FollowposMap { | |||
|  | 24 | get { return m_followpos; } | |||
|  | 25 | } | |||
|  | 26 | ||||
|  | 27 | public HashSet<int> Followpos(int pos) { | |||
|  | 28 | HashSet<int> set; | |||
|  | 29 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | |||
|  | 30 | } | |||
|  | 31 | ||||
|  | 32 | bool Nullable(object n) { | |||
|  | 33 | if (n is EmptyToken<TTag> || n is StarToken<TTag>) | |||
|  | 34 | return true; | |||
|  | 35 | var altToken = n as AltToken<TTag>; | |||
|  | 36 | if (altToken != null) | |||
|  | 37 | return Nullable(altToken.Left) || Nullable(altToken.Right); | |||
|  | 38 | var catToken = n as CatToken<TTag>; | |||
|  | 39 | if (catToken != null) | |||
|  | 40 | return Nullable(catToken.Left) && Nullable(catToken.Right); | |||
|  | 41 | return false; | |||
|  | 42 | } | |||
|  | 43 | ||||
|  | 44 | ||||
|  | 45 | public void Visit(AltToken<TTag> token) { | |||
|  | 46 | if (m_root == null) | |||
|  | 47 | m_root = token; | |||
|  | 48 | var firtspos = new HashSet<int>(); | |||
|  | 49 | var lastpos = new HashSet<int>(); | |||
|  | 50 | ||||
|  | 51 | token.Left.Accept(this); | |||
|  | 52 | firtspos.UnionWith(m_firstpos); | |||
|  | 53 | lastpos.UnionWith(m_lastpos); | |||
|  | 54 | ||||
|  | 55 | token.Right.Accept(this); | |||
|  | 56 | firtspos.UnionWith(m_firstpos); | |||
|  | 57 | lastpos.UnionWith(m_lastpos); | |||
|  | 58 | ||||
|  | 59 | m_firstpos = firtspos; | |||
|  | 60 | m_lastpos = lastpos; | |||
|  | 61 | } | |||
|  | 62 | ||||
|  | 63 | public void Visit(StarToken<TTag> token) { | |||
|  | 64 | if (m_root == null) | |||
|  | 65 | m_root = token; | |||
|  | 66 | token.Token.Accept(this); | |||
|  | 67 | ||||
|  | 68 | foreach (var i in m_lastpos) | |||
|  | 69 | Followpos(i).UnionWith(m_firstpos); | |||
|  | 70 | } | |||
|  | 71 | ||||
|  | 72 | public void Visit(CatToken<TTag> token) { | |||
|  | 73 | if (m_root == null) | |||
|  | 74 | m_root = token; | |||
|  | 75 | ||||
|  | 76 | var firtspos = new HashSet<int>(); | |||
|  | 77 | var lastpos = new HashSet<int>(); | |||
|  | 78 | token.Left.Accept(this); | |||
|  | 79 | firtspos.UnionWith(m_firstpos); | |||
|  | 80 | var leftLastpos = m_lastpos; | |||
|  | 81 | ||||
|  | 82 | token.Right.Accept(this); | |||
|  | 83 | lastpos.UnionWith(m_lastpos); | |||
|  | 84 | var rightFirstpos = m_firstpos; | |||
|  | 85 | ||||
|  | 86 | if (Nullable(token.Left)) | |||
|  | 87 | firtspos.UnionWith(rightFirstpos); | |||
|  | 88 | ||||
|  | 89 | if (Nullable(token.Right)) | |||
|  | 90 | lastpos.UnionWith(leftLastpos); | |||
|  | 91 | ||||
|  | 92 | m_firstpos = firtspos; | |||
|  | 93 | m_lastpos = lastpos; | |||
|  | 94 | ||||
|  | 95 | foreach (var i in leftLastpos) | |||
|  | 96 | Followpos(i).UnionWith(rightFirstpos); | |||
|  | 97 | ||||
|  | 98 | } | |||
|  | 99 | ||||
|  | 100 | public void Visit(EmptyToken<TTag> token) { | |||
|  | 101 | if (m_root == null) | |||
|  | 102 | m_root = token; | |||
|  | 103 | } | |||
|  | 104 | ||||
|  | 105 | public void Visit(SymbolToken<TTag> token) { | |||
|  | 106 | if (m_root == null) | |||
|  | 107 | m_root = token; | |||
|  | 108 | m_idx++; | |||
|  | 109 | m_indexes[m_idx] = token.Value; | |||
|  | 110 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |||
|  | 111 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |||
|  | 112 | } | |||
|  | 113 | ||||
|  | 114 | public void Visit(EndToken<TTag> token) { | |||
|  | 115 | if (m_root == null) | |||
|  | 116 | m_root = token; | |||
|  | 117 | m_idx++; | |||
|  | 118 | m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | |||
|  | 119 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |||
|  | 120 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |||
|  | 121 | Followpos(m_idx); | |||
|  | 122 | m_ends.Add(m_idx, token.Tag); | |||
|  | 123 | } | |||
|  | 124 | ||||
|  | 125 | public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { | |||
|  | 126 | Safe.ArgumentNotNull(dfa,"dfa"); | |||
|  | 127 | ||||
|  | 128 | var states = new MapAlphabet<HashSet<int>>( | |||
|  | 129 | false, | |||
|  | 130 | new CustomEqualityComparer<HashSet<int>>( | |||
|  | 131 | (x, y) => x.SetEquals(y), | |||
|  | 132 | x => x.Sum(n => n.GetHashCode()) | |||
|  | 133 | )); | |||
|  | 134 | ||||
|  | 135 | var initialState = states.DefineSymbol(m_firstpos); | |||
|  | 136 | dfa.SetInitialState(initialState); | |||
|  | 137 | ||||
|  | 138 | var tags = GetStateTags(m_firstpos); | |||
|  | 139 | if (tags != null && tags.Length > 0) | |||
|  | 140 | dfa.MarkFinalState(initialState, tags); | |||
|  | 141 | ||||
|  | 142 | var inputMax = m_indexes.Values.Max(); | |||
|  | 143 | var queue = new Queue<HashSet<int>>(); | |||
|  | 144 | ||||
|  | 145 | queue.Enqueue(m_firstpos); | |||
|  | 146 | ||||
|  | 147 | while (queue.Count > 0) { | |||
|  | 148 | var state = queue.Dequeue(); | |||
|  | 149 | var s1 = states.Translate(state); | |||
|  | 150 | Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); | |||
|  | 151 | ||||
|  | 152 | for (int a = 0; a <= inputMax; a++) { | |||
|  | 153 | var next = new HashSet<int>(); | |||
|  | 154 | foreach (var p in state) { | |||
|  | 155 | if (m_indexes[p] == a) { | |||
|  | 156 | next.UnionWith(Followpos(p)); | |||
|  | 157 | } | |||
|  | 158 | } | |||
|  | 159 | if (next.Count > 0) { | |||
|  | 160 | int s2 = states.Translate(next); | |||
|  | 161 | if (s2 == DFAConst.UNCLASSIFIED_INPUT) { | |||
|  | 162 | s2 = states.DefineSymbol(next); | |||
|  | 163 | ||||
|  | 164 | tags = GetStateTags(next); | |||
|  | 165 | if (tags != null && tags.Length > 0) { | |||
|  | 166 | dfa.MarkFinalState(s2); | |||
|  | 167 | dfa.SetStateTag(s2, tags); | |||
|  | 168 | } | |||
|  | 169 | ||||
|  | 170 | queue.Enqueue(next); | |||
|  | 171 | } | |||
|  | 172 | dfa.Add(new AutomatonTransition(s1, s2, a)); | |||
|  | 173 | } | |||
|  | 174 | } | |||
|  | 175 | } | |||
|  | 176 | } | |||
|  | 177 | ||||
|  | 178 | TTag[] GetStateTags(IEnumerable<int> state) { | |||
|  | 179 | Debug.Assert(state != null); | |||
|  | 180 | return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); | |||
|  | 181 | } | |||
|  | 182 | ||||
|  | 183 | } | |||
|  | 184 | } | |||
| @@ -1,18 +1,18 | |||||
| 1 | namespace Implab.Automaton { |  | 1 | namespace Implab.Automaton { | |
| 2 | public struct DFAStateDescript |  | 2 | public struct DFAStateDescriptor { | |
| 3 | public readonly bool final; |  | 3 | public readonly bool final; | |
| 4 | public readonly int[] transitions; |  | 4 | public readonly int[] transitions; | |
| 5 |  | 5 | |||
| 6 |  | 6 | |||
| 7 | public DFAStateDescript |  | 7 | public DFAStateDescriptor(int[] transitions, bool final) { | |
| 8 | this.transitions = transitions; |  | 8 | this.transitions = transitions; | |
| 9 | this.final = final; |  | 9 | this.final = final; | |
| 10 | } |  | 10 | } | |
| 11 |  | 11 | |||
| 12 | public DFAStateDescript |  | 12 | public DFAStateDescriptor(int[] transitions) : this(transitions, false) { | |
| 13 | } |  | 13 | } | |
| 14 |  | 14 | |||
| 15 | public DFAStateDescript |  | 15 | public DFAStateDescriptor(int size, bool final) { | |
| 16 | Safe.ArgumentInRange(size, 0, int.MaxValue, "size"); |  | 16 | Safe.ArgumentInRange(size, 0, int.MaxValue, "size"); | |
| 17 |  | 17 | |||
| 18 | this.final = final; |  | 18 | this.final = final; | |
| @@ -100,6 +100,20 namespace Implab.Automaton { | |||||
| 100 | return GetEnumerator(); |  | 100 | return GetEnumerator(); | |
| 101 | } |  | 101 | } | |
| 102 |  | 102 | |||
|  | 103 | public DFAStateDescriptor[] CreateTransitionTable() { | |||
|  | 104 | var table = new DFAStateDescriptor[StateCount]; | |||
|  | 105 | ||||
|  | 106 | foreach (var t in this) { | |||
|  | 107 | if (table[t.s1].transitions == null) | |||
|  | 108 | table[t.s1] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s1)); | |||
|  | 109 | if (table[t.s2].transitions == null) | |||
|  | 110 | table[t.s2] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s2)); | |||
|  | 111 | table[t.s1].transitions[t.edge] = t.s2; | |||
|  | 112 | } | |||
|  | 113 | ||||
|  | 114 | return table; | |||
|  | 115 | } | |||
|  | 116 | ||||
| 103 | /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary> |  | 117 | /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary> | |
| 104 | /// <remarks> |  | 118 | /// <remarks> | |
| 105 | /// В процессе построения минимального автомата требуется разделить множество состояний, |  | 119 | /// В процессе построения минимального автомата требуется разделить множество состояний, | |
| @@ -71,8 +71,16 namespace Implab.Automaton { | |||||
| 71 | return true; |  | 71 | return true; | |
| 72 | } |  | 72 | } | |
| 73 |  | 73 | |||
|  | 74 | public IEnumerable<T> GetSymbols(int cls) { | |||
|  | 75 | for (var i = 0; i < m_map.Length; i++) | |||
|  | 76 | if (m_map[i] == cls) | |||
|  | 77 | yield return GetSymbolByIndex(i); | |||
|  | 78 | } | |||
|  | 79 | ||||
| 74 | public abstract int GetSymbolIndex(T symbol); |  | 80 | public abstract int GetSymbolIndex(T symbol); | |
| 75 |  | 81 | |||
|  | 82 | public abstract T GetSymbolByIndex(int index); | |||
|  | 83 | ||||
| 76 | public abstract IEnumerable<T> InputSymbols { get; } |  | 84 | public abstract IEnumerable<T> InputSymbols { get; } | |
| 77 |  | 85 | |||
| 78 | /// <summary> |  | 86 | /// <summary> | |
| @@ -38,7 +38,6 namespace Implab.Automaton { | |||||
| 38 | Safe.ArgumentNotNull(symbols, "symbols"); |  | 38 | Safe.ArgumentNotNull(symbols, "symbols"); | |
| 39 |  | 39 | |||
| 40 | m_nextCls = Math.Max(cls + 1, m_nextCls); |  | 40 | m_nextCls = Math.Max(cls + 1, m_nextCls); | |
| 41 | symbols = symbols.Distinct(); |  | |||
| 42 |  | 41 | |||
| 43 | foreach (var symbol in symbols) |  | 42 | foreach (var symbol in symbols) | |
| 44 | m_map[symbol] = cls; |  | 43 | m_map[symbol] = cls; | |
| @@ -68,6 +67,10 namespace Implab.Automaton { | |||||
| 68 | return m_supportUnclassified || m_map.ContainsKey(symbol); |  | 67 | return m_supportUnclassified || m_map.ContainsKey(symbol); | |
| 69 | } |  | 68 | } | |
| 70 |  | 69 | |||
|  | 70 | ||||
|  | 71 | public IEnumerable<T> GetSymbols(int cls) { | |||
|  | 72 | return m_map.Where(p => p.Value == cls).Select(p => p.Key); | |||
|  | 73 | } | |||
| 71 | #endregion |  | 74 | #endregion | |
| 72 | } |  | 75 | } | |
| 73 | } |  | 76 | } | |
| @@ -1,14 +1,14 | |||||
| 1 | using System; |  | 1 | using System; | |
| 2 |  | 2 | |||
| 3 | namespace Implab.Automaton.RegularExpressions { |  | 3 | namespace Implab.Automaton.RegularExpressions { | |
| 4 | public struct DFAStateDescriptor |  | 4 | public struct DFAStateDescriptor<T> { | |
| 5 | public readonly bool final; |  | 5 | public readonly bool final; | |
| 6 |  | 6 | |||
| 7 | public readonly int[] transitions; |  | 7 | public readonly int[] transitions; | |
| 8 |  | 8 | |||
| 9 | public readonly T[] tags; |  | 9 | public readonly T[] tags; | |
| 10 |  | 10 | |||
| 11 | public DFAStateDescriptor |  | 11 | public DFAStateDescriptor(int size, bool final, T[] tags) { | |
| 12 | Safe.ArgumentAssert(size >= 0, "size"); |  | 12 | Safe.ArgumentAssert(size >= 0, "size"); | |
| 13 | this.final = final; |  | 13 | this.final = final; | |
| 14 | this.tags = tags; |  | 14 | this.tags = tags; | |
| @@ -66,23 +66,21 namespace Implab.Automaton.RegularExpres | |||||
| 66 | return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); |  | 66 | return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); | |
| 67 | } |  | 67 | } | |
| 68 |  | 68 | |||
| 69 | protected |  | 69 | protected abstract IAlphabetBuilder<TSymbol> CreateAlphabet(); | |
| 70 | Safe.ArgumentNotNull(lang, "lang"); |  | |||
| 71 | Safe.ArgumentNotNull(dfaAlphabet, "dfaAlphabet"); |  | |||
| 72 |  | ||||
| 73 | var dfa = new RegularDFADefinition<TSymbol, TTag>(AlphabetBuilder); |  | |||
| 74 |  | 70 | |||
| 75 | var builder = new RegularDFABuilder<TTag>(); |  | 71 | protected RegularDFA<TSymbol, TTag> BuildDFA(Token<TTag> regexp) { | |
|  | 72 | ||||
|  | 73 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); | |||
| 76 |  | 74 | |||
| 77 | lang.Accept( builder ); |  | 75 | var visitor = new RegularExpressionVisitor<TTag>(); | |
|  | 76 | regexp.Accept( visitor ); | |||
| 78 |  | 77 | |||
| 79 |  |  | 78 | visitor.BuildDFA(dfa); | |
| 80 |  | 79 | |||
| 81 | if (dfa.IsFinalState(dfa.InitialState)) |  | 80 | if (dfa.IsFinalState(dfa.InitialState)) | |
| 82 | throw new ApplicationException("The specified language contains empty token"); |  | 81 | throw new ApplicationException("The specified language contains empty token"); | |
| 83 |  | 82 | |||
| 84 | dfa.Optimize( |  | 83 | return dfa.Optimize(CreateAlphabet()); | |
| 85 |  | ||||
| 86 | } |  | 84 | } | |
| 87 |  | 85 | |||
| 88 | } |  | 86 | } | |
| @@ -3,6 +3,7 using System; | |||||
| 3 | using System.Collections.Generic; |  | 3 | using System.Collections.Generic; | |
| 4 | using System.IO; |  | 4 | using System.IO; | |
| 5 | using Implab.Components; |  | 5 | using Implab.Components; | |
|  | 6 | using Implab.Automaton.RegularExpressions; | |||
| 6 |  | 7 | |||
| 7 | namespace Implab.Automaton { |  | 8 | namespace Implab.Automaton { | |
| 8 | /// <summary> |  | 9 | /// <summary> | |
| @@ -14,19 +15,23 namespace Implab.Automaton { | |||||
| 14 | /// конца токена и допустимости текущего символа. |  | 15 | /// конца токена и допустимости текущего символа. | |
| 15 | /// </remarks> |  | 16 | /// </remarks> | |
| 16 | public abstract class Scanner<TTag> : Disposable { |  | 17 | public abstract class Scanner<TTag> : Disposable { | |
| 17 | struct ScannerConfig { |  | 18 | protected struct ScannerConfig { | |
| 18 | public DFAStateDescript |  | 19 | public readonly DFAStateDescriptor<TTag>[] states; | |
| 19 | public int[] alphabet |  | 20 | public readonly int[] alphabet; | |
| 20 | public int initialState; |  | 21 | public readonly int initialState; | |
|  | 22 | ||||
|  | 23 | public ScannerConfig(DFAStateDescriptor<TTag>[] states, int[] alphabet, int initialState) { | |||
|  | 24 | this.initialState = initialState; | |||
|  | 25 | this.alphabet = alphabet; | |||
|  | 26 | this.states = states; | |||
|  | 27 | } | |||
| 21 | } |  | 28 | } | |
| 22 |  | 29 | |||
| 23 | Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>(); |  | 30 | Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>(); | |
| 24 |  | 31 | |||
| 25 | DFAStateDescriptior<TTag>[] m_states; |  | 32 | ScannerConfig m_config; | |
| 26 | int[] m_alphabetMap; |  | |||
| 27 | int m_initialState; |  | |||
| 28 |  | 33 | |||
| 29 | protected DFAStateDescript |  | 34 | protected DFAStateDescriptor<TTag> m_currentState; | |
| 30 | int m_previewCode; |  | 35 | int m_previewCode; | |
| 31 |  | 36 | |||
| 32 | protected int m_tokenLen; |  | 37 | protected int m_tokenLen; | |
| @@ -41,15 +46,11 namespace Implab.Automaton { | |||||
| 41 | int m_chunkSize = 1024; // 1k |  | 46 | int m_chunkSize = 1024; // 1k | |
| 42 | int m_limit = 10 * 1024 * 1024; // 10Mb |  | 47 | int m_limit = 10 * 1024 * 1024; // 10Mb | |
| 43 |  | 48 | |||
| 44 | protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { |  | 49 | protected Scanner(ScannerConfig config) { | |
| 45 | Safe.ArgumentNotEmpty(states, "states"); |  | 50 | Safe.ArgumentNotEmpty(config.states, "config.states"); | |
| 46 | Safe.ArgumentNotNull(alphabet, "alphabet"); |  | 51 | Safe.ArgumentNotNull(config.alphabet, "config.alphabet"); | |
| 47 |  | 52 | |||
| 48 | m_ |  | 53 | m_config = config; | |
| 49 | m_alphabetMap = alphabet; |  | |||
| 50 | m_initialState = initialState; |  | |||
| 51 |  | ||||
| 52 | Feed(new char[0]); |  | |||
| 53 | } |  | 54 | } | |
| 54 |  | 55 | |||
| 55 | /// <summary> |  | 56 | /// <summary> | |
| @@ -110,7 +111,7 namespace Implab.Automaton { | |||||
| 110 | /// </summary> |  | 111 | /// </summary> | |
| 111 | protected TTag[] TokenTags { |  | 112 | protected TTag[] TokenTags { | |
| 112 | get { |  | 113 | get { | |
| 113 | return m_currentState.tag; |  | 114 | return m_currentState.tags; | |
| 114 | } |  | 115 | } | |
| 115 | } |  | 116 | } | |
| 116 |  | 117 | |||
| @@ -133,7 +134,7 namespace Implab.Automaton { | |||||
| 133 | if (m_pointer >= m_bufferSize) |  | 134 | if (m_pointer >= m_bufferSize) | |
| 134 | return false; |  | 135 | return false; | |
| 135 |  | 136 | |||
| 136 | m_currentState = m_states[m_initialState]; |  | 137 | m_currentState = m_config.states[m_config.initialState]; | |
| 137 | m_tokenLen = 0; |  | 138 | m_tokenLen = 0; | |
| 138 | m_tokenOffset = m_pointer; |  | 139 | m_tokenOffset = m_pointer; | |
| 139 | int nextState; |  | 140 | int nextState; | |
| @@ -151,7 +152,7 namespace Implab.Automaton { | |||||
| 151 | ) |  | 152 | ) | |
| 152 | ); |  | 153 | ); | |
| 153 | } |  | 154 | } | |
| 154 | m_currentState = m_states[nextState]; |  | 155 | m_currentState = m_config.states[nextState]; | |
| 155 | m_tokenLen++; |  | 156 | m_tokenLen++; | |
| 156 |  | 157 | |||
| 157 | } while (Shift()); |  | 158 | } while (Shift()); | |
| @@ -172,7 +173,7 namespace Implab.Automaton { | |||||
| 172 | return false; |  | 173 | return false; | |
| 173 | } |  | 174 | } | |
| 174 |  | 175 | |||
| 175 | m_previewCode = m_alphabet |  | 176 | m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; | |
| 176 |  | 177 | |||
| 177 | return true; |  | 178 | return true; | |
| 178 | } |  | 179 | } | |
| @@ -217,23 +218,14 namespace Implab.Automaton { | |||||
| 217 | /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей |  | 218 | /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей | |
| 218 | /// группировки. |  | 219 | /// группировки. | |
| 219 | /// </summary> |  | 220 | /// </summary> | |
| 220 | /// <param name |  | 221 | /// <param name = "config"></param> | |
| 221 | /// <param name="alphabet">Таблица входных символов для нового ДКА</param> |  | 222 | protected void Switch(ScannerConfig config) { | |
| 222 | /// <param name = "initialState"></param> |  | 223 | Safe.ArgumentNotNull(config.states, "config.states"); | |
| 223 | protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { |  | |||
| 224 | Safe.ArgumentNotNull(states, "dfa"); |  | |||
| 225 |  | 224 | |||
| 226 | m_defs.Push( |  | 225 | m_defs.Push(m_config); | |
| 227 | states = m_states, |  | 226 | m_config = config; | |
| 228 | alphabetMap = m_alphabetMap, |  | |||
| 229 | initialState = m_initialState |  | |||
| 230 | }); |  | |||
| 231 |  | 227 | |||
| 232 | m_states = states; |  | 228 | m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; | |
| 233 | m_alphabetMap = alphabet; |  | |||
| 234 | m_initialState = initialState; |  | |||
| 235 |  | ||||
| 236 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; |  | |||
| 237 | } |  | 229 | } | |
| 238 |  | 230 | |||
| 239 | /// <summary> |  | 231 | /// <summary> | |
| @@ -242,11 +234,9 namespace Implab.Automaton { | |||||
| 242 | protected void Restore() { |  | 234 | protected void Restore() { | |
| 243 | if (m_defs.Count == 0) |  | 235 | if (m_defs.Count == 0) | |
| 244 | throw new InvalidOperationException(); |  | 236 | throw new InvalidOperationException(); | |
| 245 |  |  | 237 | m_config = m_defs.Pop(); | |
| 246 | m_states = prev.states; |  | 238 | ||
| 247 | m_alphabetMap = prev.alphabetMap; |  | 239 | m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; | |
| 248 | m_initialState = prev.initialState; |  | |||
| 249 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; |  | |||
| 250 | } |  | 240 | } | |
| 251 |  | 241 | |||
| 252 | protected override void Dispose(bool disposing) { |  | 242 | protected override void Dispose(bool disposing) { | |
| @@ -13,6 +13,10 namespace Implab.Formats { | |||||
| 13 | return (int)symbol; |  | 13 | return (int)symbol; | |
| 14 | } |  | 14 | } | |
| 15 |  | 15 | |||
|  | 16 | public override byte GetSymbolByIndex(int index) { | |||
|  | 17 | return (byte)index; | |||
|  | 18 | } | |||
|  | 19 | ||||
| 16 | public IEnumerable<byte> InputSymbols { |  | 20 | public IEnumerable<byte> InputSymbols { | |
| 17 | get { |  | 21 | get { | |
| 18 | return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>(); |  | 22 | return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>(); | |
| @@ -13,6 +13,10 namespace Implab.Formats { | |||||
| 13 | return symbol; |  | 13 | return symbol; | |
| 14 | } |  | 14 | } | |
| 15 |  | 15 | |||
|  | 16 | public override char GetSymbolByIndex(int index) { | |||
|  | 17 | return (char)index; | |||
|  | 18 | } | |||
|  | 19 | ||||
| 16 | public override IEnumerable<char> InputSymbols { |  | 20 | public override IEnumerable<char> InputSymbols { | |
| 17 | get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast<char>(); } |  | 21 | get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast<char>(); } | |
| 18 | } |  | 22 | } | |
| @@ -1,6 +1,7 | |||||
| 1 | using System.Linq; |  | 1 | using System.Linq; | |
| 2 | using Implab.Automaton.RegularExpressions; |  | 2 | using Implab.Automaton.RegularExpressions; | |
| 3 | using System; |  | 3 | using System; | |
|  | 4 | using Implab.Automaton; | |||
| 4 |  | 5 | |||
| 5 | namespace Implab.Formats.JSON { |  | 6 | namespace Implab.Formats.JSON { | |
| 6 | class JSONGrammar : Grammar<char,JSONGrammar.TokenType> { |  | 7 | class JSONGrammar : Grammar<char,JSONGrammar.TokenType> { | |
| @@ -35,8 +36,8 namespace Implab.Formats.JSON { | |||||
| 35 | get { return _instance.Value; } |  | 36 | get { return _instance.Value; } | |
| 36 | } |  | 37 | } | |
| 37 |  | 38 | |||
| 38 | readonly Regular |  | 39 | readonly RegularDFA<char, TokenType> m_jsonDFA; | |
| 39 | readonly Regular |  | 40 | readonly RegularDFA<char, TokenType> m_stringDFA; | |
| 40 |  | 41 | |||
| 41 | public JSONGrammar() { |  | 42 | public JSONGrammar() { | |
| 42 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); |  | 43 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); | |
| @@ -87,21 +88,17 namespace Implab.Formats.JSON { | |||||
| 87 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); |  | 88 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); | |
| 88 |  | 89 | |||
| 89 |  | 90 | |||
| 90 | m_jsonDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet()); |  | 91 | m_jsonDFA = BuildDFA(jsonExpression); | |
| 91 | BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); |  | 92 | m_stringDFA = BuildDFA(jsonStringExpression); | |
| 92 |  | ||||
| 93 |  | ||||
| 94 | m_stringDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet()); |  | |||
| 95 | BuildDFA(jsonStringExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); |  | |||
| 96 | } |  | 93 | } | |
| 97 |  | 94 | |||
| 98 | public Regular |  | 95 | public RegularDFA<char, TokenType> JsonDFA { | |
| 99 | get { |  | 96 | get { | |
| 100 | return m_jsonDFA; |  | 97 | return m_jsonDFA; | |
| 101 | } |  | 98 | } | |
| 102 | } |  | 99 | } | |
| 103 |  | 100 | |||
| 104 | public RegularDFA |  | 101 | public RegularDFA<char,TokenType> JsonStringDFA { | |
| 105 | get { |  | 102 | get { | |
| 106 | return m_stringDFA; |  | 103 | return m_stringDFA; | |
| 107 | } |  | 104 | } | |
| @@ -110,6 +107,10 namespace Implab.Formats.JSON { | |||||
| 110 | Token<TokenType> SymbolRangeToken(char start, char stop) { |  | 107 | Token<TokenType> SymbolRangeToken(char start, char stop) { | |
| 111 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); |  | 108 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); | |
| 112 | } |  | 109 | } | |
|  | 110 | ||||
|  | 111 | protected override IAlphabetBuilder<char> CreateAlphabet() { | |||
|  | 112 | return new CharAlphabet(); | |||
|  | 113 | } | |||
| 113 |  | 114 | |||
| 114 | } |  | 115 | } | |
| 115 | } |  | 116 | } | |
| @@ -86,9 +86,9 namespace Implab.Formats.JSON { | |||||
| 86 | return Token<object>.New(input.Select(t => _alphabet.Translate(t)).ToArray()); |  | 86 | return Token<object>.New(input.Select(t => _alphabet.Translate(t)).ToArray()); | |
| 87 | } |  | 87 | } | |
| 88 |  | 88 | |||
| 89 | static RegularDFA |  | 89 | static RegularDFA<JsonTokenType,object> CreateDFA(Token<object> expr) { | |
| 90 | var builder = new Regular |  | 90 | var builder = new RegularExpressionVisitor<object>(); | |
| 91 | var dfa = new RegularDFA |  | 91 | var dfa = new RegularDFA<JsonTokenType,object>(_alphabet); | |
| 92 |  | 92 | |||
| 93 | expr.Accept(builder); |  | 93 | expr.Accept(builder); | |
| 94 |  | 94 | |||
| @@ -170,7 +170,6 | |||||
| 170 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> |  | 170 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> | |
| 171 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> |  | 171 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> | |
| 172 | <Compile Include="Automaton\AutomatonTransition.cs" /> |  | 172 | <Compile Include="Automaton\AutomatonTransition.cs" /> | |
| 173 | <Compile Include="Automaton\RegularExpressions\RegularDFABuilder.cs" /> |  | |||
| 174 | <Compile Include="Formats\JSON\JSONElementContext.cs" /> |  | 173 | <Compile Include="Formats\JSON\JSONElementContext.cs" /> | |
| 175 | <Compile Include="Formats\JSON\JSONElementType.cs" /> |  | 174 | <Compile Include="Formats\JSON\JSONElementType.cs" /> | |
| 176 | <Compile Include="Formats\JSON\JSONGrammar.cs" /> |  | 175 | <Compile Include="Formats\JSON\JSONGrammar.cs" /> | |
| @@ -183,13 +182,14 | |||||
| 183 | <Compile Include="Formats\JSON\StringTranslator.cs" /> |  | 182 | <Compile Include="Formats\JSON\StringTranslator.cs" /> | |
| 184 | <Compile Include="Automaton\MapAlphabet.cs" /> |  | 183 | <Compile Include="Automaton\MapAlphabet.cs" /> | |
| 185 | <Compile Include="Automaton\DummyAlphabet.cs" /> |  | 184 | <Compile Include="Automaton\DummyAlphabet.cs" /> | |
| 186 | <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" /> |  | |||
| 187 | <Compile Include="Formats\CharAlphabet.cs" /> |  | 185 | <Compile Include="Formats\CharAlphabet.cs" /> | |
| 188 | <Compile Include="Formats\ByteAlphabet.cs" /> |  | 186 | <Compile Include="Formats\ByteAlphabet.cs" /> | |
| 189 | <Compile Include="Formats\RegularCharDFADefinition.cs" /> |  | |||
| 190 | <Compile Include="Automaton\IDFATable.cs" /> |  | 187 | <Compile Include="Automaton\IDFATable.cs" /> | |
| 191 | <Compile Include="Automaton\IDFATableBuilder.cs" /> |  | 188 | <Compile Include="Automaton\IDFATableBuilder.cs" /> | |
| 192 | <Compile Include="Automaton\DFATable.cs" /> |  | 189 | <Compile Include="Automaton\DFATable.cs" /> | |
|  | 190 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> | |||
|  | 191 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> | |||
|  | 192 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> | |||
| 193 | <Compile Include="Automaton\RegularExpressions\DFAStateDescriptorT.cs" /> |  | 193 | <Compile Include="Automaton\RegularExpressions\DFAStateDescriptorT.cs" /> | |
| 194 | </ItemGroup> |  | 194 | </ItemGroup> | |
| 195 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |  | 195 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | |
| 1 | NO CONTENT: file was removed |  | NO CONTENT: file was removed | 
| 1 | NO CONTENT: file was removed |  | NO CONTENT: file was removed | 
| 1 | NO CONTENT: file was removed |  | NO CONTENT: file was removed | 
        
        General Comments 0
    
    
  
  
                      You need to be logged in to leave comments.
                      Login now
                    
                