| @@ -0,0 +1,9 | |||||
|  | 1 | ||||
|  | 2 | namespace Implab.Automaton { | |||
|  | 3 | public static class DFAConst { | |||
|  | 4 | public const int UNREACHABLE_STATE = -1; | |||
|  | 5 | ||||
|  | 6 | public const int UNCLASSIFIED_INPUT = 0; | |||
|  | 7 | } | |||
|  | 8 | } | |||
|  | 9 | ||||
| @@ -0,0 +1,37 | |||||
|  | 1 | using Implab; | |||
|  | 2 | using System; | |||
|  | 3 | using System.Collections.Generic; | |||
|  | 4 | using System.Diagnostics; | |||
|  | 5 | using System.Linq; | |||
|  | 6 | ||||
|  | 7 | namespace Implab.Automaton.RegularExpressions { | |||
|  | 8 | /// <summary> | |||
|  | 9 | /// </summary> | |||
|  | 10 | public class RegularExpressionVisitor<TTag> : RegularExpressionVisitor { | |||
|  | 11 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | |||
|  | 12 | ||||
|  | 13 | readonly ITaggedDFABuilder<TTag> m_builder; | |||
|  | 14 | ||||
|  | 15 | public RegularExpressionVisitor(ITaggedDFABuilder<TTag> builder) : base(builder) { | |||
|  | 16 | m_builder = builder; | |||
|  | 17 | } | |||
|  | 18 | ||||
|  | 19 | public override void Visit(EndToken token) { | |||
|  | 20 | base.Visit(token); | |||
|  | 21 | var tagged = token as EndToken<TTag>; | |||
|  | 22 | if (tagged != null) | |||
|  | 23 | m_tags.Add(Index, tagged.Tag); | |||
|  | 24 | } | |||
|  | 25 | ||||
|  | 26 | protected override void MarkFinalState(HashSet<int> state) { | |||
|  | 27 | base.MarkFinalState(state); | |||
|  | 28 | m_builder.SetStateTag(Translate(state), GetStateTags(state)); | |||
|  | 29 | } | |||
|  | 30 | ||||
|  | 31 | TTag[] GetStateTags(IEnumerable<int> state) { | |||
|  | 32 | Debug.Assert(state != null); | |||
|  | 33 | return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); | |||
|  | 34 | } | |||
|  | 35 | ||||
|  | 36 | } | |||
|  | 37 | } | |||
| @@ -0,0 +1,44 | |||||
|  | 1 | using System; | |||
|  | 2 | using System.Threading; | |||
|  | 3 | ||||
|  | 4 | namespace Implab.Components { | |||
|  | 5 | public class LazyAndWeak<T> where T : class { | |||
|  | 6 | ||||
|  | 7 | readonly Func<T> m_factory; | |||
|  | 8 | readonly object m_lock; | |||
|  | 9 | WeakReference m_reference; | |||
|  | 10 | ||||
|  | 11 | ||||
|  | 12 | public LazyAndWeak(Func<T> factory, bool useLock) { | |||
|  | 13 | Safe.ArgumentNotNull(factory, "factory"); | |||
|  | 14 | m_factory = factory; | |||
|  | 15 | m_lock = useLock ? new object() : null; | |||
|  | 16 | } | |||
|  | 17 | ||||
|  | 18 | public LazyAndWeak(Func<T> factory) : this(factory, false) { | |||
|  | 19 | } | |||
|  | 20 | ||||
|  | 21 | public T Value { | |||
|  | 22 | get { | |||
|  | 23 | while (true) { | |||
|  | 24 | var weak = m_reference; | |||
|  | 25 | T value; | |||
|  | 26 | if (weak != null) { | |||
|  | 27 | value = weak.Target as T; | |||
|  | 28 | if (value != null) | |||
|  | 29 | return value; | |||
|  | 30 | } | |||
|  | 31 | ||||
|  | 32 | if (m_lock == null) { | |||
|  | 33 | value = m_factory(); | |||
|  | 34 | ||||
|  | 35 | if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak) | |||
|  | 36 | return value; | |||
|  | 37 | } else { | |||
|  | 38 | } | |||
|  | 39 | } | |||
|  | 40 | } | |||
|  | 41 | } | |||
|  | 42 | } | |||
|  | 43 | } | |||
|  | 44 | ||||
| @@ -105,7 +105,7 namespace Implab.Automaton { | |||||
| 105 |  | 105 | |||
| 106 | for (int i = 0; i < StateCount; i++) |  | 106 | for (int i = 0; i < StateCount; i++) | |
| 107 | for (int j = 0; i < AlphabetSize; j++) |  | 107 | for (int j = 0; i < AlphabetSize; j++) | |
| 108 | table[i, j] = |  | 108 | table[i, j] = AutomatonConst.UNREACHABLE_STATE; | |
| 109 |  | 109 | |||
| 110 | foreach (var t in this) |  | 110 | foreach (var t in this) | |
| 111 | table[t.s1,t.edge] = t.s2; |  | 111 | table[t.s1,t.edge] = t.s2; | |
| @@ -273,11 +273,11 namespace Implab.Automaton { | |||||
| 273 |  | 273 | |||
| 274 | var nextCls = 0; |  | 274 | var nextCls = 0; | |
| 275 | foreach (var item in minClasses) { |  | 275 | foreach (var item in minClasses) { | |
| 276 | if (nextCls == |  | 276 | if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT) | |
| 277 | nextCls++; |  | 277 | nextCls++; | |
| 278 |  | 278 | |||
| 279 | // сохраняем DFAConst.UNCLASSIFIED_INPUT |  | 279 | // сохраняем DFAConst.UNCLASSIFIED_INPUT | |
| 280 | var cls = item.Contains( |  | 280 | var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls; | |
| 281 |  | 281 | |||
| 282 | foreach (var a in item) |  | 282 | foreach (var a in item) | |
| 283 | alphabetMap[a] = cls; |  | 283 | alphabetMap[a] = cls; | |
| @@ -54,7 +54,7 namespace Implab.Automaton { | |||||
| 54 | return cls; |  | 54 | return cls; | |
| 55 | if (!m_supportUnclassified) |  | 55 | if (!m_supportUnclassified) | |
| 56 | throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet"); |  | 56 | throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet"); | |
| 57 | return |  | 57 | return AutomatonConst.UNCLASSIFIED_INPUT; | |
| 58 | } |  | 58 | } | |
| 59 |  | 59 | |||
| 60 | public int Count { |  | 60 | public int Count { | |
| @@ -1,13 +1,11 | |||||
| 1 | using Implab; |  | 1 | namespace Implab.Automaton.RegularExpressions { | |
| 2 |  | ||||
| 3 | namespace Implab.Automaton.RegularExpressions { |  | |||
| 4 | /// <summary> |  | 2 | /// <summary> | |
| 5 | /// Конечный символ расширенного регулярного выражения, при построении ДКА |  | 3 | /// Конечный символ расширенного регулярного выражения, при построении ДКА | |
| 6 | /// используется для определения конечных состояний. |  | 4 | /// используется для определения конечных состояний. | |
| 7 | /// </summary> |  | 5 | /// </summary> | |
| 8 | public class EndToken<TTag>: Token { |  | 6 | public class EndToken<TTag>: EndToken { | |
| 9 |  | 7 | |||
| 10 | TTag m_tag; |  | 8 | readonly TTag m_tag; | |
| 11 |  | 9 | |||
| 12 | public EndToken(TTag tag) { |  | 10 | public EndToken(TTag tag) { | |
| 13 | m_tag = tag; |  | 11 | m_tag = tag; | |
| @@ -20,14 +18,6 namespace Implab.Automaton.RegularExpres | |||||
| 20 | public TTag Tag { |  | 18 | public TTag Tag { | |
| 21 | get { return m_tag; } |  | 19 | get { return m_tag; } | |
| 22 | } |  | 20 | } | |
| 23 |  | 21 | |||
| 24 | public override void Accept(IVisitor visitor) { |  | |||
| 25 | Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor"); |  | |||
| 26 | Safe.ArgumentNotNull(visitor, "visitor"); |  | |||
| 27 | ((IVisitor<TTag>)visitor).Visit(this); |  | |||
| 28 | } |  | |||
| 29 | public override string ToString() { |  | |||
| 30 | return "#"; |  | |||
| 31 | } |  | |||
| 32 | } |  | 22 | } | |
| 33 | } |  | 23 | } | |
| @@ -2,12 +2,12 | |||||
| 2 | using System.Linq; |  | 2 | using System.Linq; | |
| 3 |  | 3 | |||
| 4 | namespace Implab.Automaton.RegularExpressions { |  | 4 | namespace Implab.Automaton.RegularExpressions { | |
| 5 | public class |  | 5 | public class TaggedDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { | |
| 6 |  | 6 | |||
| 7 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); |  | 7 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); | |
| 8 | readonly IAlphabet<TInput> m_alphabet; |  | 8 | readonly IAlphabet<TInput> m_alphabet; | |
| 9 |  | 9 | |||
| 10 | public |  | 10 | public TaggedDFA(IAlphabet<TInput> alphabet) { | |
| 11 | Safe.ArgumentNotNull(alphabet, "aplhabet"); |  | 11 | Safe.ArgumentNotNull(alphabet, "aplhabet"); | |
| 12 |  | 12 | |||
| 13 | m_alphabet = alphabet; |  | 13 | m_alphabet = alphabet; | |
| @@ -48,10 +48,10 namespace Implab.Automaton.RegularExpres | |||||
| 48 | /// Optimize the specified alphabet. |  | 48 | /// Optimize the specified alphabet. | |
| 49 | /// </summary> |  | 49 | /// </summary> | |
| 50 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> |  | 50 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> | |
| 51 | public |  | 51 | public TaggedDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { | |
| 52 | Safe.ArgumentNotNull(alphabet, "alphabet"); |  | 52 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |
| 53 |  | 53 | |||
| 54 | var dfa = new |  | 54 | var dfa = new TaggedDFA<TInput, TTag>(alphabet); | |
| 55 |  | 55 | |||
| 56 | var states = new DummyAlphabet(StateCount); |  | 56 | var states = new DummyAlphabet(StateCount); | |
| 57 | var alphaMap = new Dictionary<int,int>(); |  | 57 | var alphaMap = new Dictionary<int,int>(); | |
| @@ -10,7 +10,7 namespace Implab.Automaton.RegularExpres | |||||
| 10 | /// регулярное выражение и вычисляет followpos, затем используется метод |  | 10 | /// регулярное выражение и вычисляет followpos, затем используется метод | |
| 11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. |  | 11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |
| 12 | /// </summary> |  | 12 | /// </summary> | |
| 13 | public class RegularExpressionVisitor |  | 13 | public class RegularExpressionVisitor : IVisitor { | |
| 14 | int m_idx; |  | 14 | int m_idx; | |
| 15 | Token m_root; |  | 15 | Token m_root; | |
| 16 | HashSet<int> m_firstpos; |  | 16 | HashSet<int> m_firstpos; | |
| @@ -19,13 +19,23 namespace Implab.Automaton.RegularExpres | |||||
| 19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); |  | 19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |
| 20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); |  | 20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |
| 21 | readonly HashSet<int> m_ends = new HashSet<int>(); |  | 21 | readonly HashSet<int> m_ends = new HashSet<int>(); | |
| 22 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); |  | |||
| 23 |  | 22 | |||
| 24 | public Dictionary<int, HashSet<int>> FollowposMap { |  | 23 | readonly IDFATableBuilder m_builder; | |
| 25 | get { return m_followpos; } |  | 24 | readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( | |
|  | 25 | false, | |||
|  | 26 | new CustomEqualityComparer<HashSet<int>>( | |||
|  | 27 | (x, y) => x.SetEquals(y), | |||
|  | 28 | x => x.Sum(n => n.GetHashCode()) | |||
|  | 29 | ) | |||
|  | 30 | ); | |||
|  | 31 | ||||
|  | 32 | public RegularExpressionVisitor(IDFATableBuilder builder) { | |||
|  | 33 | Safe.ArgumentNotNull(builder, "builder"); | |||
|  | 34 | ||||
|  | 35 | m_builder = builder; | |||
| 26 | } |  | 36 | } | |
| 27 |  | 37 | |||
| 28 |  |  | 38 | HashSet<int> Followpos(int pos) { | |
| 29 | HashSet<int> set; |  | 39 | HashSet<int> set; | |
| 30 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); |  | 40 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | |
| 31 | } |  | 41 | } | |
| @@ -42,6 +52,9 namespace Implab.Automaton.RegularExpres | |||||
| 42 | return false; |  | 52 | return false; | |
| 43 | } |  | 53 | } | |
| 44 |  | 54 | |||
|  | 55 | protected int Index { | |||
|  | 56 | get { return m_idx; } | |||
|  | 57 | } | |||
| 45 |  | 58 | |||
| 46 | public void Visit(AltToken token) { |  | 59 | public void Visit(AltToken token) { | |
| 47 | if (m_root == null) |  | 60 | if (m_root == null) | |
| @@ -112,45 +125,23 namespace Implab.Automaton.RegularExpres | |||||
| 112 | m_lastpos = new HashSet<int>(new[] { m_idx }); |  | 125 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
| 113 | } |  | 126 | } | |
| 114 |  | 127 | |||
| 115 | public void Visit(EndToken |  | 128 | public virtual void Visit(EndToken token) { | |
| 116 | if (m_root == null) |  | 129 | if (m_root == null) | |
| 117 | m_root = token; |  | 130 | m_root = token; | |
| 118 | m_idx++; |  | 131 | m_idx++; | |
| 119 | m_indexes[m_idx] = |  | 132 | m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; | |
| 120 | m_firstpos = new HashSet<int>(new[] { m_idx }); |  | |||
| 121 | m_lastpos = new HashSet<int>(new[] { m_idx }); |  | |||
| 122 | Followpos(m_idx); |  | |||
| 123 | m_ends.Add(m_idx); |  | |||
| 124 | m_tags.Add(m_idx, token.Tag); |  | |||
| 125 | } |  | |||
| 126 |  | ||||
| 127 | public void Visit(EndToken token) { |  | |||
| 128 | if (m_root == null) |  | |||
| 129 | m_root = token; |  | |||
| 130 | m_idx++; |  | |||
| 131 | m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; |  | |||
| 132 | m_firstpos = new HashSet<int>(new[] { m_idx }); |  | 133 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |
| 133 | m_lastpos = new HashSet<int>(new[] { m_idx }); |  | 134 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
| 134 | Followpos(m_idx); |  | 135 | Followpos(m_idx); | |
| 135 | m_ends.Add(m_idx); |  | 136 | m_ends.Add(m_idx); | |
| 136 | } |  | 137 | } | |
| 137 |  | 138 | |||
| 138 | public void BuildDFA( |  | 139 | public void BuildDFA() { | |
| 139 | Safe.ArgumentNotNull(dfa,"dfa"); |  | 140 | AddState(m_firstpos); | |
|  | 141 | SetInitialState(m_firstpos); | |||
| 140 |  | 142 | |||
| 141 | var states = new MapAlphabet<HashSet<int>>( |  | 143 | if(IsFinal(m_firstpos)) | |
| 142 | false, |  | 144 | MarkFinalState(m_firstpos); | |
| 143 | new CustomEqualityComparer<HashSet<int>>( |  | |||
| 144 | (x, y) => x.SetEquals(y), |  | |||
| 145 | x => x.Sum(n => n.GetHashCode()) |  | |||
| 146 | )); |  | |||
| 147 |  | ||||
| 148 | var initialState = states.DefineSymbol(m_firstpos); |  | |||
| 149 | dfa.SetInitialState(initialState); |  | |||
| 150 |  | ||||
| 151 | var tags = GetStateTags(m_firstpos); |  | |||
| 152 | if (tags != null && tags.Length > 0) |  | |||
| 153 | dfa.MarkFinalState(initialState, tags); |  | |||
| 154 |  | 145 | |||
| 155 | var inputMax = m_indexes.Values.Max(); |  | 146 | var inputMax = m_indexes.Values.Max(); | |
| 156 | var queue = new Queue<HashSet<int>>(); |  | 147 | var queue = new Queue<HashSet<int>>(); | |
| @@ -158,49 +149,64 namespace Implab.Automaton.RegularExpres | |||||
| 158 | queue.Enqueue(m_firstpos); |  | 149 | queue.Enqueue(m_firstpos); | |
| 159 |  | 150 | |||
| 160 | while (queue.Count > 0) { |  | 151 | while (queue.Count > 0) { | |
| 161 | var s |  | 152 | var s1 = queue.Dequeue(); | |
| 162 | var s1 = states.Translate(state); |  | |||
| 163 | Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); |  | |||
| 164 |  | 153 | |||
| 165 | for (int a = 0; a <= inputMax; a++) { |  | 154 | for (int a = 0; a <= inputMax; a++) { | |
| 166 | var |  | 155 | var s2 = new HashSet<int>(); | |
| 167 | foreach (var p in s |  | 156 | foreach (var p in s1) { | |
| 168 | if (m_indexes[p] == a) { |  | 157 | if (m_indexes[p] == a) { | |
| 169 |  |  | 158 | s2.UnionWith(Followpos(p)); | |
| 170 | } |  | 159 | } | |
| 171 | } |  | 160 | } | |
| 172 | if ( |  | 161 | if (s2.Count > 0) { | |
| 173 | i |  | 162 | if (!HasState(s2)) { | |
| 174 |  |  | 163 | AddState(s2); | |
| 175 | s2 |  | 164 | if (IsFinal(s2)) | |
| 176 |  |  | 165 | MarkFinalState(s2); | |
| 177 |  |  | 166 | ||
|  | 167 | queue.Enqueue(s2); | |||
|  | 168 | } | |||
| 178 |  | 169 | |||
| 179 |  |  | 170 | DefineTransition(s1, s2, a); | |
| 180 |  | ||||
| 181 | dfa.MarkFinalState(s2); |  | |||
| 182 | tags = GetStateTags(next); |  | |||
| 183 | if (tags != null && tags.Length > 0) |  | |||
| 184 | dfa.SetStateTag(s2, tags); |  | |||
| 185 | } |  | |||
| 186 |  | ||||
| 187 | queue.Enqueue(next); |  | |||
| 188 | } |  | |||
| 189 | dfa.Add(new AutomatonTransition(s1, s2, a)); |  | |||
| 190 | } |  | 171 | } | |
|  | 172 | ||||
| 191 | } |  | 173 | } | |
| 192 | } |  | 174 | } | |
| 193 | } |  | 175 | } | |
| 194 |  | 176 | |||
|  | 177 | protected bool HasState(HashSet<int> state) { | |||
|  | 178 | return m_states.Contains(state); | |||
|  | 179 | } | |||
|  | 180 | ||||
|  | 181 | protected void AddState(HashSet<int> state) { | |||
|  | 182 | Debug.Assert(!HasState(state)); | |||
|  | 183 | ||||
|  | 184 | m_states.DefineSymbol(state); | |||
|  | 185 | } | |||
|  | 186 | ||||
|  | 187 | protected int Translate(HashSet<int> state) { | |||
|  | 188 | Debug.Assert(HasState(state)); | |||
|  | 189 | ||||
|  | 190 | return m_states.Translate(state); | |||
|  | 191 | } | |||
|  | 192 | ||||
|  | 193 | protected virtual void SetInitialState(HashSet<int> state) { | |||
|  | 194 | m_builder.SetInitialState(Translate(state)); | |||
|  | 195 | } | |||
|  | 196 | ||||
|  | 197 | protected virtual void MarkFinalState(HashSet<int> state) { | |||
|  | 198 | m_builder.MarkFinalState(Translate(state)); | |||
|  | 199 | } | |||
|  | 200 | ||||
|  | 201 | protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { | |||
|  | 202 | ||||
|  | 203 | m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); | |||
|  | 204 | } | |||
|  | 205 | ||||
| 195 | bool IsFinal(IEnumerable<int> state) { |  | 206 | bool IsFinal(IEnumerable<int> state) { | |
| 196 | Debug.Assert(state != null); |  | 207 | Debug.Assert(state != null); | |
| 197 | return state.Any(m_ends.Contains); |  | 208 | return state.Any(m_ends.Contains); | |
| 198 | } |  | 209 | } | |
| 199 |  | 210 | |||
| 200 | TTag[] GetStateTags(IEnumerable<int> state) { |  | |||
| 201 | Debug.Assert(state != null); |  | |||
| 202 | return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); |  | |||
| 203 | } |  | |||
| 204 |  | ||||
| 205 | } |  | 211 | } | |
| 206 | } |  | 212 | } | |
| @@ -6,7 +6,7 namespace Implab.Automaton.RegularExpres | |||||
| 6 | public abstract class Token { |  | 6 | public abstract class Token { | |
| 7 | public abstract void Accept(IVisitor visitor); |  | 7 | public abstract void Accept(IVisitor visitor); | |
| 8 |  | 8 | |||
| 9 | public Token E |  | 9 | public Token End() { | |
| 10 | return Cat(new EndToken()); |  | 10 | return Cat(new EndToken()); | |
| 11 | } |  | 11 | } | |
| 12 |  | 12 | |||
| @@ -9,14 +9,14 namespace Implab.Formats { | |||||
| 9 | /// <summary> |  | 9 | /// <summary> | |
| 10 | /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. |  | 10 | /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. | |
| 11 | /// </summary> |  | 11 | /// </summary> | |
| 12 | public abstract class Grammar<TSymbol |  | 12 | public abstract class Grammar<TSymbol> { | |
| 13 |  | 13 | |||
| 14 | protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { |  | 14 | protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { | |
| 15 | get; |  | 15 | get; | |
| 16 | } |  | 16 | } | |
| 17 |  | 17 | |||
| 18 | protected SymbolToken |  | 18 | protected SymbolToken UnclassifiedToken() { | |
| 19 | return new SymbolToken |  | 19 | return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT); | |
| 20 | } |  | 20 | } | |
| 21 |  | 21 | |||
| 22 | protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { |  | 22 | protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { | |
| @@ -26,23 +26,23 namespace Implab.Formats { | |||||
| 26 | AlphabetBuilder.DefineSymbol(ch); |  | 26 | AlphabetBuilder.DefineSymbol(ch); | |
| 27 | } |  | 27 | } | |
| 28 |  | 28 | |||
| 29 | protected Token |  | 29 | protected Token SymbolToken(TSymbol symbol) { | |
| 30 | return Token |  | 30 | return Token.New(TranslateOrAdd(symbol)); | |
| 31 | } |  | 31 | } | |
| 32 |  | 32 | |||
| 33 | protected Token |  | 33 | protected Token SymbolToken(IEnumerable<TSymbol> symbols) { | |
| 34 | Safe.ArgumentNotNull(symbols, "symbols"); |  | 34 | Safe.ArgumentNotNull(symbols, "symbols"); | |
| 35 |  | 35 | |||
| 36 | return Token |  | 36 | return Token.New(TranslateOrAdd(symbols).ToArray()); | |
| 37 | } |  | 37 | } | |
| 38 |  | 38 | |||
| 39 | protected Token |  | 39 | protected Token SymbolSetToken(params TSymbol[] set) { | |
| 40 | return SymbolToken(set); |  | 40 | return SymbolToken(set); | |
| 41 | } |  | 41 | } | |
| 42 |  | 42 | |||
| 43 | int TranslateOrAdd(TSymbol ch) { |  | 43 | int TranslateOrAdd(TSymbol ch) { | |
| 44 | var t = AlphabetBuilder.Translate(ch); |  | 44 | var t = AlphabetBuilder.Translate(ch); | |
| 45 | if (t == |  | 45 | if (t == AutomatonConst.UNCLASSIFIED_INPUT) | |
| 46 | t = AlphabetBuilder.DefineSymbol(ch); |  | 46 | t = AlphabetBuilder.DefineSymbol(ch); | |
| 47 | return t; |  | 47 | return t; | |
| 48 | } |  | 48 | } | |
| @@ -53,7 +53,7 namespace Implab.Formats { | |||||
| 53 |  | 53 | |||
| 54 | int TranslateOrDie(TSymbol ch) { |  | 54 | int TranslateOrDie(TSymbol ch) { | |
| 55 | var t = AlphabetBuilder.Translate(ch); |  | 55 | var t = AlphabetBuilder.Translate(ch); | |
| 56 | if (t == |  | 56 | if (t == AutomatonConst.UNCLASSIFIED_INPUT) | |
| 57 | throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); |  | 57 | throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); | |
| 58 | return t; |  | 58 | return t; | |
| 59 | } |  | 59 | } | |
| @@ -62,22 +62,21 namespace Implab.Formats { | |||||
| 62 | return symbols.Distinct().Select(TranslateOrDie); |  | 62 | return symbols.Distinct().Select(TranslateOrDie); | |
| 63 | } |  | 63 | } | |
| 64 |  | 64 | |||
| 65 | protected Token |  | 65 | protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) { | |
| 66 | Safe.ArgumentNotNull(symbols, "symbols"); |  | 66 | Safe.ArgumentNotNull(symbols, "symbols"); | |
| 67 |  | 67 | |||
| 68 | return Token |  | 68 | return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); | |
| 69 | } |  | 69 | } | |
| 70 |  | 70 | |||
| 71 | protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); |  | 71 | protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); | |
| 72 |  | 72 | |||
| 73 | protected ScannerContext<TTag> BuildScannerContext |  | 73 | protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) { | |
| 74 |  | 74 | |||
| 75 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); |  | 75 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); | |
| 76 |  | 76 | |||
| 77 | var visitor = new RegularExpressionVisitor<TTag>(); |  | 77 | var visitor = new RegularExpressionVisitor<TTag>(dfa); | |
| 78 | regexp.Accept( |  | 78 | regexp.Accept(visitor); | |
| 79 |  | 79 | visitor.BuildDFA(); | ||
| 80 | visitor.BuildDFA(dfa); |  | |||
| 81 |  | 80 | |||
| 82 | if (dfa.IsFinalState(dfa.InitialState)) |  | 81 | if (dfa.IsFinalState(dfa.InitialState)) | |
| 83 | throw new ApplicationException("The specified language contains empty token"); |  | 82 | throw new ApplicationException("The specified language contains empty token"); | |
| @@ -5,7 +5,6 | |||||
| 5 | enum JSONElementContext { |  | 5 | enum JSONElementContext { | |
| 6 | None, |  | 6 | None, | |
| 7 | Object, |  | 7 | Object, | |
| 8 | Array |  | 8 | Array | |
| 9 | Closed |  | |||
| 10 | } |  | 9 | } | |
| 11 | } |  | 10 | } | |
| @@ -4,7 +4,7 using System; | |||||
| 4 | using Implab.Automaton; |  | 4 | using Implab.Automaton; | |
| 5 |  | 5 | |||
| 6 | namespace Implab.Formats.JSON { |  | 6 | namespace Implab.Formats.JSON { | |
| 7 | class JSONGrammar : Grammar<char |  | 7 | class JSONGrammar : Grammar<char> { | |
| 8 | public enum TokenType { |  | 8 | public enum TokenType { | |
| 9 | None, |  | 9 | None, | |
| 10 | BeginObject, |  | 10 | BeginObject, | |
| @@ -29,8 +29,8 namespace Implab.Formats.JSON { | |||||
| 29 | get { return _instance.Value; } |  | 29 | get { return _instance.Value; } | |
| 30 | } |  | 30 | } | |
| 31 |  | 31 | |||
| 32 | readonly ScannerContext<TokenType> m_json |  | 32 | readonly ScannerContext<TokenType> m_jsonExpression; | |
| 33 | readonly ScannerContext<TokenType> m_string |  | 33 | readonly ScannerContext<TokenType> m_stringExpression; | |
| 34 |  | 34 | |||
| 35 | public JSONGrammar() { |  | 35 | public JSONGrammar() { | |
| 36 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); |  | 36 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); | |
| @@ -81,23 +81,25 namespace Implab.Formats.JSON { | |||||
| 81 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); |  | 81 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); | |
| 82 |  | 82 | |||
| 83 |  | 83 | |||
| 84 | m_json |  | 84 | m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression); | |
| 85 | m_string |  | 85 | m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression); | |
|  | 86 | ||||
|  | 87 | ||||
| 86 | } |  | 88 | } | |
| 87 |  | 89 | |||
| 88 | public ScannerContext<TokenType> Json |  | 90 | public ScannerContext<TokenType> JsonExpression { | |
| 89 | get { |  | 91 | get { | |
| 90 | return m_json |  | 92 | return m_jsonExpression; | |
| 91 | } |  | 93 | } | |
| 92 | } |  | 94 | } | |
| 93 |  | 95 | |||
| 94 | public ScannerContext<TokenType> JsonString |  | 96 | public ScannerContext<TokenType> JsonStringExpression { | |
| 95 | get { |  | 97 | get { | |
| 96 | return m_string |  | 98 | return m_stringExpression; | |
| 97 | } |  | 99 | } | |
| 98 | } |  | 100 | } | |
| 99 |  | 101 | |||
| 100 | Token |  | 102 | Token SymbolRangeToken(char start, char stop) { | |
| 101 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); |  | 103 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); | |
| 102 | } |  | 104 | } | |
| 103 |  | 105 | |||
| @@ -38,17 +38,30 namespace Implab.Formats.JSON { | |||||
| 38 | MemberValue |  | 38 | MemberValue | |
| 39 | } |  | 39 | } | |
| 40 |  | 40 | |||
|  | 41 | #region Parser rules | |||
| 41 | struct ParserContext { |  | 42 | struct ParserContext { | |
| 42 | DFAStateDescriptior<object> |  | 43 | readonly int[,] m_dfa; | |
| 43 | } |  | 44 | int m_state; | |
|  | 45 | ||||
|  | 46 | readonly JSONElementContext m_elementContext; | |||
| 44 |  | 47 | |||
| 45 | static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet; |  | 48 | public ParserContext(int[,] dfa, int state, JSONElementContext context) { | |
| 46 | static readonly DFAStateDescriptior<object>[] _jsonDFA; |  | 49 | m_dfa = dfa; | |
| 47 | static readonly int _jsonDFAInitialState; |  | 50 | m_state = state; | |
| 48 | static readonly DFAStateDescriptior<object>[] _objectDFA; |  | 51 | m_elementContext = context; | |
| 49 | static readonly int _objectDFAInitialState; |  | 52 | } | |
| 50 | static readonly DFAStateDescriptior<object>[] _arrayDFA; |  | 53 | ||
| 51 | static readonly int _arrayDFAInitialState; |  | 54 | public bool Move(JsonTokenType token) { | |
|  | 55 | var next = m_dfa[m_state, token]; | |||
|  | 56 | if (next == AutomatonConst.UNREACHABLE_STATE) | |||
|  | 57 | return false; | |||
|  | 58 | m_state = next; | |||
|  | 59 | } | |||
|  | 60 | ||||
|  | 61 | public JSONElementContext ElementContext { | |||
|  | 62 | get { return m_elementContext; } | |||
|  | 63 | } | |||
|  | 64 | } | |||
| 52 |  | 65 | |||
| 53 | static JSONParser() { |  | 66 | static JSONParser() { | |
| 54 |  | 67 | |||
| @@ -64,7 +77,8 namespace Implab.Formats.JSON { | |||||
| 64 | ) |  | 77 | ) | |
| 65 | .Optional() |  | 78 | .Optional() | |
| 66 | .Cat(Token(JsonTokenType.EndObject)) |  | 79 | .Cat(Token(JsonTokenType.EndObject)) | |
| 67 | . |  | 80 | .End(); | |
|  | 81 | ||||
| 68 | var arrayExpression = valueExpression |  | 82 | var arrayExpression = valueExpression | |
| 69 | .Cat( |  | 83 | .Cat( | |
| 70 | Token(JsonTokenType.ValueSeparator) |  | 84 | Token(JsonTokenType.ValueSeparator) | |
| @@ -73,29 +87,31 namespace Implab.Formats.JSON { | |||||
| 73 | ) |  | 87 | ) | |
| 74 | .Optional() |  | 88 | .Optional() | |
| 75 | .Cat(Token(JsonTokenType.EndArray)) |  | 89 | .Cat(Token(JsonTokenType.EndArray)) | |
| 76 | . |  | 90 | .End(); | |
| 77 |  | 91 | |||
| 78 | var jsonExpression = valueExpression. |  | 92 | var jsonExpression = valueExpression.End(); | |
| 79 |  | 93 | |||
| 80 | _jsonDFA = Create |  | 94 | _jsonDFA = CreateParserContext(jsonExpression, JSONElementContext.None); | |
| 81 | _objectDFA = Create |  | 95 | _objectDFA = CreateParserContext(objectExpression, JSONElementContext.Object); | |
| 82 | _arrayDFA = Create |  | 96 | _arrayDFA = CreateParserContext(arrayExpression, JSONElementContext.Array); | |
| 83 | } |  | 97 | } | |
| 84 |  | 98 | |||
| 85 | static Token |  | 99 | static Token Token(params JsonTokenType[] input) { | |
| 86 | return Token |  | 100 | return Token.New( input.Select(t => (int)t).ToArray() ); | |
| 87 | } |  | 101 | } | |
| 88 |  | 102 | |||
| 89 | static RegularDFA<JsonTokenType,object> CreateDFA(Token<object> expr) { |  | 103 | static ParserContext CreateParserContext(Token expr, JSONElementContext context) { | |
| 90 | var builder = new RegularExpressionVisitor<object>(); |  | 104 | ||
| 91 | var dfa = new |  | 105 | var dfa = new DFATable(); | |
| 92 |  | 106 | var builder = new RegularExpressionVisitor(dfa); | ||
| 93 | expr.Accept(builder); |  | 107 | expr.Accept(builder); | |
|  | 108 | builder.BuildDFA(); | |||
| 94 |  | 109 | |||
| 95 | builder.BuildDFA(dfa); |  | 110 | return new ParserContext(dfa.CreateTransitionTable(), dfa.InitialState, context); | |
| 96 | return dfa; |  | |||
| 97 | } |  | 111 | } | |
| 98 |  | 112 | |||
|  | 113 | #endregion | |||
|  | 114 | ||||
| 99 | JSONScanner m_scanner; |  | 115 | JSONScanner m_scanner; | |
| 100 | MemberContext m_memberContext; |  | 116 | MemberContext m_memberContext; | |
| 101 |  | 117 | |||
| @@ -117,8 +133,7 namespace Implab.Formats.JSON { | |||||
| 117 | /// Создает новый экземпляр парсера, на основе текстового потока. |  | 133 | /// Создает новый экземпляр парсера, на основе текстового потока. | |
| 118 | /// </summary> |  | 134 | /// </summary> | |
| 119 | /// <param name="reader">Текстовый поток.</param> |  | 135 | /// <param name="reader">Текстовый поток.</param> | |
| 120 | /// <param name="dispose">Признак того, что парсер должен конролировать время жизни входного потока.</param> |  | 136 | public JSONParser(TextReader reader) | |
| 121 | public JSONParser(TextReader reader, bool dispose) |  | |||
| 122 | : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { |  | 137 | : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { | |
| 123 | Safe.ArgumentNotNull(reader, "reader"); |  | 138 | Safe.ArgumentNotNull(reader, "reader"); | |
| 124 | m_scanner = new JSONScanner(); |  | 139 | m_scanner = new JSONScanner(); | |
| @@ -61,15 +61,15 namespace Implab.Formats { | |||||
| 61 | while (pos < m_bufferSize) { |  | 61 | while (pos < m_bufferSize) { | |
| 62 | var ch = m_buffer[pos]; |  | 62 | var ch = m_buffer[pos]; | |
| 63 |  | 63 | |||
| 64 | state = dfa[state, ch > maxSymbol ? |  | 64 | state = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
| 65 | if (state == |  | 65 | if (state == AutomatonConst.UNREACHABLE_STATE) | |
| 66 | break; |  | 66 | break; | |
| 67 |  | 67 | |||
| 68 | pos++; |  | 68 | pos++; | |
| 69 | } |  | 69 | } | |
| 70 |  | 70 | |||
| 71 | m_tokenLength = pos - m_bufferOffset; |  | 71 | m_tokenLength = pos - m_bufferOffset; | |
| 72 | } while (state != |  | 72 | } while (state != AutomatonConst.UNREACHABLE_STATE && Feed()); | |
| 73 |  | 73 | |||
| 74 | m_tokenOffset = m_bufferOffset; |  | 74 | m_tokenOffset = m_bufferOffset; | |
| 75 | m_bufferOffset += m_tokenLength; |  | 75 | m_bufferOffset += m_tokenLength; | |
| @@ -159,7 +159,6 | |||||
| 159 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> |  | 159 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> | |
| 160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> |  | 160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> | |
| 161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> |  | 161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> | |
| 162 | <Compile Include="Automaton\DFAConst.cs" /> |  | |||
| 163 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> |  | 162 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> | |
| 164 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> |  | 163 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> | |
| 165 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> |  | 164 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> | |
| @@ -183,7 +182,6 | |||||
| 183 | <Compile Include="Automaton\IDFATable.cs" /> |  | 182 | <Compile Include="Automaton\IDFATable.cs" /> | |
| 184 | <Compile Include="Automaton\IDFATableBuilder.cs" /> |  | 183 | <Compile Include="Automaton\IDFATableBuilder.cs" /> | |
| 185 | <Compile Include="Automaton\DFATable.cs" /> |  | 184 | <Compile Include="Automaton\DFATable.cs" /> | |
| 186 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> |  | |||
| 187 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> |  | 185 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> | |
| 188 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> |  | 186 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> | |
| 189 | <Compile Include="Formats\TextScanner.cs" /> |  | 187 | <Compile Include="Formats\TextScanner.cs" /> | |
| @@ -193,7 +191,10 | |||||
| 193 | <Compile Include="Formats\Grammar.cs" /> |  | 191 | <Compile Include="Formats\Grammar.cs" /> | |
| 194 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> |  | 192 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> | |
| 195 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> |  | 193 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> | |
| 196 | <Compile Include="Automaton\RegularExpressions\ |  | 194 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitorT.cs" /> | |
|  | 195 | <Compile Include="Automaton\AutomatonConst.cs" /> | |||
|  | 196 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> | |||
|  | 197 | <Compile Include="Components\LazyAndWeak.cs" /> | |||
| 197 | </ItemGroup> |  | 198 | </ItemGroup> | |
| 198 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |  | 199 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | |
| 199 | <ItemGroup /> |  | 200 | <ItemGroup /> | |
| 1 | NO CONTENT: file was removed |  | NO CONTENT: file was removed | 
| 1 | NO CONTENT: file was removed |  | NO CONTENT: file was removed | 
        
        General Comments 0
    
    
  
  
                      You need to be logged in to leave comments.
                      Login now
                    
                