| @@ -0,0 +1,9 | |||||
|
|
1 | ||||
|
|
2 | namespace Implab.Automaton { | |||
|
|
3 | public static class DFAConst { | |||
|
|
4 | public const int UNREACHABLE_STATE = -1; | |||
|
|
5 | ||||
|
|
6 | public const int UNCLASSIFIED_INPUT = 0; | |||
|
|
7 | } | |||
|
|
8 | } | |||
|
|
9 | ||||
| @@ -0,0 +1,37 | |||||
|
|
1 | using Implab; | |||
|
|
2 | using System; | |||
|
|
3 | using System.Collections.Generic; | |||
|
|
4 | using System.Diagnostics; | |||
|
|
5 | using System.Linq; | |||
|
|
6 | ||||
|
|
7 | namespace Implab.Automaton.RegularExpressions { | |||
|
|
8 | /// <summary> | |||
|
|
9 | /// </summary> | |||
|
|
10 | public class RegularExpressionVisitor<TTag> : RegularExpressionVisitor { | |||
|
|
11 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | |||
|
|
12 | ||||
|
|
13 | readonly ITaggedDFABuilder<TTag> m_builder; | |||
|
|
14 | ||||
|
|
15 | public RegularExpressionVisitor(ITaggedDFABuilder<TTag> builder) : base(builder) { | |||
|
|
16 | m_builder = builder; | |||
|
|
17 | } | |||
|
|
18 | ||||
|
|
19 | public override void Visit(EndToken token) { | |||
|
|
20 | base.Visit(token); | |||
|
|
21 | var tagged = token as EndToken<TTag>; | |||
|
|
22 | if (tagged != null) | |||
|
|
23 | m_tags.Add(Index, tagged.Tag); | |||
|
|
24 | } | |||
|
|
25 | ||||
|
|
26 | protected override void MarkFinalState(HashSet<int> state) { | |||
|
|
27 | base.MarkFinalState(state); | |||
|
|
28 | m_builder.SetStateTag(Translate(state), GetStateTags(state)); | |||
|
|
29 | } | |||
|
|
30 | ||||
|
|
31 | TTag[] GetStateTags(IEnumerable<int> state) { | |||
|
|
32 | Debug.Assert(state != null); | |||
|
|
33 | return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); | |||
|
|
34 | } | |||
|
|
35 | ||||
|
|
36 | } | |||
|
|
37 | } | |||
| @@ -0,0 +1,44 | |||||
|
|
1 | using System; | |||
|
|
2 | using System.Threading; | |||
|
|
3 | ||||
|
|
4 | namespace Implab.Components { | |||
|
|
5 | public class LazyAndWeak<T> where T : class { | |||
|
|
6 | ||||
|
|
7 | readonly Func<T> m_factory; | |||
|
|
8 | readonly object m_lock; | |||
|
|
9 | WeakReference m_reference; | |||
|
|
10 | ||||
|
|
11 | ||||
|
|
12 | public LazyAndWeak(Func<T> factory, bool useLock) { | |||
|
|
13 | Safe.ArgumentNotNull(factory, "factory"); | |||
|
|
14 | m_factory = factory; | |||
|
|
15 | m_lock = useLock ? new object() : null; | |||
|
|
16 | } | |||
|
|
17 | ||||
|
|
18 | public LazyAndWeak(Func<T> factory) : this(factory, false) { | |||
|
|
19 | } | |||
|
|
20 | ||||
|
|
21 | public T Value { | |||
|
|
22 | get { | |||
|
|
23 | while (true) { | |||
|
|
24 | var weak = m_reference; | |||
|
|
25 | T value; | |||
|
|
26 | if (weak != null) { | |||
|
|
27 | value = weak.Target as T; | |||
|
|
28 | if (value != null) | |||
|
|
29 | return value; | |||
|
|
30 | } | |||
|
|
31 | ||||
|
|
32 | if (m_lock == null) { | |||
|
|
33 | value = m_factory(); | |||
|
|
34 | ||||
|
|
35 | if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak) | |||
|
|
36 | return value; | |||
|
|
37 | } else { | |||
|
|
38 | } | |||
|
|
39 | } | |||
|
|
40 | } | |||
|
|
41 | } | |||
|
|
42 | } | |||
|
|
43 | } | |||
|
|
44 | ||||
| @@ -105,7 +105,7 namespace Implab.Automaton { | |||||
| 105 |
|
105 | |||
| 106 | for (int i = 0; i < StateCount; i++) |
|
106 | for (int i = 0; i < StateCount; i++) | |
| 107 | for (int j = 0; i < AlphabetSize; j++) |
|
107 | for (int j = 0; i < AlphabetSize; j++) | |
| 108 |
table[i, j] = |
|
108 | table[i, j] = AutomatonConst.UNREACHABLE_STATE; | |
| 109 |
|
109 | |||
| 110 | foreach (var t in this) |
|
110 | foreach (var t in this) | |
| 111 | table[t.s1,t.edge] = t.s2; |
|
111 | table[t.s1,t.edge] = t.s2; | |
| @@ -273,11 +273,11 namespace Implab.Automaton { | |||||
| 273 |
|
273 | |||
| 274 | var nextCls = 0; |
|
274 | var nextCls = 0; | |
| 275 | foreach (var item in minClasses) { |
|
275 | foreach (var item in minClasses) { | |
| 276 |
if (nextCls == |
|
276 | if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT) | |
| 277 | nextCls++; |
|
277 | nextCls++; | |
| 278 |
|
278 | |||
| 279 | // сохраняем DFAConst.UNCLASSIFIED_INPUT |
|
279 | // сохраняем DFAConst.UNCLASSIFIED_INPUT | |
| 280 |
var cls = item.Contains( |
|
280 | var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls; | |
| 281 |
|
281 | |||
| 282 | foreach (var a in item) |
|
282 | foreach (var a in item) | |
| 283 | alphabetMap[a] = cls; |
|
283 | alphabetMap[a] = cls; | |
| @@ -54,7 +54,7 namespace Implab.Automaton { | |||||
| 54 | return cls; |
|
54 | return cls; | |
| 55 | if (!m_supportUnclassified) |
|
55 | if (!m_supportUnclassified) | |
| 56 | throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet"); |
|
56 | throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet"); | |
| 57 |
return |
|
57 | return AutomatonConst.UNCLASSIFIED_INPUT; | |
| 58 | } |
|
58 | } | |
| 59 |
|
59 | |||
| 60 | public int Count { |
|
60 | public int Count { | |
| @@ -1,13 +1,11 | |||||
| 1 | using Implab; |
|
1 | namespace Implab.Automaton.RegularExpressions { | |
| 2 |
|
||||
| 3 | namespace Implab.Automaton.RegularExpressions { |
|
|||
| 4 | /// <summary> |
|
2 | /// <summary> | |
| 5 | /// Конечный символ расширенного регулярного выражения, при построении ДКА |
|
3 | /// Конечный символ расширенного регулярного выражения, при построении ДКА | |
| 6 | /// используется для определения конечных состояний. |
|
4 | /// используется для определения конечных состояний. | |
| 7 | /// </summary> |
|
5 | /// </summary> | |
| 8 | public class EndToken<TTag>: Token { |
|
6 | public class EndToken<TTag>: EndToken { | |
| 9 |
|
7 | |||
| 10 | TTag m_tag; |
|
8 | readonly TTag m_tag; | |
| 11 |
|
9 | |||
| 12 | public EndToken(TTag tag) { |
|
10 | public EndToken(TTag tag) { | |
| 13 | m_tag = tag; |
|
11 | m_tag = tag; | |
| @@ -21,13 +19,5 namespace Implab.Automaton.RegularExpres | |||||
| 21 | get { return m_tag; } |
|
19 | get { return m_tag; } | |
| 22 | } |
|
20 | } | |
| 23 |
|
21 | |||
| 24 | public override void Accept(IVisitor visitor) { |
|
|||
| 25 | Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor"); |
|
|||
| 26 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
|||
| 27 | ((IVisitor<TTag>)visitor).Visit(this); |
|
|||
| 28 | } |
|
|||
| 29 | public override string ToString() { |
|
|||
| 30 | return "#"; |
|
|||
| 31 |
|
|
22 | } | |
| 32 | } |
|
23 | } | |
| 33 | } |
|
|||
| @@ -2,12 +2,12 | |||||
| 2 | using System.Linq; |
|
2 | using System.Linq; | |
| 3 |
|
3 | |||
| 4 | namespace Implab.Automaton.RegularExpressions { |
|
4 | namespace Implab.Automaton.RegularExpressions { | |
| 5 |
public class |
|
5 | public class TaggedDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { | |
| 6 |
|
6 | |||
| 7 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); |
|
7 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); | |
| 8 | readonly IAlphabet<TInput> m_alphabet; |
|
8 | readonly IAlphabet<TInput> m_alphabet; | |
| 9 |
|
9 | |||
| 10 |
public |
|
10 | public TaggedDFA(IAlphabet<TInput> alphabet) { | |
| 11 | Safe.ArgumentNotNull(alphabet, "aplhabet"); |
|
11 | Safe.ArgumentNotNull(alphabet, "aplhabet"); | |
| 12 |
|
12 | |||
| 13 | m_alphabet = alphabet; |
|
13 | m_alphabet = alphabet; | |
| @@ -48,10 +48,10 namespace Implab.Automaton.RegularExpres | |||||
| 48 | /// Optimize the specified alphabet. |
|
48 | /// Optimize the specified alphabet. | |
| 49 | /// </summary> |
|
49 | /// </summary> | |
| 50 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> |
|
50 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> | |
| 51 |
public |
|
51 | public TaggedDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { | |
| 52 | Safe.ArgumentNotNull(alphabet, "alphabet"); |
|
52 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |
| 53 |
|
53 | |||
| 54 |
var dfa = new |
|
54 | var dfa = new TaggedDFA<TInput, TTag>(alphabet); | |
| 55 |
|
55 | |||
| 56 | var states = new DummyAlphabet(StateCount); |
|
56 | var states = new DummyAlphabet(StateCount); | |
| 57 | var alphaMap = new Dictionary<int,int>(); |
|
57 | var alphaMap = new Dictionary<int,int>(); | |
| @@ -10,7 +10,7 namespace Implab.Automaton.RegularExpres | |||||
| 10 | /// регулярное выражение и вычисляет followpos, затем используется метод |
|
10 | /// регулярное выражение и вычисляет followpos, затем используется метод | |
| 11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. |
|
11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |
| 12 | /// </summary> |
|
12 | /// </summary> | |
| 13 |
public class RegularExpressionVisitor |
|
13 | public class RegularExpressionVisitor : IVisitor { | |
| 14 | int m_idx; |
|
14 | int m_idx; | |
| 15 | Token m_root; |
|
15 | Token m_root; | |
| 16 | HashSet<int> m_firstpos; |
|
16 | HashSet<int> m_firstpos; | |
| @@ -19,13 +19,23 namespace Implab.Automaton.RegularExpres | |||||
| 19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); |
|
19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |
| 20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); |
|
20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |
| 21 | readonly HashSet<int> m_ends = new HashSet<int>(); |
|
21 | readonly HashSet<int> m_ends = new HashSet<int>(); | |
| 22 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); |
|
|||
| 23 |
|
22 | |||
| 24 | public Dictionary<int, HashSet<int>> FollowposMap { |
|
23 | readonly IDFATableBuilder m_builder; | |
| 25 | get { return m_followpos; } |
|
24 | readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( | |
|
|
25 | false, | |||
|
|
26 | new CustomEqualityComparer<HashSet<int>>( | |||
|
|
27 | (x, y) => x.SetEquals(y), | |||
|
|
28 | x => x.Sum(n => n.GetHashCode()) | |||
|
|
29 | ) | |||
|
|
30 | ); | |||
|
|
31 | ||||
|
|
32 | public RegularExpressionVisitor(IDFATableBuilder builder) { | |||
|
|
33 | Safe.ArgumentNotNull(builder, "builder"); | |||
|
|
34 | ||||
|
|
35 | m_builder = builder; | |||
| 26 | } |
|
36 | } | |
| 27 |
|
37 | |||
| 28 |
|
|
38 | HashSet<int> Followpos(int pos) { | |
| 29 | HashSet<int> set; |
|
39 | HashSet<int> set; | |
| 30 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); |
|
40 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | |
| 31 | } |
|
41 | } | |
| @@ -42,6 +52,9 namespace Implab.Automaton.RegularExpres | |||||
| 42 | return false; |
|
52 | return false; | |
| 43 | } |
|
53 | } | |
| 44 |
|
54 | |||
|
|
55 | protected int Index { | |||
|
|
56 | get { return m_idx; } | |||
|
|
57 | } | |||
| 45 |
|
58 | |||
| 46 | public void Visit(AltToken token) { |
|
59 | public void Visit(AltToken token) { | |
| 47 | if (m_root == null) |
|
60 | if (m_root == null) | |
| @@ -112,45 +125,23 namespace Implab.Automaton.RegularExpres | |||||
| 112 | m_lastpos = new HashSet<int>(new[] { m_idx }); |
|
125 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
| 113 | } |
|
126 | } | |
| 114 |
|
127 | |||
| 115 |
public void Visit(EndToken |
|
128 | public virtual void Visit(EndToken token) { | |
| 116 | if (m_root == null) |
|
129 | if (m_root == null) | |
| 117 | m_root = token; |
|
130 | m_root = token; | |
| 118 | m_idx++; |
|
131 | m_idx++; | |
| 119 |
m_indexes[m_idx] = |
|
132 | m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; | |
| 120 | m_firstpos = new HashSet<int>(new[] { m_idx }); |
|
|||
| 121 | m_lastpos = new HashSet<int>(new[] { m_idx }); |
|
|||
| 122 | Followpos(m_idx); |
|
|||
| 123 | m_ends.Add(m_idx); |
|
|||
| 124 | m_tags.Add(m_idx, token.Tag); |
|
|||
| 125 | } |
|
|||
| 126 |
|
||||
| 127 | public void Visit(EndToken token) { |
|
|||
| 128 | if (m_root == null) |
|
|||
| 129 | m_root = token; |
|
|||
| 130 | m_idx++; |
|
|||
| 131 | m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; |
|
|||
| 132 | m_firstpos = new HashSet<int>(new[] { m_idx }); |
|
133 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |
| 133 | m_lastpos = new HashSet<int>(new[] { m_idx }); |
|
134 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
| 134 | Followpos(m_idx); |
|
135 | Followpos(m_idx); | |
| 135 | m_ends.Add(m_idx); |
|
136 | m_ends.Add(m_idx); | |
| 136 | } |
|
137 | } | |
| 137 |
|
138 | |||
| 138 |
public void BuildDFA( |
|
139 | public void BuildDFA() { | |
| 139 | Safe.ArgumentNotNull(dfa,"dfa"); |
|
140 | AddState(m_firstpos); | |
|
|
141 | SetInitialState(m_firstpos); | |||
| 140 |
|
142 | |||
| 141 | var states = new MapAlphabet<HashSet<int>>( |
|
143 | if(IsFinal(m_firstpos)) | |
| 142 | false, |
|
144 | MarkFinalState(m_firstpos); | |
| 143 | new CustomEqualityComparer<HashSet<int>>( |
|
|||
| 144 | (x, y) => x.SetEquals(y), |
|
|||
| 145 | x => x.Sum(n => n.GetHashCode()) |
|
|||
| 146 | )); |
|
|||
| 147 |
|
||||
| 148 | var initialState = states.DefineSymbol(m_firstpos); |
|
|||
| 149 | dfa.SetInitialState(initialState); |
|
|||
| 150 |
|
||||
| 151 | var tags = GetStateTags(m_firstpos); |
|
|||
| 152 | if (tags != null && tags.Length > 0) |
|
|||
| 153 | dfa.MarkFinalState(initialState, tags); |
|
|||
| 154 |
|
145 | |||
| 155 | var inputMax = m_indexes.Values.Max(); |
|
146 | var inputMax = m_indexes.Values.Max(); | |
| 156 | var queue = new Queue<HashSet<int>>(); |
|
147 | var queue = new Queue<HashSet<int>>(); | |
| @@ -158,38 +149,58 namespace Implab.Automaton.RegularExpres | |||||
| 158 | queue.Enqueue(m_firstpos); |
|
149 | queue.Enqueue(m_firstpos); | |
| 159 |
|
150 | |||
| 160 | while (queue.Count > 0) { |
|
151 | while (queue.Count > 0) { | |
| 161 |
var s |
|
152 | var s1 = queue.Dequeue(); | |
| 162 | var s1 = states.Translate(state); |
|
|||
| 163 | Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); |
|
|||
| 164 |
|
153 | |||
| 165 | for (int a = 0; a <= inputMax; a++) { |
|
154 | for (int a = 0; a <= inputMax; a++) { | |
| 166 |
var |
|
155 | var s2 = new HashSet<int>(); | |
| 167 |
foreach (var p in s |
|
156 | foreach (var p in s1) { | |
| 168 | if (m_indexes[p] == a) { |
|
157 | if (m_indexes[p] == a) { | |
| 169 |
|
|
158 | s2.UnionWith(Followpos(p)); | |
| 170 | } |
|
159 | } | |
| 171 | } |
|
160 | } | |
| 172 |
if ( |
|
161 | if (s2.Count > 0) { | |
| 173 |
i |
|
162 | if (!HasState(s2)) { | |
| 174 |
|
|
163 | AddState(s2); | |
| 175 |
s2 |
|
164 | if (IsFinal(s2)) | |
| 176 |
|
|
165 | MarkFinalState(s2); | |
| 177 | s2 = states.DefineSymbol(next); |
|
|||
| 178 |
|
166 | |||
| 179 |
|
|
167 | queue.Enqueue(s2); | |
| 180 |
|
||||
| 181 | dfa.MarkFinalState(s2); |
|
|||
| 182 | tags = GetStateTags(next); |
|
|||
| 183 | if (tags != null && tags.Length > 0) |
|
|||
| 184 | dfa.SetStateTag(s2, tags); |
|
|||
| 185 |
|
|
168 | } | |
| 186 |
|
169 | |||
| 187 |
|
|
170 | DefineTransition(s1, s2, a); | |
| 188 |
|
|
171 | } | |
| 189 | dfa.Add(new AutomatonTransition(s1, s2, a)); |
|
172 | ||
| 190 |
|
|
173 | } | |
| 191 |
|
|
174 | } | |
| 192 |
|
|
175 | } | |
|
|
176 | ||||
|
|
177 | protected bool HasState(HashSet<int> state) { | |||
|
|
178 | return m_states.Contains(state); | |||
|
|
179 | } | |||
|
|
180 | ||||
|
|
181 | protected void AddState(HashSet<int> state) { | |||
|
|
182 | Debug.Assert(!HasState(state)); | |||
|
|
183 | ||||
|
|
184 | m_states.DefineSymbol(state); | |||
|
|
185 | } | |||
|
|
186 | ||||
|
|
187 | protected int Translate(HashSet<int> state) { | |||
|
|
188 | Debug.Assert(HasState(state)); | |||
|
|
189 | ||||
|
|
190 | return m_states.Translate(state); | |||
|
|
191 | } | |||
|
|
192 | ||||
|
|
193 | protected virtual void SetInitialState(HashSet<int> state) { | |||
|
|
194 | m_builder.SetInitialState(Translate(state)); | |||
|
|
195 | } | |||
|
|
196 | ||||
|
|
197 | protected virtual void MarkFinalState(HashSet<int> state) { | |||
|
|
198 | m_builder.MarkFinalState(Translate(state)); | |||
|
|
199 | } | |||
|
|
200 | ||||
|
|
201 | protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { | |||
|
|
202 | ||||
|
|
203 | m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); | |||
| 193 | } |
|
204 | } | |
| 194 |
|
205 | |||
| 195 | bool IsFinal(IEnumerable<int> state) { |
|
206 | bool IsFinal(IEnumerable<int> state) { | |
| @@ -197,10 +208,5 namespace Implab.Automaton.RegularExpres | |||||
| 197 | return state.Any(m_ends.Contains); |
|
208 | return state.Any(m_ends.Contains); | |
| 198 | } |
|
209 | } | |
| 199 |
|
210 | |||
| 200 | TTag[] GetStateTags(IEnumerable<int> state) { |
|
|||
| 201 | Debug.Assert(state != null); |
|
|||
| 202 | return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); |
|
|||
| 203 | } |
|
|||
| 204 |
|
||||
| 205 | } |
|
211 | } | |
| 206 | } |
|
212 | } | |
| @@ -6,7 +6,7 namespace Implab.Automaton.RegularExpres | |||||
| 6 | public abstract class Token { |
|
6 | public abstract class Token { | |
| 7 | public abstract void Accept(IVisitor visitor); |
|
7 | public abstract void Accept(IVisitor visitor); | |
| 8 |
|
8 | |||
| 9 |
public Token E |
|
9 | public Token End() { | |
| 10 | return Cat(new EndToken()); |
|
10 | return Cat(new EndToken()); | |
| 11 | } |
|
11 | } | |
| 12 |
|
12 | |||
| @@ -9,14 +9,14 namespace Implab.Formats { | |||||
| 9 | /// <summary> |
|
9 | /// <summary> | |
| 10 | /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. |
|
10 | /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. | |
| 11 | /// </summary> |
|
11 | /// </summary> | |
| 12 |
public abstract class Grammar<TSymbol |
|
12 | public abstract class Grammar<TSymbol> { | |
| 13 |
|
13 | |||
| 14 | protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { |
|
14 | protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { | |
| 15 | get; |
|
15 | get; | |
| 16 | } |
|
16 | } | |
| 17 |
|
17 | |||
| 18 |
protected SymbolToken |
|
18 | protected SymbolToken UnclassifiedToken() { | |
| 19 |
return new SymbolToken |
|
19 | return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT); | |
| 20 | } |
|
20 | } | |
| 21 |
|
21 | |||
| 22 | protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { |
|
22 | protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { | |
| @@ -26,23 +26,23 namespace Implab.Formats { | |||||
| 26 | AlphabetBuilder.DefineSymbol(ch); |
|
26 | AlphabetBuilder.DefineSymbol(ch); | |
| 27 | } |
|
27 | } | |
| 28 |
|
28 | |||
| 29 |
protected Token |
|
29 | protected Token SymbolToken(TSymbol symbol) { | |
| 30 |
return Token |
|
30 | return Token.New(TranslateOrAdd(symbol)); | |
| 31 | } |
|
31 | } | |
| 32 |
|
32 | |||
| 33 |
protected Token |
|
33 | protected Token SymbolToken(IEnumerable<TSymbol> symbols) { | |
| 34 | Safe.ArgumentNotNull(symbols, "symbols"); |
|
34 | Safe.ArgumentNotNull(symbols, "symbols"); | |
| 35 |
|
35 | |||
| 36 |
return Token |
|
36 | return Token.New(TranslateOrAdd(symbols).ToArray()); | |
| 37 | } |
|
37 | } | |
| 38 |
|
38 | |||
| 39 |
protected Token |
|
39 | protected Token SymbolSetToken(params TSymbol[] set) { | |
| 40 | return SymbolToken(set); |
|
40 | return SymbolToken(set); | |
| 41 | } |
|
41 | } | |
| 42 |
|
42 | |||
| 43 | int TranslateOrAdd(TSymbol ch) { |
|
43 | int TranslateOrAdd(TSymbol ch) { | |
| 44 | var t = AlphabetBuilder.Translate(ch); |
|
44 | var t = AlphabetBuilder.Translate(ch); | |
| 45 |
if (t == |
|
45 | if (t == AutomatonConst.UNCLASSIFIED_INPUT) | |
| 46 | t = AlphabetBuilder.DefineSymbol(ch); |
|
46 | t = AlphabetBuilder.DefineSymbol(ch); | |
| 47 | return t; |
|
47 | return t; | |
| 48 | } |
|
48 | } | |
| @@ -53,7 +53,7 namespace Implab.Formats { | |||||
| 53 |
|
53 | |||
| 54 | int TranslateOrDie(TSymbol ch) { |
|
54 | int TranslateOrDie(TSymbol ch) { | |
| 55 | var t = AlphabetBuilder.Translate(ch); |
|
55 | var t = AlphabetBuilder.Translate(ch); | |
| 56 |
if (t == |
|
56 | if (t == AutomatonConst.UNCLASSIFIED_INPUT) | |
| 57 | throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); |
|
57 | throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); | |
| 58 | return t; |
|
58 | return t; | |
| 59 | } |
|
59 | } | |
| @@ -62,22 +62,21 namespace Implab.Formats { | |||||
| 62 | return symbols.Distinct().Select(TranslateOrDie); |
|
62 | return symbols.Distinct().Select(TranslateOrDie); | |
| 63 | } |
|
63 | } | |
| 64 |
|
64 | |||
| 65 |
protected Token |
|
65 | protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) { | |
| 66 | Safe.ArgumentNotNull(symbols, "symbols"); |
|
66 | Safe.ArgumentNotNull(symbols, "symbols"); | |
| 67 |
|
67 | |||
| 68 |
return Token |
|
68 | return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); | |
| 69 | } |
|
69 | } | |
| 70 |
|
70 | |||
| 71 | protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); |
|
71 | protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); | |
| 72 |
|
72 | |||
| 73 |
protected ScannerContext<TTag> BuildScannerContext |
|
73 | protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) { | |
| 74 |
|
74 | |||
| 75 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); |
|
75 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); | |
| 76 |
|
76 | |||
| 77 | var visitor = new RegularExpressionVisitor<TTag>(); |
|
77 | var visitor = new RegularExpressionVisitor<TTag>(dfa); | |
| 78 |
regexp.Accept( |
|
78 | regexp.Accept(visitor); | |
| 79 |
|
79 | visitor.BuildDFA(); | ||
| 80 | visitor.BuildDFA(dfa); |
|
|||
| 81 |
|
80 | |||
| 82 | if (dfa.IsFinalState(dfa.InitialState)) |
|
81 | if (dfa.IsFinalState(dfa.InitialState)) | |
| 83 | throw new ApplicationException("The specified language contains empty token"); |
|
82 | throw new ApplicationException("The specified language contains empty token"); | |
| @@ -5,7 +5,6 | |||||
| 5 | enum JSONElementContext { |
|
5 | enum JSONElementContext { | |
| 6 | None, |
|
6 | None, | |
| 7 | Object, |
|
7 | Object, | |
| 8 |
Array |
|
8 | Array | |
| 9 | Closed |
|
|||
| 10 | } |
|
9 | } | |
| 11 | } |
|
10 | } | |
| @@ -4,7 +4,7 using System; | |||||
| 4 | using Implab.Automaton; |
|
4 | using Implab.Automaton; | |
| 5 |
|
5 | |||
| 6 | namespace Implab.Formats.JSON { |
|
6 | namespace Implab.Formats.JSON { | |
| 7 |
class JSONGrammar : Grammar<char |
|
7 | class JSONGrammar : Grammar<char> { | |
| 8 | public enum TokenType { |
|
8 | public enum TokenType { | |
| 9 | None, |
|
9 | None, | |
| 10 | BeginObject, |
|
10 | BeginObject, | |
| @@ -29,8 +29,8 namespace Implab.Formats.JSON { | |||||
| 29 | get { return _instance.Value; } |
|
29 | get { return _instance.Value; } | |
| 30 | } |
|
30 | } | |
| 31 |
|
31 | |||
| 32 |
readonly ScannerContext<TokenType> m_json |
|
32 | readonly ScannerContext<TokenType> m_jsonExpression; | |
| 33 |
readonly ScannerContext<TokenType> m_string |
|
33 | readonly ScannerContext<TokenType> m_stringExpression; | |
| 34 |
|
34 | |||
| 35 | public JSONGrammar() { |
|
35 | public JSONGrammar() { | |
| 36 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); |
|
36 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); | |
| @@ -81,23 +81,25 namespace Implab.Formats.JSON { | |||||
| 81 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); |
|
81 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); | |
| 82 |
|
82 | |||
| 83 |
|
83 | |||
| 84 |
m_json |
|
84 | m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression); | |
| 85 |
m_string |
|
85 | m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression); | |
|
|
86 | ||||
|
|
87 | ||||
| 86 | } |
|
88 | } | |
| 87 |
|
89 | |||
| 88 |
public ScannerContext<TokenType> Json |
|
90 | public ScannerContext<TokenType> JsonExpression { | |
| 89 | get { |
|
91 | get { | |
| 90 |
return m_json |
|
92 | return m_jsonExpression; | |
| 91 | } |
|
93 | } | |
| 92 | } |
|
94 | } | |
| 93 |
|
95 | |||
| 94 |
public ScannerContext<TokenType> JsonString |
|
96 | public ScannerContext<TokenType> JsonStringExpression { | |
| 95 | get { |
|
97 | get { | |
| 96 |
return m_string |
|
98 | return m_stringExpression; | |
| 97 | } |
|
99 | } | |
| 98 | } |
|
100 | } | |
| 99 |
|
101 | |||
| 100 |
Token |
|
102 | Token SymbolRangeToken(char start, char stop) { | |
| 101 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); |
|
103 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); | |
| 102 | } |
|
104 | } | |
| 103 |
|
105 | |||
| @@ -38,17 +38,30 namespace Implab.Formats.JSON { | |||||
| 38 | MemberValue |
|
38 | MemberValue | |
| 39 | } |
|
39 | } | |
| 40 |
|
40 | |||
|
|
41 | #region Parser rules | |||
| 41 | struct ParserContext { |
|
42 | struct ParserContext { | |
| 42 | DFAStateDescriptior<object> |
|
43 | readonly int[,] m_dfa; | |
|
|
44 | int m_state; | |||
|
|
45 | ||||
|
|
46 | readonly JSONElementContext m_elementContext; | |||
|
|
47 | ||||
|
|
48 | public ParserContext(int[,] dfa, int state, JSONElementContext context) { | |||
|
|
49 | m_dfa = dfa; | |||
|
|
50 | m_state = state; | |||
|
|
51 | m_elementContext = context; | |||
| 43 | } |
|
52 | } | |
| 44 |
|
53 | |||
| 45 | static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet; |
|
54 | public bool Move(JsonTokenType token) { | |
| 46 | static readonly DFAStateDescriptior<object>[] _jsonDFA; |
|
55 | var next = m_dfa[m_state, token]; | |
| 47 | static readonly int _jsonDFAInitialState; |
|
56 | if (next == AutomatonConst.UNREACHABLE_STATE) | |
| 48 | static readonly DFAStateDescriptior<object>[] _objectDFA; |
|
57 | return false; | |
| 49 | static readonly int _objectDFAInitialState; |
|
58 | m_state = next; | |
| 50 | static readonly DFAStateDescriptior<object>[] _arrayDFA; |
|
59 | } | |
| 51 | static readonly int _arrayDFAInitialState; |
|
60 | ||
|
|
61 | public JSONElementContext ElementContext { | |||
|
|
62 | get { return m_elementContext; } | |||
|
|
63 | } | |||
|
|
64 | } | |||
| 52 |
|
65 | |||
| 53 | static JSONParser() { |
|
66 | static JSONParser() { | |
| 54 |
|
67 | |||
| @@ -64,7 +77,8 namespace Implab.Formats.JSON { | |||||
| 64 | ) |
|
77 | ) | |
| 65 | .Optional() |
|
78 | .Optional() | |
| 66 | .Cat(Token(JsonTokenType.EndObject)) |
|
79 | .Cat(Token(JsonTokenType.EndObject)) | |
| 67 |
. |
|
80 | .End(); | |
|
|
81 | ||||
| 68 | var arrayExpression = valueExpression |
|
82 | var arrayExpression = valueExpression | |
| 69 | .Cat( |
|
83 | .Cat( | |
| 70 | Token(JsonTokenType.ValueSeparator) |
|
84 | Token(JsonTokenType.ValueSeparator) | |
| @@ -73,29 +87,31 namespace Implab.Formats.JSON { | |||||
| 73 | ) |
|
87 | ) | |
| 74 | .Optional() |
|
88 | .Optional() | |
| 75 | .Cat(Token(JsonTokenType.EndArray)) |
|
89 | .Cat(Token(JsonTokenType.EndArray)) | |
| 76 |
. |
|
90 | .End(); | |
| 77 |
|
91 | |||
| 78 |
var jsonExpression = valueExpression. |
|
92 | var jsonExpression = valueExpression.End(); | |
| 79 |
|
93 | |||
| 80 |
_jsonDFA = Create |
|
94 | _jsonDFA = CreateParserContext(jsonExpression, JSONElementContext.None); | |
| 81 |
_objectDFA = Create |
|
95 | _objectDFA = CreateParserContext(objectExpression, JSONElementContext.Object); | |
| 82 |
_arrayDFA = Create |
|
96 | _arrayDFA = CreateParserContext(arrayExpression, JSONElementContext.Array); | |
| 83 | } |
|
97 | } | |
| 84 |
|
98 | |||
| 85 |
static Token |
|
99 | static Token Token(params JsonTokenType[] input) { | |
| 86 |
return Token |
|
100 | return Token.New( input.Select(t => (int)t).ToArray() ); | |
| 87 | } |
|
101 | } | |
| 88 |
|
102 | |||
| 89 | static RegularDFA<JsonTokenType,object> CreateDFA(Token<object> expr) { |
|
103 | static ParserContext CreateParserContext(Token expr, JSONElementContext context) { | |
| 90 | var builder = new RegularExpressionVisitor<object>(); |
|
|||
| 91 | var dfa = new RegularDFA<JsonTokenType,object>(_alphabet); |
|
|||
| 92 |
|
104 | |||
|
|
105 | var dfa = new DFATable(); | |||
|
|
106 | var builder = new RegularExpressionVisitor(dfa); | |||
| 93 | expr.Accept(builder); |
|
107 | expr.Accept(builder); | |
|
|
108 | builder.BuildDFA(); | |||
| 94 |
|
109 | |||
| 95 | builder.BuildDFA(dfa); |
|
110 | return new ParserContext(dfa.CreateTransitionTable(), dfa.InitialState, context); | |
| 96 | return dfa; |
|
|||
| 97 | } |
|
111 | } | |
| 98 |
|
112 | |||
|
|
113 | #endregion | |||
|
|
114 | ||||
| 99 | JSONScanner m_scanner; |
|
115 | JSONScanner m_scanner; | |
| 100 | MemberContext m_memberContext; |
|
116 | MemberContext m_memberContext; | |
| 101 |
|
117 | |||
| @@ -117,8 +133,7 namespace Implab.Formats.JSON { | |||||
| 117 | /// Создает новый экземпляр парсера, на основе текстового потока. |
|
133 | /// Создает новый экземпляр парсера, на основе текстового потока. | |
| 118 | /// </summary> |
|
134 | /// </summary> | |
| 119 | /// <param name="reader">Текстовый поток.</param> |
|
135 | /// <param name="reader">Текстовый поток.</param> | |
| 120 | /// <param name="dispose">Признак того, что парсер должен конролировать время жизни входного потока.</param> |
|
136 | public JSONParser(TextReader reader) | |
| 121 | public JSONParser(TextReader reader, bool dispose) |
|
|||
| 122 | : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { |
|
137 | : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { | |
| 123 | Safe.ArgumentNotNull(reader, "reader"); |
|
138 | Safe.ArgumentNotNull(reader, "reader"); | |
| 124 | m_scanner = new JSONScanner(); |
|
139 | m_scanner = new JSONScanner(); | |
| @@ -61,15 +61,15 namespace Implab.Formats { | |||||
| 61 | while (pos < m_bufferSize) { |
|
61 | while (pos < m_bufferSize) { | |
| 62 | var ch = m_buffer[pos]; |
|
62 | var ch = m_buffer[pos]; | |
| 63 |
|
63 | |||
| 64 |
state = dfa[state, ch > maxSymbol ? |
|
64 | state = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
| 65 |
if (state == |
|
65 | if (state == AutomatonConst.UNREACHABLE_STATE) | |
| 66 | break; |
|
66 | break; | |
| 67 |
|
67 | |||
| 68 | pos++; |
|
68 | pos++; | |
| 69 | } |
|
69 | } | |
| 70 |
|
70 | |||
| 71 | m_tokenLength = pos - m_bufferOffset; |
|
71 | m_tokenLength = pos - m_bufferOffset; | |
| 72 |
} while (state != |
|
72 | } while (state != AutomatonConst.UNREACHABLE_STATE && Feed()); | |
| 73 |
|
73 | |||
| 74 | m_tokenOffset = m_bufferOffset; |
|
74 | m_tokenOffset = m_bufferOffset; | |
| 75 | m_bufferOffset += m_tokenLength; |
|
75 | m_bufferOffset += m_tokenLength; | |
| @@ -159,7 +159,6 | |||||
| 159 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> |
|
159 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> | |
| 160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> |
|
160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> | |
| 161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> |
|
161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> | |
| 162 | <Compile Include="Automaton\DFAConst.cs" /> |
|
|||
| 163 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> |
|
162 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> | |
| 164 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> |
|
163 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> | |
| 165 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> |
|
164 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> | |
| @@ -183,7 +182,6 | |||||
| 183 | <Compile Include="Automaton\IDFATable.cs" /> |
|
182 | <Compile Include="Automaton\IDFATable.cs" /> | |
| 184 | <Compile Include="Automaton\IDFATableBuilder.cs" /> |
|
183 | <Compile Include="Automaton\IDFATableBuilder.cs" /> | |
| 185 | <Compile Include="Automaton\DFATable.cs" /> |
|
184 | <Compile Include="Automaton\DFATable.cs" /> | |
| 186 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> |
|
|||
| 187 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> |
|
185 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> | |
| 188 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> |
|
186 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> | |
| 189 | <Compile Include="Formats\TextScanner.cs" /> |
|
187 | <Compile Include="Formats\TextScanner.cs" /> | |
| @@ -193,7 +191,10 | |||||
| 193 | <Compile Include="Formats\Grammar.cs" /> |
|
191 | <Compile Include="Formats\Grammar.cs" /> | |
| 194 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> |
|
192 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> | |
| 195 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> |
|
193 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> | |
| 196 |
<Compile Include="Automaton\RegularExpressions\ |
|
194 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitorT.cs" /> | |
|
|
195 | <Compile Include="Automaton\AutomatonConst.cs" /> | |||
|
|
196 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> | |||
|
|
197 | <Compile Include="Components\LazyAndWeak.cs" /> | |||
| 197 | </ItemGroup> |
|
198 | </ItemGroup> | |
| 198 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
199 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | |
| 199 | <ItemGroup /> |
|
200 | <ItemGroup /> | |
| 1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
| 1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now
