##// END OF EJS Templates
minor fixes and debug
cin -
r181:b2b6a6640aa3 ref20160224
parent child
Show More
@@ -1,313 +1,316
1 1 using Implab;
2 2 using System;
3 3 using System.Collections.Generic;
4 4 using System.Linq;
5 using System.Diagnostics;
5 6
6 7 namespace Implab.Automaton {
7 8 public class DFATable : IDFATableBuilder {
8 9 int m_stateCount;
9 10 int m_symbolCount;
10 11 int m_initialState;
11 12
12 13 readonly HashSet<int> m_finalStates = new HashSet<int>();
13 14 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
14 15
15 16
16 17 #region IDFADefinition implementation
17 18
18 19 public bool IsFinalState(int s) {
19 20 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
20 21
21 22 return m_finalStates.Contains(s);
22 23 }
23 24
24 25 public IEnumerable<int> FinalStates {
25 26 get {
26 27 return m_finalStates;
27 28 }
28 29 }
29 30
30 31 public int StateCount {
31 32 get { return m_stateCount; }
32 33 }
33 34
34 35 public int AlphabetSize {
35 36 get { return m_symbolCount; }
36 37 }
37 38
38 39 public int InitialState {
39 40 get { return m_initialState; }
40 41 }
41 42
42 43 #endregion
43 44
44 45 public void SetInitialState(int s) {
45 46 Safe.ArgumentAssert(s >= 0, "s");
47 m_stateCount = Math.Max(m_stateCount, s + 1);
46 48 m_initialState = s;
47 49 }
48 50
49 51 public void MarkFinalState(int state) {
52 m_stateCount = Math.Max(m_stateCount, state + 1);
50 53 m_finalStates.Add(state);
51 54 }
52 55
53 56 public void Add(AutomatonTransition item) {
54 57 Safe.ArgumentAssert(item.s1 >= 0, "item");
55 58 Safe.ArgumentAssert(item.s2 >= 0, "item");
56 59 Safe.ArgumentAssert(item.edge >= 0, "item");
57 60
58 61 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
59 m_symbolCount = Math.Max(m_symbolCount, item.edge);
62 m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
60 63
61 64 m_transitions.Add(item);
62 65 }
63 66
64 67 public void Clear() {
65 68 m_stateCount = 0;
66 69 m_symbolCount = 0;
67 70 m_finalStates.Clear();
68 71 m_transitions.Clear();
69 72 }
70 73
71 74 public bool Contains(AutomatonTransition item) {
72 75 return m_transitions.Contains(item);
73 76 }
74 77
75 78 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
76 79 m_transitions.CopyTo(array, arrayIndex);
77 80 }
78 81
79 82 public bool Remove(AutomatonTransition item) {
80 83 return m_transitions.Remove(item);
81 84 }
82 85
83 86 public int Count {
84 87 get {
85 88 return m_transitions.Count;
86 89 }
87 90 }
88 91
89 92 public bool IsReadOnly {
90 93 get {
91 94 return false;
92 95 }
93 96 }
94 97
95 98 public IEnumerator<AutomatonTransition> GetEnumerator() {
96 99 return m_transitions.GetEnumerator();
97 100 }
98 101
99 102 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
100 103 return GetEnumerator();
101 104 }
102 105
103 106 public int[,] CreateTransitionTable() {
104 107 var table = new int[StateCount,AlphabetSize];
105 108
106 109 for (int i = 0; i < StateCount; i++)
107 for (int j = 0; i < AlphabetSize; j++)
110 for (int j = 0; j < AlphabetSize; j++)
108 111 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
109 112
110 113 foreach (var t in this)
111 114 table[t.s1,t.edge] = t.s2;
112 115
113 116 return table;
114 117 }
115 118
116 119 public bool[] CreateFinalStateTable() {
117 120 var table = new bool[StateCount];
118 121
119 122 foreach (var s in FinalStates)
120 123 table[s] = true;
121 124
122 125 return table;
123 126 }
124 127
125 128 /// <summary>Π€ΠΎΡ€ΠΌΠΈΡ€ΡƒΠ΅Ρ‚ мноТСства ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний ΠΏΠ΅Ρ€Π΅Π΄ Π½Π°Ρ‡Π°Π»ΠΎΠΌ Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° ΠΌΠΈΠ½ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</summary>
126 129 /// <remarks>
127 130 /// Π’ процСссС построСния минимального Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π° трСбуСтся Ρ€Π°Π·Π΄Π΅Π»ΠΈΡ‚ΡŒ мноТСство состояний,
128 131 /// Π½Π° Π΄Π²Π° подмноТСства - ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния ΠΈ всС ΠΎΡΡ‚Π°Π»ΡŒΠ½Ρ‹Π΅, послС Ρ‡Π΅Π³ΠΎ эти подмноТСства
129 132 /// Π±ΡƒΠ΄ΡƒΡ‚ Ρ€Π΅Π·Π΄Π΅Π»Π΅Π½Ρ‹ Π½Π° Π±ΠΎΠ»Π΅Π΅ ΠΌΠ΅Π»ΠΊΠΈΠ΅. Иногда трСбуСтся Π³Π°Ρ€Π°Π½Ρ‚ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ различия ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… сосотяний,
130 133 /// для этого Π½Π΅ΠΎΠ±Ρ…ΠΎΠ΄ΠΈΠΌΠΎ ΠΏΠ΅Ρ€Π΅ΠΎΠΏΡ€Π΅Π΄Π΅Π»ΠΈΡ‚ΡŒ Π΄Π°Π½Π½Ρ†ΡŽ Ρ„ΡƒΠΊΠ½Ρ†ΠΈΡŽ, для получСния мноТСств ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний.
131 134 /// </remarks>
132 135 /// <returns>The final states.</returns>
133 136 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
134 137 return new HashSet<int>[] { m_finalStates };
135 138 }
136 139
137 140 protected void Optimize(
138 141 IDFATableBuilder optimalDFA,
139 142 IDictionary<int,int> alphabetMap,
140 143 IDictionary<int,int> stateMap
141 144 ) {
142 145 Safe.ArgumentNotNull(optimalDFA, "dfa");
143 146 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
144 147 Safe.ArgumentNotNull(stateMap, "stateMap");
145 148
146 149
147 150 var setComparer = new CustomEqualityComparer<HashSet<int>>(
148 151 (x, y) => x.SetEquals(y),
149 152 s => s.Sum(x => x.GetHashCode())
150 153 );
151 154
152 155 var optimalStates = new HashSet<HashSet<int>>(setComparer);
153 156 var queue = new HashSet<HashSet<int>>(setComparer);
154 157
155 158 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния, сгруппированныС ΠΏΠΎ ΠΌΠ°Ρ€ΠΊΠ΅Ρ€Π°ΠΌ
156 159 optimalStates.UnionWith(
157 160 GroupFinalStates()
158 161 );
159 162
160 163 var state = new HashSet<int>(
161 164 Enumerable
162 165 .Range(0, m_stateCount - 1)
163 166 .Where(i => !m_finalStates.Contains(i))
164 167 );
165 168
166 169 optimalStates.Add(state);
167 170 queue.Add(state);
168 171
169 172 var rmap = m_transitions
170 173 .GroupBy(t => t.s2)
171 174 .ToDictionary(
172 175 g => g.Key, // s2
173 g => g.GroupBy(t => t.edge, t => t.s1).ToDictionary(p => p.Key)
176 g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
174 177 );
175 178
176 179 while (queue.Count > 0) {
177 180 var stateA = queue.First();
178 181 queue.Remove(stateA);
179 182
180 183 for (int c = 0; c < m_symbolCount; c++) {
181 184 var stateX = new HashSet<int>();
182 foreach(var a in stateA)
185 foreach(var a in stateA.Where(rmap.ContainsKey))
183 186 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
184 187
185 188 foreach (var stateY in optimalStates.ToArray()) {
186 189 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
187 190 var stateR1 = new HashSet<int>(stateY);
188 191 var stateR2 = new HashSet<int>(stateY);
189 192
190 193 stateR1.IntersectWith(stateX);
191 194 stateR2.ExceptWith(stateX);
192 195
193 196 optimalStates.Remove(stateY);
194 197 optimalStates.Add(stateR1);
195 198 optimalStates.Add(stateR2);
196 199
197 200 if (queue.Contains(stateY)) {
198 201 queue.Remove(stateY);
199 202 queue.Add(stateR1);
200 203 queue.Add(stateR2);
201 204 } else {
202 205 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
203 206 }
204 207 }
205 208 }
206 209 }
207 210 }
208 211
209 212 // ΠΊΠ°Ρ€Ρ‚Π° получСния ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½ΠΎΠ³ΠΎ состояния ΠΏΠΎ ΡΠΎΠΎΡ‚Π²Π΅ΡΡ‚Π²ΡƒΡŽΡ‰Π΅ΠΌΡƒ Π΅ΠΌΡƒ простому ΡΠΎΡΡ‚ΠΎΡΠ½ΠΈΡŽ
210 213 var nextState = 0;
211 214 foreach (var item in optimalStates) {
212 215 var id = nextState++;
213 216 foreach (var s in item)
214 217 stateMap[s] = id;
215 218 }
216 219
217 220 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
218 221 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Π΅ символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли Move(s,a1) == Move(s,a2), для любого s
219 222 // для этого ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌ кластСризации, сначала
220 223 // считаСм, Ρ‡Ρ‚ΠΎ всС символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹
221 224
222 225 var minClasses = new HashSet<HashSet<int>>(setComparer);
223 226 var alphaQueue = new Queue<HashSet<int>>();
224 227 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
225 228
226 229 // для всСх состояний, Π±ΡƒΠ΄Π΅ΠΌ ΠΏΡ€ΠΎΠ²Π΅Ρ€ΡΡ‚ΡŒ ΠΊΠ°ΠΆΠ΄Ρ‹ΠΉ класс Π½Π° Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΠΎΡΡ‚ΡŒ,
227 230 // Ρ‚.Π΅. символы Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли ΠΎΠ½ΠΈ приводят ΠΊ Ρ€Π°Π·Π½Ρ‹ΠΌ состояниям
228 231 for (int s = 0 ; s < optimalStates.Count; s++) {
229 232 var newQueue = new Queue<HashSet<int>>();
230 233
231 234 foreach (var A in alphaQueue) {
232 235 // классы ΠΈΠ· ΠΎΠ΄Π½ΠΎΠ³ΠΎ символа Π΄Π΅Π»ΠΈΡ‚ΡŒ бСсполСзно, ΠΏΠ΅Ρ€Π΅Π²ΠΎΠ΄ΠΈΠΌ ΠΈΡ… сразу Π²
233 236 // Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚ΠΈΡ€ΡƒΡŽΡ‰ΠΈΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
234 237 if (A.Count == 1) {
235 238 minClasses.Add(A);
236 239 continue;
237 240 }
238 241
239 242 // Ρ€Π°Π·Π»ΠΈΡ‡Π°Π΅ΠΌ классы символов, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ пСрСводят Π² Ρ€Π°Π·Π»ΠΈΡ‡Π½Ρ‹Π΅ ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ состояния
240 243 // optimalState -> alphaClass
241 244 var classes = new Dictionary<int, HashSet<int>>();
242 245
243 246 foreach (var term in A) {
244 247 // ΠΈΡ‰Π΅ΠΌ всС ΠΏΠ΅Ρ€Π΅Ρ…ΠΎΠ΄Ρ‹ класса ΠΏΠΎ символу term
245 248 var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray();
246 249
250 Debug.Assert(res.Length <= 1);
251
247 252 var s2 = res.Length > 0 ? res[0] : -1;
248 253
249 254 HashSet<int> a2;
250 255 if (!classes.TryGetValue(s2, out a2)) {
251 256 a2 = new HashSet<int>();
252 257 newQueue.Enqueue(a2);
253 258 classes[s2] = a2;
254 259 }
255 260 a2.Add(term);
256 261 }
257 262 }
258 263
259 264 if (newQueue.Count == 0)
260 265 break;
261 266 alphaQueue = newQueue;
262 267 }
263 268
264 269 // послС окончания Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° Π² ΠΎΡ‡Π΅Ρ€Π΅Π΄ΠΈ останутся ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹Π΅ классы
265 270 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов
266 271 foreach (var A in alphaQueue)
267 272 minClasses.Add(A);
268 273
269 274 // построСниС отобраТСния Π°Π»Ρ„Π°Π²ΠΈΡ‚ΠΎΠ² Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов.
270 275 // ΠΏΠΎΡΠΊΠΎΠ»ΡŒΠΊΡƒ символ DFAConst.UNCLASSIFIED_INPUT ΠΌΠΎΠΆΠ΅Ρ‚ ΠΈΠΌΠ΅Ρ‚ΡŒ
271 276 // ΡΠΏΠ΅Ρ†ΠΈΠ°Π»ΡŒΠ½ΠΎΠ΅ Π·Π½Π°Ρ‡Π΅Π½ΠΈΠ΅, Ρ‚ΠΎΠ³Π΄Π° сохраним ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ класс,
272 277 // содСрТащий этот символ Π½Π° Ρ‚ΠΎΠΌΠΆΠ΅ мСстС.
273 278
274 279 var nextCls = 0;
275 280 foreach (var item in minClasses) {
276 281 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
277 282 nextCls++;
278 283
279 284 // сохраняСм DFAConst.UNCLASSIFIED_INPUT
280 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls;
285 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
281 286
282 287 foreach (var a in item)
283 288 alphabetMap[a] = cls;
284
285 nextCls++;
286 289 }
287 290
288 291 // построСниС Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π°
289 292 optimalDFA.SetInitialState(stateMap[m_initialState]);
290 293
291 294 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
292 295 optimalDFA.MarkFinalState(sf);
293 296
294 297 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
295 298 optimalDFA.Add(t);
296 299 }
297 300
298 301 protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
299 302 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
300 303 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
301 304
302 305 foreach(var t in m_transitions)
303 306 Console.WriteLine(
304 307 "[{0}] -{{{1}}}-> [{2}]{3}",
305 308 String.Join(",", stateAlphabet.GetSymbols(t.s1)),
306 309 String.Join("", inputAlphabet.GetSymbols(t.edge)),
307 310 String.Join(",", stateAlphabet.GetSymbols(t.s2)),
308 311 m_finalStates.Contains(t.s2) ? "$" : ""
309 312 );
310 313 }
311 314
312 315 }
313 316 }
@@ -1,84 +1,84
1 1 using System;
2 2 using System.Collections.Generic;
3 3 using System.Linq;
4 4
5 5 namespace Implab.Automaton {
6 6 public class MapAlphabet<T> : IAlphabetBuilder<T> {
7 7 readonly Dictionary<T,int> m_map;
8 8 int m_nextCls;
9 9 readonly bool m_supportUnclassified;
10 10
11 11 public MapAlphabet(bool supportUnclassified, IEqualityComparer<T> comparer) {
12 12 m_map = comparer != null ? new Dictionary<T, int>(comparer) : new Dictionary<T,int>();
13 13 m_supportUnclassified = supportUnclassified;
14 14 m_nextCls = supportUnclassified ? 1 : 0;
15 15 }
16 16
17 17 #region IAlphabetBuilder implementation
18 18
19 19 public int DefineSymbol(T symbol) {
20 20 int cls;
21 21 return m_map.TryGetValue(symbol, out cls) ? cls : DefineSymbol(symbol, m_nextCls);
22 22 }
23 23
24 24 public int DefineSymbol(T symbol, int cls) {
25 25 Safe.ArgumentAssert(cls >= 0, "cls");
26 26
27 27 m_nextCls = Math.Max(cls + 1, m_nextCls);
28 28 m_map.Add(symbol, cls);
29 29 return cls;
30 30 }
31 31
32 32 public int DefineClass(IEnumerable<T> symbols) {
33 33 return DefineClass(symbols, m_nextCls);
34 34 }
35 35
36 36 public int DefineClass(IEnumerable<T> symbols, int cls) {
37 37 Safe.ArgumentAssert(cls >= 0, "cls");
38 38 Safe.ArgumentNotNull(symbols, "symbols");
39 39
40 40 m_nextCls = Math.Max(cls + 1, m_nextCls);
41 41
42 42 foreach (var symbol in symbols)
43 43 m_map[symbol] = cls;
44 44 return cls;
45 45 }
46 46
47 47 #endregion
48 48
49 49 #region IAlphabet implementation
50 50
51 51 public int Translate(T symbol) {
52 52 int cls;
53 53 if (m_map.TryGetValue(symbol, out cls))
54 54 return cls;
55 55 if (!m_supportUnclassified)
56 56 throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet");
57 57 return AutomatonConst.UNCLASSIFIED_INPUT;
58 58 }
59 59
60 60 public int Count {
61 61 get {
62 62 return m_nextCls;
63 63 }
64 64 }
65 65
66 66 public bool Contains(T symbol) {
67 67 return m_supportUnclassified || m_map.ContainsKey(symbol);
68 68 }
69 69
70 70
71 71 public IEnumerable<T> GetSymbols(int cls) {
72 Safe.ArgumentAssert(cls > 0, "cls");
72 Safe.ArgumentAssert(!m_supportUnclassified || cls > 0, "cls");
73 73 return m_map.Where(p => p.Value == cls).Select(p => p.Key);
74 74 }
75 75 #endregion
76 76
77 77 public IEnumerable<KeyValuePair<T,int>> Mappings {
78 78 get {
79 79 return m_map;
80 80 }
81 81 }
82 82 }
83 83 }
84 84
@@ -1,82 +1,83
1 1 using System.Collections.Generic;
2 2 using System.Linq;
3 3
4 4 namespace Implab.Automaton.RegularExpressions {
5 5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
6 6
7 7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
8 8 readonly IAlphabet<TInput> m_alphabet;
9 9
10 10 public RegularDFA(IAlphabet<TInput> alphabet) {
11 11 Safe.ArgumentNotNull(alphabet, "aplhabet");
12 12
13 13 m_alphabet = alphabet;
14 14 }
15 15
16 16
17 17 public IAlphabet<TInput> InputAlphabet {
18 18 get {
19 19 return m_alphabet;
20 20 }
21 21 }
22 22
23 23 public void MarkFinalState(int s, TTag[] tags) {
24 24 MarkFinalState(s);
25 25 SetStateTag(s, tags);
26 26 }
27 27
28 28 public void SetStateTag(int s, TTag[] tags) {
29 29 Safe.ArgumentNotNull(tags, "tags");
30 30 m_tags[s] = tags;
31 31 }
32 32
33 33 public TTag[] GetStateTag(int s) {
34 34 TTag[] tags;
35 35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
36 36 }
37 37
38 38 public TTag[][] CreateTagTable() {
39 39 var table = new TTag[StateCount][];
40 40
41 41 foreach (var pair in m_tags)
42 42 table[pair.Key] = pair.Value;
43 43
44 44 return table;
45 45 }
46 46
47 47 /// <summary>
48 48 /// Optimize the specified alphabet.
49 49 /// </summary>
50 50 /// <param name="alphabet">ΠŸΡƒΡΡ‚ΠΎΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π±ΡƒΠ΄Π΅Ρ‚ Π·ΠΏΠΎΠ»Π½Π΅Π½ Π² процСссС ΠΎΠΏΡ‚ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</param>
51 51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
52 52 Safe.ArgumentNotNull(alphabet, "alphabet");
53 53
54 54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
55 55
56 56 var alphaMap = new Dictionary<int,int>();
57 57 var stateMap = new Dictionary<int,int>();
58 58
59 59 Optimize(dfa, alphaMap, stateMap);
60 60
61 61 // mark tags in the new DFA
62 62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
63 63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
64 64
65 65 // make the alphabet for the new DFA
66 foreach (var pair in alphaMap)
66 // skip all unclassified symbols
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
67 68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
68 69
69 70 return dfa;
70 71 }
71 72
72 73 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
73 74 var arrayComparer = new CustomEqualityComparer<TTag[]>(
74 75 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
75 76 x => x.Sum(it => x.GetHashCode())
76 77 );
77 78 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
78 79 }
79 80
80 81 }
81 82 }
82 83
@@ -1,119 +1,119
1 1 using System.Linq;
2 2 using Implab.Automaton.RegularExpressions;
3 3 using System;
4 4 using Implab.Automaton;
5 5 using Implab.Components;
6 6
7 7 namespace Implab.Formats.JSON {
8 8 class JSONGrammar : Grammar<char> {
9 9 public enum TokenType {
10 10 None,
11 11 BeginObject,
12 12 EndObject,
13 13 BeginArray,
14 14 EndArray,
15 15 String,
16 16 Number,
17 17 Literal,
18 18 NameSeparator,
19 19 ValueSeparator,
20 20
21 21 StringBound,
22 22 EscapedChar,
23 23 UnescapedChar,
24 24 EscapedUnicode
25 25 }
26 26
27 27 static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());
28 28
29 29 public static JSONGrammar Instance {
30 30 get { return _instance.Value; }
31 31 }
32 32
33 33 readonly ScannerContext<TokenType> m_jsonExpression;
34 34 readonly ScannerContext<TokenType> m_stringExpression;
35 35 readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();
36 36
37 37 public JSONGrammar() {
38 38 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
39 39 var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
40 40 var digit9 = SymbolRangeToken('1', '9');
41 41 var zero = SymbolToken('0');
42 42 var digit = zero.Or(digit9);
43 43 var dot = SymbolToken('.');
44 44 var minus = SymbolToken('-');
45 45 var sign = SymbolSetToken('-', '+');
46 46 var expSign = SymbolSetToken('e', 'E');
47 47 var letters = SymbolRangeToken('a', 'z');
48 48 var integer = zero.Or(digit9.Cat(digit.EClosure()));
49 49 var frac = dot.Cat(digit.Closure());
50 50 var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
51 51 var quote = SymbolToken('"');
52 52 var backSlash = SymbolToken('\\');
53 53 var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
54 54 var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
55 55 var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
56 56 var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
57 57 var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
58 58 var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
59 59 var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
60 60 var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
61 61 var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);
62 62
63 63 var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
64 64 var literal = letters.Closure();
65 65 var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
66 66
67 67 var jsonExpression =
68 68 number.Tag(TokenType.Number)
69 69 .Or(literal.Tag(TokenType.Literal))
70 70 .Or(quote.Tag(TokenType.StringBound))
71 71 .Or(beginObject.Tag(TokenType.BeginObject))
72 72 .Or(endObject.Tag(TokenType.EndObject))
73 73 .Or(beginArray.Tag(TokenType.BeginArray))
74 74 .Or(endArray.Tag(TokenType.EndArray))
75 75 .Or(nameSep.Tag(TokenType.NameSeparator))
76 76 .Or(valueSep.Tag(TokenType.ValueSeparator));
77 77
78 78
79 79 var jsonStringExpression =
80 80 quote.Tag(TokenType.StringBound)
81 81 .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
82 82 .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
83 83 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
84 84
85 85
86 86 m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
87 87 m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
88 88
89 89
90 90 }
91 91
92 92 protected override IAlphabetBuilder<char> AlphabetBuilder {
93 93 get {
94 94 return m_defaultAlphabet;
95 95 }
96 96 }
97 97
98 98 public ScannerContext<TokenType> JsonExpression {
99 99 get {
100 100 return m_jsonExpression;
101 101 }
102 102 }
103 103
104 104 public ScannerContext<TokenType> JsonStringExpression {
105 105 get {
106 106 return m_stringExpression;
107 107 }
108 108 }
109 109
110 110 Token SymbolRangeToken(char start, char stop) {
111 return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>());
111 return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x));
112 112 }
113 113
114 114 protected override IndexedAlphabetBase<char> CreateAlphabet() {
115 115 return new CharAlphabet();
116 116 }
117 117
118 118 }
119 119 }
@@ -1,156 +1,162
1 1 using System;
2 2 using Implab.Components;
3 3 using System.Diagnostics;
4 4 using Implab.Automaton;
5 5 using System.Text;
6 6
7 7 namespace Implab.Formats {
8 8 public abstract class TextScanner : Disposable {
9 9 readonly int m_bufferMax;
10 10 readonly int m_chunkSize;
11 11
12 12 char[] m_buffer;
13 13 int m_bufferOffset;
14 14 int m_bufferSize;
15 15 int m_tokenOffset;
16 16 int m_tokenLength;
17 17
18 18 /// <summary>
19 19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
20 20 /// </summary>
21 21 /// <param name="bufferMax">Buffer max.</param>
22 22 /// <param name="chunkSize">Chunk size.</param>
23 23 protected TextScanner(int bufferMax, int chunkSize) {
24 24 Debug.Assert(m_chunkSize <= m_bufferMax);
25 25
26 26 m_bufferMax = bufferMax;
27 27 m_chunkSize = chunkSize;
28 28 }
29 29
30 30 /// <summary>
31 31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
32 32 /// </summary>
33 33 /// <param name="buffer">Buffer.</param>
34 34 protected TextScanner(char[] buffer) {
35 35 if (buffer != null) {
36 36 m_buffer = buffer;
37 37 m_bufferSize = buffer.Length;
38 38 }
39 39 }
40 40
41 41 /// <summary>
42 42 /// (hungry) Reads the next token.
43 43 /// </summary>
44 44 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns>
45 45 /// <param name="dfa">The transition map for the automaton</param>
46 46 /// <param name="final">Final states of the automaton.</param>
47 47 /// <param name="tags">Tags.</param>
48 48 /// <param name="state">The initial state for the automaton.</param>
49 49 /// <param name="alphabet"></param>
50 50 /// <param name = "tag"></param>
51 51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
52 52 m_tokenLength = 0;
53 53 tag = null;
54 54
55 55 var maxSymbol = alphabet.Length - 1;
56 56
57 57 do {
58 58 // after the next chunk is read the offset in the buffer may change
59 59 int pos = m_bufferOffset + m_tokenLength;
60 60
61 61 while (pos < m_bufferSize) {
62 62 var ch = m_buffer[pos];
63 63
64 state = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
65 if (state == AutomatonConst.UNREACHABLE_STATE)
64 try {
65 var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
66
67 if (next == AutomatonConst.UNREACHABLE_STATE)
66 68 break;
67
69
70 state = next;
71 }catch {
72 throw;
73 }
68 74 pos++;
69 75 }
70 76
71 77 m_tokenLength = pos - m_bufferOffset;
72 78 } while (state != AutomatonConst.UNREACHABLE_STATE && Feed());
73 79
74 80 m_tokenOffset = m_bufferOffset;
75 81 m_bufferOffset += m_tokenLength;
76 82
77 83 if (final[state]) {
78 84 tag = tags[state];
79 85 return true;
80 86 }
81 87
82 88 if (m_bufferOffset == m_bufferSize) {
83 89 if (m_tokenLength == 0) //EOF
84 90 return false;
85 91
86 92 throw new ParserException();
87 93 }
88 94
89 95 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
90 96
91 97 }
92 98
93 99 protected void Feed(char[] buffer, int offset, int length) {
94 100 m_buffer = buffer;
95 101 m_bufferOffset = offset;
96 102 m_bufferSize = offset + length;
97 103 }
98 104
99 105 protected bool Feed() {
100 106 if (m_chunkSize <= 0)
101 107 return false;
102 108
103 109 if (m_buffer != null) {
104 110 var free = m_buffer.Length - m_bufferSize;
105 111
106 112 if (free < m_chunkSize) {
107 113 free += m_chunkSize;
108 114 var used = m_bufferSize - m_bufferOffset;
109 115 var size = used + free;
110 116
111 117 if (size > m_bufferMax)
112 118 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));
113 119
114 120 var temp = new char[size];
115 121
116 122 var read = Read(temp, used, m_chunkSize);
117 123 if (read == 0)
118 124 return false;
119 125
120 126 Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);
121 127
122 128 m_bufferOffset = 0;
123 129 m_bufferSize = used + read;
124 130 m_buffer = temp;
125 131 } else {
126 132 var read = Read(m_buffer, m_bufferSize, m_chunkSize);
127 133 if (read == 0)
128 134 return false;
129 135 m_bufferSize += m_chunkSize;
130 136 }
131 137 return true;
132 138 } else {
133 139 Debug.Assert(m_bufferOffset == 0);
134 140 m_buffer = new char[m_chunkSize];
135 141 m_bufferSize = Read(m_buffer, 0, m_chunkSize);
136 142 return (m_bufferSize != 0);
137 143 }
138 144 }
139 145
140 146 protected abstract int Read(char[] buffer, int offset, int size);
141 147
142 148 public string GetTokenValue() {
143 149 return new String(m_buffer, m_tokenOffset, m_tokenLength);
144 150 }
145 151
146 152 public void CopyTokenTo(char[] buffer, int offset) {
147 153 m_buffer.CopyTo(buffer, offset);
148 154 }
149 155
150 156 public void CopyTokenTo(StringBuilder sb) {
151 157 sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
152 158 }
153 159
154 160 }
155 161 }
156 162
General Comments 0
You need to be logged in to leave comments. Login now