##// END OF EJS Templates
fixed DFA optimization, JSON is fully functional
cin -
r183:4f82e0f161c3 ref20160224
parent child
Show More

The requested changes are too big and content was truncated. Show full diff

@@ -0,0 +1,4
1 <?xml version="1.0" encoding="utf-8"?>
2 <packages>
3 <package id="System.Text.Json" version="2.0.0.11" targetFramework="net45" />
4 </packages> No newline at end of file
1 NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
@@ -1,50 +1,88
1 1 using NUnit.Framework;
2 2 using System;
3 3 using Implab.Formats.JSON;
4 using Implab.Automaton;
4 5
5 6 namespace Implab.Format.Test {
6 7 [TestFixture]
7 8 public class JsonTests {
8 9 [Test]
9 10 public void TestScannerValidTokens() {
10 var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:");
11 using (var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) {
12
13 Tuple<JsonTokenType,object>[] expexted = new [] {
14 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
15 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
16 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d),
17 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
18 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d),
19 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
20 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d),
21 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
22 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d),
23 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
24 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d),
25 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
26 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d),
27 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
28 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text"),
29 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
30 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal"),
31 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " ["),
32 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]"),
33 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{"),
34 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}"),
35 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":")
36 };
11 37
12 Tuple<JsonTokenType,object>[] expexted = new [] {
13 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
14 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
15 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ),
16 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
17 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ),
18 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
19 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ),
20 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
21 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ),
22 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
23 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ),
24 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
25 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ),
26 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
27 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ),
28 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
29 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ),
30 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ),
31 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ),
32 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ),
33 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ),
34 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" )
38 object value;
39 JsonTokenType tokenType;
40 for (var i = 0; i < expexted.Length; i++) {
41
42 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
43 Assert.AreEqual(expexted[i].Item1, tokenType);
44 Assert.AreEqual(expexted[i].Item2, value);
45 }
46
47 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
48 }
49 }
50
51 [Test]
52 public void TestScannerBadTokens() {
53 var bad = new [] {
54 " 1",
55 " literal",
56 " \"",
57 "\"unclosed string",
58 "1.bad",
59 "001", // should be read as three numbers
60 "--10",
61 "+10",
62 "1.0.0",
63 "1e1.0",
64 "l1teral0",
65 ".123",
66 "-.123"
35 67 };
36 68
37 object value;
38 JsonTokenType tokenType;
39 for (var i = 0; i < expexted.Length; i++) {
40
41 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
42 Assert.AreEqual(expexted[i].Item1, tokenType);
43 Assert.AreEqual(expexted[i].Item2, value);
44 }
45
46 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
69 foreach (var json in bad)
70 using (var scanner = new JSONScanner(json)) {
71 try {
72 object value;
73 JsonTokenType token;
74 scanner.ReadToken(out value, out token);
75 if (!Object.Equals(value,json)) {
76 Console.WriteLine("'{0}' is read as {1}", json, value is String ? String.Format("'{0}'", value) : value );
77 continue;
78 }
79 Assert.Fail("Token '{0}' shouldn't pass", json);
80 } catch (ParserException e) {
81 Console.WriteLine(e.Message);
82 }
83 }
84
47 85 }
48 86 }
49 87 }
50 88
@@ -1,343 +1,348
1 1 using Implab;
2 2 using System;
3 3 using System.Collections.Generic;
4 4 using System.Linq;
5 5 using System.Diagnostics;
6 6 using System.IO;
7 7 using System.CodeDom.Compiler;
8 8 using System.CodeDom;
9 9
10 10 namespace Implab.Automaton {
11 11 public class DFATable : IDFATableBuilder {
12 12 int m_stateCount;
13 13 int m_symbolCount;
14 14 int m_initialState;
15 15
16 16 readonly HashSet<int> m_finalStates = new HashSet<int>();
17 17 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
18 18
19 19
20 20 #region IDFADefinition implementation
21 21
22 22 public bool IsFinalState(int s) {
23 23 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
24 24
25 25 return m_finalStates.Contains(s);
26 26 }
27 27
28 28 public IEnumerable<int> FinalStates {
29 29 get {
30 30 return m_finalStates;
31 31 }
32 32 }
33 33
34 34 public int StateCount {
35 35 get { return m_stateCount; }
36 36 }
37 37
38 38 public int AlphabetSize {
39 39 get { return m_symbolCount; }
40 40 }
41 41
42 42 public int InitialState {
43 43 get { return m_initialState; }
44 44 }
45 45
46 46 #endregion
47 47
48 48 public void SetInitialState(int s) {
49 49 Safe.ArgumentAssert(s >= 0, "s");
50 50 m_stateCount = Math.Max(m_stateCount, s + 1);
51 51 m_initialState = s;
52 52 }
53 53
54 54 public void MarkFinalState(int state) {
55 55 m_stateCount = Math.Max(m_stateCount, state + 1);
56 56 m_finalStates.Add(state);
57 57 }
58 58
59 59 public void Add(AutomatonTransition item) {
60 60 Safe.ArgumentAssert(item.s1 >= 0, "item");
61 61 Safe.ArgumentAssert(item.s2 >= 0, "item");
62 62 Safe.ArgumentAssert(item.edge >= 0, "item");
63 63
64 64 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
65 65 m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
66 66
67 67 m_transitions.Add(item);
68 68 }
69 69
70 70 public void Clear() {
71 71 m_stateCount = 0;
72 72 m_symbolCount = 0;
73 73 m_finalStates.Clear();
74 74 m_transitions.Clear();
75 75 }
76 76
77 77 public bool Contains(AutomatonTransition item) {
78 78 return m_transitions.Contains(item);
79 79 }
80 80
81 81 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
82 82 m_transitions.CopyTo(array, arrayIndex);
83 83 }
84 84
85 85 public bool Remove(AutomatonTransition item) {
86 86 return m_transitions.Remove(item);
87 87 }
88 88
89 89 public int Count {
90 90 get {
91 91 return m_transitions.Count;
92 92 }
93 93 }
94 94
95 95 public bool IsReadOnly {
96 96 get {
97 97 return false;
98 98 }
99 99 }
100 100
101 101 public IEnumerator<AutomatonTransition> GetEnumerator() {
102 102 return m_transitions.GetEnumerator();
103 103 }
104 104
105 105 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
106 106 return GetEnumerator();
107 107 }
108 108
109 109 public void AddSymbol(int symbol) {
110 110 Safe.ArgumentAssert(symbol >= 0, "symbol");
111 111 m_symbolCount = Math.Max(symbol + 1, m_symbolCount);
112 112 }
113 113
114 114 public int[,] CreateTransitionTable() {
115 115 var table = new int[StateCount,AlphabetSize];
116 116
117 117 for (int i = 0; i < StateCount; i++)
118 118 for (int j = 0; j < AlphabetSize; j++)
119 119 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
120 120
121 121 foreach (var t in this)
122 122 table[t.s1,t.edge] = t.s2;
123 123
124 124 return table;
125 125 }
126 126
127 127 public bool[] CreateFinalStateTable() {
128 128 var table = new bool[StateCount];
129 129
130 130 foreach (var s in FinalStates)
131 131 table[s] = true;
132 132
133 133 return table;
134 134 }
135 135
136 136 /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary>
137 137 /// <remarks>
138 138 /// В процессе построения минимального автомата требуется разделить множество состояний,
139 139 /// на два подмножества - конечные состояния и все остальные, после чего эти подмножества
140 140 /// будут резделены на более мелкие. Иногда требуется гарантировать различия конечных сосотяний,
141 141 /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний.
142 142 /// </remarks>
143 143 /// <returns>The final states.</returns>
144 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
145 return new HashSet<int>[] { m_finalStates };
144 protected virtual IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
145 return new [] { new HashSet<int>(states) };
146 146 }
147 147
148 148 protected void Optimize(
149 149 IDFATableBuilder optimalDFA,
150 150 IDictionary<int,int> alphabetMap,
151 151 IDictionary<int,int> stateMap
152 152 ) {
153 153 Safe.ArgumentNotNull(optimalDFA, "dfa");
154 154 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
155 155 Safe.ArgumentNotNull(stateMap, "stateMap");
156 156
157 157
158 158 var setComparer = new CustomEqualityComparer<HashSet<int>>(
159 159 (x, y) => x.SetEquals(y),
160 160 s => s.Sum(x => x.GetHashCode())
161 161 );
162 162
163 163 var optimalStates = new HashSet<HashSet<int>>(setComparer);
164 164 var queue = new HashSet<HashSet<int>>(setComparer);
165 165
166 // получаем конечные состояния, сгруппированные по маркерам
167 optimalStates.UnionWith(
168 GroupFinalStates()
169 );
166 optimalStates.Add(new HashSet<int>(FinalStates));
170 167
171 168 var state = new HashSet<int>(
172 169 Enumerable
173 170 .Range(0, m_stateCount)
174 171 .Where(i => !m_finalStates.Contains(i))
175 172 );
176 173
177 174 optimalStates.Add(state);
178 175 queue.Add(state);
179 176
180 177 var rmap = m_transitions
181 178 .GroupBy(t => t.s2)
182 179 .ToDictionary(
183 180 g => g.Key, // s2
184 181 g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
185 182 );
186 183
187 184 while (queue.Count > 0) {
188 185 var stateA = queue.First();
189 186 queue.Remove(stateA);
190 187
191 188 for (int c = 0; c < m_symbolCount; c++) {
192 189 var stateX = new HashSet<int>();
193 //foreach(var a in stateA.Where(rmap.ContainsKey))
194 // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
195
196 stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1));
190 foreach(var a in stateA.Where(rmap.ContainsKey))
191 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
197 192
198 193 var tmp = optimalStates.ToArray();
199 194 foreach (var stateY in tmp) {
200 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
201 var stateR1 = new HashSet<int>(stateY);
202 var stateR2 = new HashSet<int>(stateY);
195 var stateR1 = new HashSet<int>(stateY);
196 var stateR2 = new HashSet<int>(stateY);
203 197
204 stateR1.IntersectWith(stateX);
205 stateR2.ExceptWith(stateX);
198 stateR1.IntersectWith(stateX);
199 stateR2.ExceptWith(stateX);
200
201 if (stateR1.Count > 0 && stateR2.Count > 0) {
202
206 203
207 204 optimalStates.Remove(stateY);
208 205 optimalStates.Add(stateR1);
209 206 optimalStates.Add(stateR2);
210 207
211 208 if (queue.Contains(stateY)) {
212 209 queue.Remove(stateY);
213 210 queue.Add(stateR1);
214 211 queue.Add(stateR2);
215 212 } else {
216 213 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
217 214 }
218 215 }
219 216 }
220 217 }
221 218 }
222 219
220 // дополнительно разбиваем конечные состояния
221 foreach (var final in optimalStates.Where(s => s.Overlaps(m_finalStates)).ToArray()) {
222 optimalStates.Remove(final);
223 foreach (var split in SplitFinalStates(final))
224 optimalStates.Add(split);
225 }
226
227
223 228 // карта получения оптимального состояния по соотвествующему ему простому состоянию
224 229 var nextState = 0;
225 230 foreach (var item in optimalStates) {
226 231 var id = nextState++;
227 232 foreach (var s in item)
228 233 stateMap[s] = id;
229 234 }
230 235
231 236 // получаем минимальный алфавит
232 237 // входные символы не различимы, если Move(s,a1) == Move(s,a2), для любого s
233 238 // для этого используем алгоритм кластеризации, сначала
234 239 // считаем, что все символы не различимы
235 240
236 241 var minClasses = new HashSet<HashSet<int>>(setComparer);
237 242 var alphaQueue = new Queue<HashSet<int>>();
238 243 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
239 244
240 245 // для всех состояний, будем проверять каждый класс на различимость,
241 246 // т.е. символы различимы, если они приводят к разным состояниям
242 247 for (int s = 0 ; s < optimalStates.Count; s++) {
243 248 var newQueue = new Queue<HashSet<int>>();
244 249
245 250 foreach (var A in alphaQueue) {
246 251 // классы из одного символа делить бесполезно, переводим их сразу в
247 252 // результирующий алфавит
248 253 if (A.Count == 1) {
249 254 minClasses.Add(A);
250 255 continue;
251 256 }
252 257
253 258 // различаем классы символов, которые переводят в различные оптимальные состояния
254 259 // optimalState -> alphaClass
255 260 var classes = new Dictionary<int, HashSet<int>>();
256 261
257 262 foreach (var term in A) {
258 263 // ищем все переходы класса по символу term
259 264 var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
260 265
261 266 HashSet<int> a2;
262 267 if (!classes.TryGetValue(s2, out a2)) {
263 268 a2 = new HashSet<int>();
264 269 newQueue.Enqueue(a2);
265 270 classes[s2] = a2;
266 271 }
267 272 a2.Add(term);
268 273 }
269 274 }
270 275
271 276 if (newQueue.Count == 0)
272 277 break;
273 278 alphaQueue = newQueue;
274 279 }
275 280
276 281 // после окончания работы алгоритма в очереди останутся минимальные различимые классы
277 282 // входных символов
278 283 foreach (var A in alphaQueue)
279 284 minClasses.Add(A);
280 285
281 286 // построение отображения алфавитов входных символов.
282 287 // поскольку символ DFAConst.UNCLASSIFIED_INPUT может иметь
283 288 // специальное значение, тогда сохраним минимальный класс,
284 289 // содержащий этот символ на томже месте.
285 290
286 291 var nextCls = 0;
287 292 foreach (var item in minClasses) {
288 293 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
289 294 nextCls++;
290 295
291 296 // сохраняем DFAConst.UNCLASSIFIED_INPUT
292 297 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
293 298 optimalDFA.AddSymbol(cls);
294 299
295 300 foreach (var a in item)
296 301 alphabetMap[a] = cls;
297 302 }
298 303
299 304 // построение автомата
300 305 optimalDFA.SetInitialState(stateMap[m_initialState]);
301 306
302 307 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
303 308 optimalDFA.MarkFinalState(sf);
304 309
305 310 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
306 311 optimalDFA.Add(t);
307 312 }
308 313
309 314 protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
310 315 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
311 316 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
312 317
313 318 var data = new List<string>();
314 319
315 320 data.Add("digraph dfa {");
316 321
317 322 foreach (var final in m_finalStates)
318 323 data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
319 324
320 325 foreach (var t in m_transitions)
321 326 data.Add(String.Format(
322 327 "{0} -> {2} [label={1}];",
323 328 String.Join("", stateAlphabet.GetSymbols(t.s1)),
324 329 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
325 330 String.Join("", stateAlphabet.GetSymbols(t.s2))
326 331 ));
327 332 data.Add("}");
328 333 return String.Join("\n", data);
329 334 }
330 335
331 336 static string ToLiteral(string input)
332 337 {
333 338 using (var writer = new StringWriter())
334 339 {
335 340 using (var provider = CodeDomProvider.CreateProvider("CSharp"))
336 341 {
337 342 provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
338 343 return writer.ToString();
339 344 }
340 345 }
341 346 }
342 347 }
343 348 }
@@ -1,95 +1,91
1 1 using System.Collections.Generic;
2 2 using System.Linq;
3 3
4 4 namespace Implab.Automaton.RegularExpressions {
5 5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
6 6
7 7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
8 8 readonly IAlphabet<TInput> m_alphabet;
9 9
10 10 public RegularDFA(IAlphabet<TInput> alphabet) {
11 11 Safe.ArgumentNotNull(alphabet, "aplhabet");
12 12
13 13 m_alphabet = alphabet;
14 14 }
15 15
16 16
17 17 public IAlphabet<TInput> InputAlphabet {
18 18 get {
19 19 return m_alphabet;
20 20 }
21 21 }
22 22
23 23 public void MarkFinalState(int s, TTag[] tags) {
24 24 MarkFinalState(s);
25 25 SetStateTag(s, tags);
26 26 }
27 27
28 28 public void SetStateTag(int s, TTag[] tags) {
29 29 Safe.ArgumentNotNull(tags, "tags");
30 30 m_tags[s] = tags;
31 31 }
32 32
33 33 public TTag[] GetStateTag(int s) {
34 34 TTag[] tags;
35 35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
36 36 }
37 37
38 38 public TTag[][] CreateTagTable() {
39 39 var table = new TTag[StateCount][];
40 40
41 41 foreach (var pair in m_tags)
42 42 table[pair.Key] = pair.Value;
43 43
44 44 return table;
45 45 }
46 46
47 47 /// <summary>
48 48 /// Optimize the specified alphabet.
49 49 /// </summary>
50 50 /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param>
51 51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
52 52 Safe.ArgumentNotNull(alphabet, "alphabet");
53 53
54 54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
55 55
56 56 var alphaMap = new Dictionary<int,int>();
57 57 var stateMap = new Dictionary<int,int>();
58 58
59 59 Optimize(dfa, alphaMap, stateMap);
60 60
61 61 // mark tags in the new DFA
62 62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
63 63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
64 64
65 65 // make the alphabet for the new DFA
66 66 // skip all unclassified symbols
67 67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
68 68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
69
70 var orig = ToString();
71 var opt = dfa.ToString();
72
73 69 return dfa;
74 70 }
75 71
76 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
72 protected override IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
77 73 var arrayComparer = new CustomEqualityComparer<TTag[]>(
78 74 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
79 75 x => x.Sum(it => x.GetHashCode())
80 76 );
81 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
77 return states.GroupBy(x => m_tags[x] ?? new TTag[0], arrayComparer).Select(g => new HashSet<int>(g));
82 78 }
83 79
84 80 public override string ToString() {
85 81 var states = new MapAlphabet<string>(false, null);
86 82
87 83 for (int i = 0; i < StateCount; i++)
88 84 states.DefineSymbol(string.Format("s{0}", i), i);
89 85
90 86 return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
91 87 }
92 88
93 89 }
94 90 }
95 91
@@ -1,119 +1,121
1 1 using System.Linq;
2 2 using Implab.Automaton.RegularExpressions;
3 3 using System;
4 4 using Implab.Automaton;
5 5 using Implab.Components;
6 6
7 7 namespace Implab.Formats.JSON {
8 8 class JSONGrammar : Grammar<char> {
9 9 public enum TokenType {
10 10 None,
11 11 BeginObject,
12 12 EndObject,
13 13 BeginArray,
14 14 EndArray,
15 15 String,
16 16 Number,
17 17 Literal,
18 18 NameSeparator,
19 19 ValueSeparator,
20 Whitespace,
20 21
21 22 StringBound,
22 23 EscapedChar,
23 24 UnescapedChar,
24 25 EscapedUnicode
25 26 }
26 27
27 28 static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());
28 29
29 30 public static JSONGrammar Instance {
30 31 get { return _instance.Value; }
31 32 }
32 33
33 34 readonly ScannerContext<TokenType> m_jsonExpression;
34 35 readonly ScannerContext<TokenType> m_stringExpression;
35 36 readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();
36 37
37 38 public JSONGrammar() {
38 39 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
39 40 var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
40 41 var digit9 = SymbolRangeToken('1', '9');
41 42 var zero = SymbolToken('0');
42 43 var digit = zero.Or(digit9);
43 44 var dot = SymbolToken('.');
44 45 var minus = SymbolToken('-');
45 46 var sign = SymbolSetToken('-', '+');
46 47 var expSign = SymbolSetToken('e', 'E');
47 48 var letters = SymbolRangeToken('a', 'z');
48 49 var integer = zero.Or(digit9.Cat(digit.EClosure()));
49 50 var frac = dot.Cat(digit.Closure());
50 51 var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
51 52 var quote = SymbolToken('"');
52 53 var backSlash = SymbolToken('\\');
53 54 var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
54 55 var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
55 56 var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
56 57 var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
57 58 var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
58 59 var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
59 60 var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
60 61 var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
61 62 var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);
62 63
63 64 var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
64 65 var literal = letters.Closure();
65 66 var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
66 67
67 68 var jsonExpression =
68 69 number.Tag(TokenType.Number)
69 70 .Or(literal.Tag(TokenType.Literal))
70 71 .Or(quote.Tag(TokenType.StringBound))
71 72 .Or(beginObject.Tag(TokenType.BeginObject))
72 73 .Or(endObject.Tag(TokenType.EndObject))
73 74 .Or(beginArray.Tag(TokenType.BeginArray))
74 75 .Or(endArray.Tag(TokenType.EndArray))
75 76 .Or(nameSep.Tag(TokenType.NameSeparator))
76 .Or(valueSep.Tag(TokenType.ValueSeparator));
77 .Or(valueSep.Tag(TokenType.ValueSeparator))
78 .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace));
77 79
78 80
79 81 var jsonStringExpression =
80 82 quote.Tag(TokenType.StringBound)
81 83 .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
82 84 .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
83 85 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
84 86
85 87
86 88 m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
87 89 m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
88 90
89 91
90 92 }
91 93
92 94 protected override IAlphabetBuilder<char> AlphabetBuilder {
93 95 get {
94 96 return m_defaultAlphabet;
95 97 }
96 98 }
97 99
98 100 public ScannerContext<TokenType> JsonExpression {
99 101 get {
100 102 return m_jsonExpression;
101 103 }
102 104 }
103 105
104 106 public ScannerContext<TokenType> JsonStringExpression {
105 107 get {
106 108 return m_stringExpression;
107 109 }
108 110 }
109 111
110 112 Token SymbolRangeToken(char start, char stop) {
111 113 return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
112 114 }
113 115
114 116 protected override IndexedAlphabetBase<char> CreateAlphabet() {
115 117 return new CharAlphabet();
116 118 }
117 119
118 120 }
119 121 }
@@ -1,107 +1,109
1 1 using System;
2 2 using System.Globalization;
3 3 using Implab.Automaton;
4 4 using System.Text;
5 5 using Implab.Components;
6 6 using System.IO;
7 7
8 8 namespace Implab.Formats.JSON {
9 9 /// <summary>
10 10 /// Сканнер (лексер), разбивающий поток символов на токены JSON.
11 11 /// </summary>
12 12 public class JSONScanner : Disposable {
13 13 readonly StringBuilder m_builder = new StringBuilder();
14 14
15 15 readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonExpression;
16 16 readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringExpression;
17 17
18 18
19 19 readonly TextScanner m_scanner;
20 20
21 21 /// <summary>
22 22 /// Создает новый экземпляр сканнера
23 23 /// </summary>
24 24 public JSONScanner(string text) {
25 25 Safe.ArgumentNotEmpty(text, "text");
26 26
27 27 m_scanner = new StringScanner(text);
28 28 }
29 29
30 30 public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
31 31 Safe.ArgumentNotNull(reader, "reader");
32 32
33 33 m_scanner = new ReaderScanner(reader, bufferMax, chunkSize);
34 34 }
35 35
36 36 public JSONScanner(TextReader reader) : this(reader, 1024*1024, 1024){
37 37 }
38 38
39 39 /// <summary>
40 40 /// Читает следующий лексический элемент из входных данных.
41 41 /// </summary>
42 42 /// <param name="tokenValue">Возвращает значение прочитанного токена.</param>
43 43 /// <param name="tokenType">Возвращает тип прочитанного токена.</param>
44 44 /// <returns><c>true</c> - чтение произведено успешно. <c>false</c> - достигнут конец входных данных</returns>
45 45 /// <remarks>В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е.
46 46 /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks>
47 47 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
48 48 JSONGrammar.TokenType[] tag;
49 if (m_jsonContext.Execute(m_scanner, out tag)) {
49 while (m_jsonContext.Execute(m_scanner, out tag)) {
50 50 switch (tag[0]) {
51 51 case JSONGrammar.TokenType.StringBound:
52 52 tokenValue = ReadString();
53 53 tokenType = JsonTokenType.String;
54 54 break;
55 55 case JSONGrammar.TokenType.Number:
56 56 tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture);
57 57 tokenType = JsonTokenType.Number;
58 58 break;
59 case JSONGrammar.TokenType.Whitespace:
60 continue;
59 61 default:
60 62 tokenType = (JsonTokenType)tag[0];
61 63 tokenValue = m_scanner.GetTokenValue();
62 64 break;
63 65 }
64 66 return true;
65 67 }
66 68 tokenValue = null;
67 69 tokenType = JsonTokenType.None;
68 70 return false;
69 71 }
70 72
71 73 string ReadString() {
72 74 int pos = 0;
73 75 var buf = new char[6]; // the buffer for unescaping chars
74 76
75 77 JSONGrammar.TokenType[] tag;
76 78 m_builder.Clear();
77 79
78 80 while (m_stringContext.Execute(m_scanner, out tag)) {
79 81 switch (tag[0]) {
80 82 case JSONGrammar.TokenType.StringBound:
81 83 return m_builder.ToString();
82 84 case JSONGrammar.TokenType.UnescapedChar:
83 85 m_scanner.CopyTokenTo(m_builder);
84 86 break;
85 87 case JSONGrammar.TokenType.EscapedUnicode: // \xXXXX - unicode escape sequence
86 88 m_scanner.CopyTokenTo(buf, 0);
87 89 m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2));
88 90 pos++;
89 91 break;
90 92 case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence
91 93 m_scanner.CopyTokenTo(buf, 0);
92 94 m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
93 95 break;
94 96 }
95 97
96 98 }
97 99
98 100 throw new ParserException("Unexpected end of data");
99 101 }
100 102
101 103 protected override void Dispose(bool disposing) {
102 104 if (disposing)
103 105 Safe.Dispose(m_scanner);
104 106 base.Dispose(disposing);
105 107 }
106 108 }
107 109 }
@@ -1,30 +1,30
1 1 using System;
2 2 using System.IO;
3 3
4 4 namespace Implab.Formats {
5 5 public class ReaderScanner: TextScanner {
6 const int CHUNK_SIZE = 1024;
6 const int CHUNK_SIZE = 1024*4;
7 7 const int BUFFER_MAX = CHUNK_SIZE*1024;
8 8
9 9 readonly TextReader m_reader;
10 10
11 11 public ReaderScanner(TextReader reader, int limit, int chunk) : base(limit, chunk) {
12 12 Safe.ArgumentNotNull(reader, "reader");
13 13 m_reader = reader;
14 14 }
15 15
16 16 public ReaderScanner(TextReader reader) : this(reader, BUFFER_MAX, CHUNK_SIZE) {
17 17 }
18 18
19 19 protected override int Read(char[] buffer, int offset, int size) {
20 20 return m_reader.Read(buffer, offset, size);
21 21 }
22 22
23 23 protected override void Dispose(bool disposing) {
24 24 if (disposing)
25 25 Safe.Dispose(m_reader);
26 26 base.Dispose(disposing);
27 27 }
28 28 }
29 29 }
30 30
@@ -1,47 +1,53
1 1 <?xml version="1.0" encoding="utf-8"?>
2 2 <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 3 <PropertyGroup>
4 4 <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
5 5 <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
6 6 <ProductVersion>8.0.30703</ProductVersion>
7 7 <SchemaVersion>2.0</SchemaVersion>
8 8 <ProjectGuid>{15DD7123-D504-4627-8B4F-D00C7F04D033}</ProjectGuid>
9 9 <OutputType>Exe</OutputType>
10 10 <RootNamespace>MonoPlay</RootNamespace>
11 11 <AssemblyName>MonoPlay</AssemblyName>
12 12 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
13 13 <ReleaseVersion>0.2</ReleaseVersion>
14 14 </PropertyGroup>
15 15 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
16 16 <DebugSymbols>true</DebugSymbols>
17 17 <DebugType>full</DebugType>
18 18 <Optimize>false</Optimize>
19 19 <OutputPath>bin\Debug</OutputPath>
20 20 <DefineConstants>DEBUG;TRACE;</DefineConstants>
21 21 <ErrorReport>prompt</ErrorReport>
22 22 <WarningLevel>4</WarningLevel>
23 23 <ConsolePause>false</ConsolePause>
24 24 </PropertyGroup>
25 25 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
26 26 <DebugType>full</DebugType>
27 27 <Optimize>true</Optimize>
28 28 <OutputPath>bin\Release</OutputPath>
29 29 <ErrorReport>prompt</ErrorReport>
30 30 <WarningLevel>4</WarningLevel>
31 31 <ConsolePause>false</ConsolePause>
32 32 </PropertyGroup>
33 33 <ItemGroup>
34 34 <Reference Include="System" />
35 <Reference Include="System.Text.Json">
36 <HintPath>..\packages\System.Text.Json.2.0.0.11\lib\net40\System.Text.Json.dll</HintPath>
37 </Reference>
35 38 </ItemGroup>
36 39 <ItemGroup>
37 40 <Compile Include="Program.cs" />
38 41 <Compile Include="Properties\AssemblyInfo.cs" />
39 42 </ItemGroup>
40 43 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
41 44 <ItemGroup>
42 45 <ProjectReference Include="..\Implab\Implab.csproj">
43 46 <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project>
44 47 <Name>Implab</Name>
45 48 </ProjectReference>
46 49 </ItemGroup>
50 <ItemGroup>
51 <None Include="packages.config" />
52 </ItemGroup>
47 53 </Project> No newline at end of file
@@ -1,37 +1,45
1 1 using System;
2 2 using Implab;
3 3 using System.Threading.Tasks;
4 using Implab.Formats.JSON;
5 using System.IO;
6 using System.Text.Json;
4 7
5 8 namespace MonoPlay {
6 9 class MainClass {
7 10
8 11
9 12 public static void Main(string[] args) {
10 13 if (args == null)
11 14 throw new ArgumentNullException("args");
12
13 var t1 = Environment.TickCount;
14
15 DoWork().GetAwaiter().GetResult();
15 int t1, t2;
16 16
17 var t2 = Environment.TickCount;
18 Console.WriteLine("done: {0} ms, {1:.00} Mb, {2} GC", t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0) );
17 for (int i = 0; i < 2; i++) {
18 t1 = Environment.TickCount;
19 int elements =0;
20 using (var reader = new JSONParser(File.OpenText("/home/sergey/temp/citylots.json"))) {
21 while (reader.Read())
22 elements++;
23 }
19 24
20 }
25 t2 = Environment.TickCount;
26 Console.WriteLine("attempt {0} done: {1} ms, {2:.00} Mb, {3} GC, Elements: {4}",i+1, t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0), elements );
27 }
21 28
22 static IPromise<int> DoItem(int x) {
23 //return Promise<int>.FromResult(x + 1);
24 var p = new Promise<int>();
25 p.Resolve(x+1);
26 return p;
27 }
29 Console.WriteLine("Syste.Text.Json");
30 var paraser = new JsonParser();
31 for (int i = 0; i < 2; i++) {
32 t1 = Environment.TickCount;
33 using (var reader = File.OpenText("/home/sergey/temp/citylots.json")) {
34 paraser.Parse(reader);
35 }
28 36
29 static async Task<int> DoWork() {
30 var c = 0;
31 for (int i = 0; i < 10000000; i++)
32 c = await DoItem(c);
33 return c;
37 t2 = Environment.TickCount;
38 Console.WriteLine("attempt {0} done: {1} ms, {2:.00} Mb, {3} GC, ",i+1, t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0));
39 }
40
41
34 42 }
35 43
36 44 }
37 45 }
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
General Comments 0
You need to be logged in to leave comments. Login now