##// END OF EJS Templates
fixed DFA optimization, JSON is fully functional
cin -
r183:4f82e0f161c3 ref20160224
parent child
Show More

The requested changes are too big and content was truncated. Show full diff

@@ -0,0 +1,4
1 <?xml version="1.0" encoding="utf-8"?>
2 <packages>
3 <package id="System.Text.Json" version="2.0.0.11" targetFramework="net45" />
4 </packages> No newline at end of file
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
@@ -1,50 +1,88
1 using NUnit.Framework;
1 using NUnit.Framework;
2 using System;
2 using System;
3 using Implab.Formats.JSON;
3 using Implab.Formats.JSON;
4 using Implab.Automaton;
4
5
5 namespace Implab.Format.Test {
6 namespace Implab.Format.Test {
6 [TestFixture]
7 [TestFixture]
7 public class JsonTests {
8 public class JsonTests {
8 [Test]
9 [Test]
9 public void TestScannerValidTokens() {
10 public void TestScannerValidTokens() {
10 var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:");
11 using (var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) {
11
12
12 Tuple<JsonTokenType,object>[] expexted = new [] {
13 Tuple<JsonTokenType,object>[] expexted = new [] {
13 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
14 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
14 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
15 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
15 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ),
16 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d),
16 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
17 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
17 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ),
18 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d),
18 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
19 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
19 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ),
20 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d),
20 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
21 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
21 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ),
22 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d),
22 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
23 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
23 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ),
24 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d),
24 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
25 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
25 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ),
26 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d),
26 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
27 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
27 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ),
28 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text"),
28 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
29 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
29 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ),
30 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal"),
30 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ),
31 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " ["),
31 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ),
32 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]"),
32 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ),
33 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{"),
33 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ),
34 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}"),
34 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" )
35 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":")
35 };
36 };
36
37
37 object value;
38 object value;
38 JsonTokenType tokenType;
39 JsonTokenType tokenType;
39 for (var i = 0; i < expexted.Length; i++) {
40 for (var i = 0; i < expexted.Length; i++) {
40
41
41 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
42 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
42 Assert.AreEqual(expexted[i].Item1, tokenType);
43 Assert.AreEqual(expexted[i].Item1, tokenType);
43 Assert.AreEqual(expexted[i].Item2, value);
44 Assert.AreEqual(expexted[i].Item2, value);
44 }
45 }
45
46
46 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
47 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
47 }
48 }
48 }
49 }
50
51 [Test]
52 public void TestScannerBadTokens() {
53 var bad = new [] {
54 " 1",
55 " literal",
56 " \"",
57 "\"unclosed string",
58 "1.bad",
59 "001", // should be read as three numbers
60 "--10",
61 "+10",
62 "1.0.0",
63 "1e1.0",
64 "l1teral0",
65 ".123",
66 "-.123"
67 };
68
69 foreach (var json in bad)
70 using (var scanner = new JSONScanner(json)) {
71 try {
72 object value;
73 JsonTokenType token;
74 scanner.ReadToken(out value, out token);
75 if (!Object.Equals(value,json)) {
76 Console.WriteLine("'{0}' is read as {1}", json, value is String ? String.Format("'{0}'", value) : value );
77 continue;
78 }
79 Assert.Fail("Token '{0}' shouldn't pass", json);
80 } catch (ParserException e) {
81 Console.WriteLine(e.Message);
82 }
49 }
83 }
50
84
85 }
86 }
87 }
88
@@ -1,343 +1,348
1 using Implab;
1 using Implab;
2 using System;
2 using System;
3 using System.Collections.Generic;
3 using System.Collections.Generic;
4 using System.Linq;
4 using System.Linq;
5 using System.Diagnostics;
5 using System.Diagnostics;
6 using System.IO;
6 using System.IO;
7 using System.CodeDom.Compiler;
7 using System.CodeDom.Compiler;
8 using System.CodeDom;
8 using System.CodeDom;
9
9
10 namespace Implab.Automaton {
10 namespace Implab.Automaton {
11 public class DFATable : IDFATableBuilder {
11 public class DFATable : IDFATableBuilder {
12 int m_stateCount;
12 int m_stateCount;
13 int m_symbolCount;
13 int m_symbolCount;
14 int m_initialState;
14 int m_initialState;
15
15
16 readonly HashSet<int> m_finalStates = new HashSet<int>();
16 readonly HashSet<int> m_finalStates = new HashSet<int>();
17 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
17 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
18
18
19
19
20 #region IDFADefinition implementation
20 #region IDFADefinition implementation
21
21
22 public bool IsFinalState(int s) {
22 public bool IsFinalState(int s) {
23 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
23 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
24
24
25 return m_finalStates.Contains(s);
25 return m_finalStates.Contains(s);
26 }
26 }
27
27
28 public IEnumerable<int> FinalStates {
28 public IEnumerable<int> FinalStates {
29 get {
29 get {
30 return m_finalStates;
30 return m_finalStates;
31 }
31 }
32 }
32 }
33
33
34 public int StateCount {
34 public int StateCount {
35 get { return m_stateCount; }
35 get { return m_stateCount; }
36 }
36 }
37
37
38 public int AlphabetSize {
38 public int AlphabetSize {
39 get { return m_symbolCount; }
39 get { return m_symbolCount; }
40 }
40 }
41
41
42 public int InitialState {
42 public int InitialState {
43 get { return m_initialState; }
43 get { return m_initialState; }
44 }
44 }
45
45
46 #endregion
46 #endregion
47
47
48 public void SetInitialState(int s) {
48 public void SetInitialState(int s) {
49 Safe.ArgumentAssert(s >= 0, "s");
49 Safe.ArgumentAssert(s >= 0, "s");
50 m_stateCount = Math.Max(m_stateCount, s + 1);
50 m_stateCount = Math.Max(m_stateCount, s + 1);
51 m_initialState = s;
51 m_initialState = s;
52 }
52 }
53
53
54 public void MarkFinalState(int state) {
54 public void MarkFinalState(int state) {
55 m_stateCount = Math.Max(m_stateCount, state + 1);
55 m_stateCount = Math.Max(m_stateCount, state + 1);
56 m_finalStates.Add(state);
56 m_finalStates.Add(state);
57 }
57 }
58
58
59 public void Add(AutomatonTransition item) {
59 public void Add(AutomatonTransition item) {
60 Safe.ArgumentAssert(item.s1 >= 0, "item");
60 Safe.ArgumentAssert(item.s1 >= 0, "item");
61 Safe.ArgumentAssert(item.s2 >= 0, "item");
61 Safe.ArgumentAssert(item.s2 >= 0, "item");
62 Safe.ArgumentAssert(item.edge >= 0, "item");
62 Safe.ArgumentAssert(item.edge >= 0, "item");
63
63
64 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
64 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
65 m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
65 m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
66
66
67 m_transitions.Add(item);
67 m_transitions.Add(item);
68 }
68 }
69
69
70 public void Clear() {
70 public void Clear() {
71 m_stateCount = 0;
71 m_stateCount = 0;
72 m_symbolCount = 0;
72 m_symbolCount = 0;
73 m_finalStates.Clear();
73 m_finalStates.Clear();
74 m_transitions.Clear();
74 m_transitions.Clear();
75 }
75 }
76
76
77 public bool Contains(AutomatonTransition item) {
77 public bool Contains(AutomatonTransition item) {
78 return m_transitions.Contains(item);
78 return m_transitions.Contains(item);
79 }
79 }
80
80
81 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
81 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
82 m_transitions.CopyTo(array, arrayIndex);
82 m_transitions.CopyTo(array, arrayIndex);
83 }
83 }
84
84
85 public bool Remove(AutomatonTransition item) {
85 public bool Remove(AutomatonTransition item) {
86 return m_transitions.Remove(item);
86 return m_transitions.Remove(item);
87 }
87 }
88
88
89 public int Count {
89 public int Count {
90 get {
90 get {
91 return m_transitions.Count;
91 return m_transitions.Count;
92 }
92 }
93 }
93 }
94
94
95 public bool IsReadOnly {
95 public bool IsReadOnly {
96 get {
96 get {
97 return false;
97 return false;
98 }
98 }
99 }
99 }
100
100
101 public IEnumerator<AutomatonTransition> GetEnumerator() {
101 public IEnumerator<AutomatonTransition> GetEnumerator() {
102 return m_transitions.GetEnumerator();
102 return m_transitions.GetEnumerator();
103 }
103 }
104
104
105 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
105 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
106 return GetEnumerator();
106 return GetEnumerator();
107 }
107 }
108
108
109 public void AddSymbol(int symbol) {
109 public void AddSymbol(int symbol) {
110 Safe.ArgumentAssert(symbol >= 0, "symbol");
110 Safe.ArgumentAssert(symbol >= 0, "symbol");
111 m_symbolCount = Math.Max(symbol + 1, m_symbolCount);
111 m_symbolCount = Math.Max(symbol + 1, m_symbolCount);
112 }
112 }
113
113
114 public int[,] CreateTransitionTable() {
114 public int[,] CreateTransitionTable() {
115 var table = new int[StateCount,AlphabetSize];
115 var table = new int[StateCount,AlphabetSize];
116
116
117 for (int i = 0; i < StateCount; i++)
117 for (int i = 0; i < StateCount; i++)
118 for (int j = 0; j < AlphabetSize; j++)
118 for (int j = 0; j < AlphabetSize; j++)
119 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
119 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
120
120
121 foreach (var t in this)
121 foreach (var t in this)
122 table[t.s1,t.edge] = t.s2;
122 table[t.s1,t.edge] = t.s2;
123
123
124 return table;
124 return table;
125 }
125 }
126
126
127 public bool[] CreateFinalStateTable() {
127 public bool[] CreateFinalStateTable() {
128 var table = new bool[StateCount];
128 var table = new bool[StateCount];
129
129
130 foreach (var s in FinalStates)
130 foreach (var s in FinalStates)
131 table[s] = true;
131 table[s] = true;
132
132
133 return table;
133 return table;
134 }
134 }
135
135
136 /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary>
136 /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary>
137 /// <remarks>
137 /// <remarks>
138 /// В процессе построения минимального автомата требуется разделить множество состояний,
138 /// В процессе построения минимального автомата требуется разделить множество состояний,
139 /// на два подмножества - конечные состояния и все остальные, после чего эти подмножества
139 /// на два подмножества - конечные состояния и все остальные, после чего эти подмножества
140 /// будут резделены на более мелкие. Иногда требуется гарантировать различия конечных сосотяний,
140 /// будут резделены на более мелкие. Иногда требуется гарантировать различия конечных сосотяний,
141 /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний.
141 /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний.
142 /// </remarks>
142 /// </remarks>
143 /// <returns>The final states.</returns>
143 /// <returns>The final states.</returns>
144 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
144 protected virtual IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
145 return new HashSet<int>[] { m_finalStates };
145 return new [] { new HashSet<int>(states) };
146 }
146 }
147
147
148 protected void Optimize(
148 protected void Optimize(
149 IDFATableBuilder optimalDFA,
149 IDFATableBuilder optimalDFA,
150 IDictionary<int,int> alphabetMap,
150 IDictionary<int,int> alphabetMap,
151 IDictionary<int,int> stateMap
151 IDictionary<int,int> stateMap
152 ) {
152 ) {
153 Safe.ArgumentNotNull(optimalDFA, "dfa");
153 Safe.ArgumentNotNull(optimalDFA, "dfa");
154 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
154 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
155 Safe.ArgumentNotNull(stateMap, "stateMap");
155 Safe.ArgumentNotNull(stateMap, "stateMap");
156
156
157
157
158 var setComparer = new CustomEqualityComparer<HashSet<int>>(
158 var setComparer = new CustomEqualityComparer<HashSet<int>>(
159 (x, y) => x.SetEquals(y),
159 (x, y) => x.SetEquals(y),
160 s => s.Sum(x => x.GetHashCode())
160 s => s.Sum(x => x.GetHashCode())
161 );
161 );
162
162
163 var optimalStates = new HashSet<HashSet<int>>(setComparer);
163 var optimalStates = new HashSet<HashSet<int>>(setComparer);
164 var queue = new HashSet<HashSet<int>>(setComparer);
164 var queue = new HashSet<HashSet<int>>(setComparer);
165
165
166 // получаем конечные состояния, сгруппированные по маркерам
166 optimalStates.Add(new HashSet<int>(FinalStates));
167 optimalStates.UnionWith(
168 GroupFinalStates()
169 );
170
167
171 var state = new HashSet<int>(
168 var state = new HashSet<int>(
172 Enumerable
169 Enumerable
173 .Range(0, m_stateCount)
170 .Range(0, m_stateCount)
174 .Where(i => !m_finalStates.Contains(i))
171 .Where(i => !m_finalStates.Contains(i))
175 );
172 );
176
173
177 optimalStates.Add(state);
174 optimalStates.Add(state);
178 queue.Add(state);
175 queue.Add(state);
179
176
180 var rmap = m_transitions
177 var rmap = m_transitions
181 .GroupBy(t => t.s2)
178 .GroupBy(t => t.s2)
182 .ToDictionary(
179 .ToDictionary(
183 g => g.Key, // s2
180 g => g.Key, // s2
184 g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
181 g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
185 );
182 );
186
183
187 while (queue.Count > 0) {
184 while (queue.Count > 0) {
188 var stateA = queue.First();
185 var stateA = queue.First();
189 queue.Remove(stateA);
186 queue.Remove(stateA);
190
187
191 for (int c = 0; c < m_symbolCount; c++) {
188 for (int c = 0; c < m_symbolCount; c++) {
192 var stateX = new HashSet<int>();
189 var stateX = new HashSet<int>();
193 //foreach(var a in stateA.Where(rmap.ContainsKey))
190 foreach(var a in stateA.Where(rmap.ContainsKey))
194 // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
191 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
195
196 stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1));
197
192
198 var tmp = optimalStates.ToArray();
193 var tmp = optimalStates.ToArray();
199 foreach (var stateY in tmp) {
194 foreach (var stateY in tmp) {
200 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
201 var stateR1 = new HashSet<int>(stateY);
195 var stateR1 = new HashSet<int>(stateY);
202 var stateR2 = new HashSet<int>(stateY);
196 var stateR2 = new HashSet<int>(stateY);
203
197
204 stateR1.IntersectWith(stateX);
198 stateR1.IntersectWith(stateX);
205 stateR2.ExceptWith(stateX);
199 stateR2.ExceptWith(stateX);
206
200
201 if (stateR1.Count > 0 && stateR2.Count > 0) {
202
203
207 optimalStates.Remove(stateY);
204 optimalStates.Remove(stateY);
208 optimalStates.Add(stateR1);
205 optimalStates.Add(stateR1);
209 optimalStates.Add(stateR2);
206 optimalStates.Add(stateR2);
210
207
211 if (queue.Contains(stateY)) {
208 if (queue.Contains(stateY)) {
212 queue.Remove(stateY);
209 queue.Remove(stateY);
213 queue.Add(stateR1);
210 queue.Add(stateR1);
214 queue.Add(stateR2);
211 queue.Add(stateR2);
215 } else {
212 } else {
216 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
213 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
217 }
214 }
218 }
215 }
219 }
216 }
220 }
217 }
221 }
218 }
222
219
220 // дополнительно разбиваем конечные состояния
221 foreach (var final in optimalStates.Where(s => s.Overlaps(m_finalStates)).ToArray()) {
222 optimalStates.Remove(final);
223 foreach (var split in SplitFinalStates(final))
224 optimalStates.Add(split);
225 }
226
227
223 // карта получения оптимального состояния по соотвествующему ему простому состоянию
228 // карта получения оптимального состояния по соотвествующему ему простому состоянию
224 var nextState = 0;
229 var nextState = 0;
225 foreach (var item in optimalStates) {
230 foreach (var item in optimalStates) {
226 var id = nextState++;
231 var id = nextState++;
227 foreach (var s in item)
232 foreach (var s in item)
228 stateMap[s] = id;
233 stateMap[s] = id;
229 }
234 }
230
235
231 // получаем минимальный алфавит
236 // получаем минимальный алфавит
232 // входные символы не различимы, если Move(s,a1) == Move(s,a2), для любого s
237 // входные символы не различимы, если Move(s,a1) == Move(s,a2), для любого s
233 // для этого используем алгоритм кластеризации, сначала
238 // для этого используем алгоритм кластеризации, сначала
234 // считаем, что все символы не различимы
239 // считаем, что все символы не различимы
235
240
236 var minClasses = new HashSet<HashSet<int>>(setComparer);
241 var minClasses = new HashSet<HashSet<int>>(setComparer);
237 var alphaQueue = new Queue<HashSet<int>>();
242 var alphaQueue = new Queue<HashSet<int>>();
238 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
243 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
239
244
240 // для всех состояний, будем проверять каждый класс на различимость,
245 // для всех состояний, будем проверять каждый класс на различимость,
241 // т.е. символы различимы, если они приводят к разным состояниям
246 // т.е. символы различимы, если они приводят к разным состояниям
242 for (int s = 0 ; s < optimalStates.Count; s++) {
247 for (int s = 0 ; s < optimalStates.Count; s++) {
243 var newQueue = new Queue<HashSet<int>>();
248 var newQueue = new Queue<HashSet<int>>();
244
249
245 foreach (var A in alphaQueue) {
250 foreach (var A in alphaQueue) {
246 // классы из одного символа делить бесполезно, переводим их сразу в
251 // классы из одного символа делить бесполезно, переводим их сразу в
247 // результирующий алфавит
252 // результирующий алфавит
248 if (A.Count == 1) {
253 if (A.Count == 1) {
249 minClasses.Add(A);
254 minClasses.Add(A);
250 continue;
255 continue;
251 }
256 }
252
257
253 // различаем классы символов, которые переводят в различные оптимальные состояния
258 // различаем классы символов, которые переводят в различные оптимальные состояния
254 // optimalState -> alphaClass
259 // optimalState -> alphaClass
255 var classes = new Dictionary<int, HashSet<int>>();
260 var classes = new Dictionary<int, HashSet<int>>();
256
261
257 foreach (var term in A) {
262 foreach (var term in A) {
258 // ищем все переходы класса по символу term
263 // ищем все переходы класса по символу term
259 var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
264 var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
260
265
261 HashSet<int> a2;
266 HashSet<int> a2;
262 if (!classes.TryGetValue(s2, out a2)) {
267 if (!classes.TryGetValue(s2, out a2)) {
263 a2 = new HashSet<int>();
268 a2 = new HashSet<int>();
264 newQueue.Enqueue(a2);
269 newQueue.Enqueue(a2);
265 classes[s2] = a2;
270 classes[s2] = a2;
266 }
271 }
267 a2.Add(term);
272 a2.Add(term);
268 }
273 }
269 }
274 }
270
275
271 if (newQueue.Count == 0)
276 if (newQueue.Count == 0)
272 break;
277 break;
273 alphaQueue = newQueue;
278 alphaQueue = newQueue;
274 }
279 }
275
280
276 // после окончания работы алгоритма в очереди останутся минимальные различимые классы
281 // после окончания работы алгоритма в очереди останутся минимальные различимые классы
277 // входных символов
282 // входных символов
278 foreach (var A in alphaQueue)
283 foreach (var A in alphaQueue)
279 minClasses.Add(A);
284 minClasses.Add(A);
280
285
281 // построение отображения алфавитов входных символов.
286 // построение отображения алфавитов входных символов.
282 // поскольку символ DFAConst.UNCLASSIFIED_INPUT может иметь
287 // поскольку символ DFAConst.UNCLASSIFIED_INPUT может иметь
283 // специальное значение, тогда сохраним минимальный класс,
288 // специальное значение, тогда сохраним минимальный класс,
284 // содержащий этот символ на томже месте.
289 // содержащий этот символ на томже месте.
285
290
286 var nextCls = 0;
291 var nextCls = 0;
287 foreach (var item in minClasses) {
292 foreach (var item in minClasses) {
288 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
293 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
289 nextCls++;
294 nextCls++;
290
295
291 // сохраняем DFAConst.UNCLASSIFIED_INPUT
296 // сохраняем DFAConst.UNCLASSIFIED_INPUT
292 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
297 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
293 optimalDFA.AddSymbol(cls);
298 optimalDFA.AddSymbol(cls);
294
299
295 foreach (var a in item)
300 foreach (var a in item)
296 alphabetMap[a] = cls;
301 alphabetMap[a] = cls;
297 }
302 }
298
303
299 // построение автомата
304 // построение автомата
300 optimalDFA.SetInitialState(stateMap[m_initialState]);
305 optimalDFA.SetInitialState(stateMap[m_initialState]);
301
306
302 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
307 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
303 optimalDFA.MarkFinalState(sf);
308 optimalDFA.MarkFinalState(sf);
304
309
305 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
310 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
306 optimalDFA.Add(t);
311 optimalDFA.Add(t);
307 }
312 }
308
313
309 protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
314 protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
310 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
315 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
311 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
316 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
312
317
313 var data = new List<string>();
318 var data = new List<string>();
314
319
315 data.Add("digraph dfa {");
320 data.Add("digraph dfa {");
316
321
317 foreach (var final in m_finalStates)
322 foreach (var final in m_finalStates)
318 data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
323 data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
319
324
320 foreach (var t in m_transitions)
325 foreach (var t in m_transitions)
321 data.Add(String.Format(
326 data.Add(String.Format(
322 "{0} -> {2} [label={1}];",
327 "{0} -> {2} [label={1}];",
323 String.Join("", stateAlphabet.GetSymbols(t.s1)),
328 String.Join("", stateAlphabet.GetSymbols(t.s1)),
324 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
329 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
325 String.Join("", stateAlphabet.GetSymbols(t.s2))
330 String.Join("", stateAlphabet.GetSymbols(t.s2))
326 ));
331 ));
327 data.Add("}");
332 data.Add("}");
328 return String.Join("\n", data);
333 return String.Join("\n", data);
329 }
334 }
330
335
331 static string ToLiteral(string input)
336 static string ToLiteral(string input)
332 {
337 {
333 using (var writer = new StringWriter())
338 using (var writer = new StringWriter())
334 {
339 {
335 using (var provider = CodeDomProvider.CreateProvider("CSharp"))
340 using (var provider = CodeDomProvider.CreateProvider("CSharp"))
336 {
341 {
337 provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
342 provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
338 return writer.ToString();
343 return writer.ToString();
339 }
344 }
340 }
345 }
341 }
346 }
342 }
347 }
343 }
348 }
@@ -1,95 +1,91
1 using System.Collections.Generic;
1 using System.Collections.Generic;
2 using System.Linq;
2 using System.Linq;
3
3
4 namespace Implab.Automaton.RegularExpressions {
4 namespace Implab.Automaton.RegularExpressions {
5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
6
6
7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
8 readonly IAlphabet<TInput> m_alphabet;
8 readonly IAlphabet<TInput> m_alphabet;
9
9
10 public RegularDFA(IAlphabet<TInput> alphabet) {
10 public RegularDFA(IAlphabet<TInput> alphabet) {
11 Safe.ArgumentNotNull(alphabet, "aplhabet");
11 Safe.ArgumentNotNull(alphabet, "aplhabet");
12
12
13 m_alphabet = alphabet;
13 m_alphabet = alphabet;
14 }
14 }
15
15
16
16
17 public IAlphabet<TInput> InputAlphabet {
17 public IAlphabet<TInput> InputAlphabet {
18 get {
18 get {
19 return m_alphabet;
19 return m_alphabet;
20 }
20 }
21 }
21 }
22
22
23 public void MarkFinalState(int s, TTag[] tags) {
23 public void MarkFinalState(int s, TTag[] tags) {
24 MarkFinalState(s);
24 MarkFinalState(s);
25 SetStateTag(s, tags);
25 SetStateTag(s, tags);
26 }
26 }
27
27
28 public void SetStateTag(int s, TTag[] tags) {
28 public void SetStateTag(int s, TTag[] tags) {
29 Safe.ArgumentNotNull(tags, "tags");
29 Safe.ArgumentNotNull(tags, "tags");
30 m_tags[s] = tags;
30 m_tags[s] = tags;
31 }
31 }
32
32
33 public TTag[] GetStateTag(int s) {
33 public TTag[] GetStateTag(int s) {
34 TTag[] tags;
34 TTag[] tags;
35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
36 }
36 }
37
37
38 public TTag[][] CreateTagTable() {
38 public TTag[][] CreateTagTable() {
39 var table = new TTag[StateCount][];
39 var table = new TTag[StateCount][];
40
40
41 foreach (var pair in m_tags)
41 foreach (var pair in m_tags)
42 table[pair.Key] = pair.Value;
42 table[pair.Key] = pair.Value;
43
43
44 return table;
44 return table;
45 }
45 }
46
46
47 /// <summary>
47 /// <summary>
48 /// Optimize the specified alphabet.
48 /// Optimize the specified alphabet.
49 /// </summary>
49 /// </summary>
50 /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param>
50 /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param>
51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
52 Safe.ArgumentNotNull(alphabet, "alphabet");
52 Safe.ArgumentNotNull(alphabet, "alphabet");
53
53
54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
55
55
56 var alphaMap = new Dictionary<int,int>();
56 var alphaMap = new Dictionary<int,int>();
57 var stateMap = new Dictionary<int,int>();
57 var stateMap = new Dictionary<int,int>();
58
58
59 Optimize(dfa, alphaMap, stateMap);
59 Optimize(dfa, alphaMap, stateMap);
60
60
61 // mark tags in the new DFA
61 // mark tags in the new DFA
62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
64
64
65 // make the alphabet for the new DFA
65 // make the alphabet for the new DFA
66 // skip all unclassified symbols
66 // skip all unclassified symbols
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
69
70 var orig = ToString();
71 var opt = dfa.ToString();
72
73 return dfa;
69 return dfa;
74 }
70 }
75
71
76 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
72 protected override IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
77 var arrayComparer = new CustomEqualityComparer<TTag[]>(
73 var arrayComparer = new CustomEqualityComparer<TTag[]>(
78 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
74 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
79 x => x.Sum(it => x.GetHashCode())
75 x => x.Sum(it => x.GetHashCode())
80 );
76 );
81 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
77 return states.GroupBy(x => m_tags[x] ?? new TTag[0], arrayComparer).Select(g => new HashSet<int>(g));
82 }
78 }
83
79
84 public override string ToString() {
80 public override string ToString() {
85 var states = new MapAlphabet<string>(false, null);
81 var states = new MapAlphabet<string>(false, null);
86
82
87 for (int i = 0; i < StateCount; i++)
83 for (int i = 0; i < StateCount; i++)
88 states.DefineSymbol(string.Format("s{0}", i), i);
84 states.DefineSymbol(string.Format("s{0}", i), i);
89
85
90 return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
86 return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
91 }
87 }
92
88
93 }
89 }
94 }
90 }
95
91
@@ -1,119 +1,121
1 using System.Linq;
1 using System.Linq;
2 using Implab.Automaton.RegularExpressions;
2 using Implab.Automaton.RegularExpressions;
3 using System;
3 using System;
4 using Implab.Automaton;
4 using Implab.Automaton;
5 using Implab.Components;
5 using Implab.Components;
6
6
7 namespace Implab.Formats.JSON {
7 namespace Implab.Formats.JSON {
8 class JSONGrammar : Grammar<char> {
8 class JSONGrammar : Grammar<char> {
9 public enum TokenType {
9 public enum TokenType {
10 None,
10 None,
11 BeginObject,
11 BeginObject,
12 EndObject,
12 EndObject,
13 BeginArray,
13 BeginArray,
14 EndArray,
14 EndArray,
15 String,
15 String,
16 Number,
16 Number,
17 Literal,
17 Literal,
18 NameSeparator,
18 NameSeparator,
19 ValueSeparator,
19 ValueSeparator,
20 Whitespace,
20
21
21 StringBound,
22 StringBound,
22 EscapedChar,
23 EscapedChar,
23 UnescapedChar,
24 UnescapedChar,
24 EscapedUnicode
25 EscapedUnicode
25 }
26 }
26
27
27 static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());
28 static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());
28
29
29 public static JSONGrammar Instance {
30 public static JSONGrammar Instance {
30 get { return _instance.Value; }
31 get { return _instance.Value; }
31 }
32 }
32
33
33 readonly ScannerContext<TokenType> m_jsonExpression;
34 readonly ScannerContext<TokenType> m_jsonExpression;
34 readonly ScannerContext<TokenType> m_stringExpression;
35 readonly ScannerContext<TokenType> m_stringExpression;
35 readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();
36 readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();
36
37
37 public JSONGrammar() {
38 public JSONGrammar() {
38 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
39 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
39 var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
40 var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
40 var digit9 = SymbolRangeToken('1', '9');
41 var digit9 = SymbolRangeToken('1', '9');
41 var zero = SymbolToken('0');
42 var zero = SymbolToken('0');
42 var digit = zero.Or(digit9);
43 var digit = zero.Or(digit9);
43 var dot = SymbolToken('.');
44 var dot = SymbolToken('.');
44 var minus = SymbolToken('-');
45 var minus = SymbolToken('-');
45 var sign = SymbolSetToken('-', '+');
46 var sign = SymbolSetToken('-', '+');
46 var expSign = SymbolSetToken('e', 'E');
47 var expSign = SymbolSetToken('e', 'E');
47 var letters = SymbolRangeToken('a', 'z');
48 var letters = SymbolRangeToken('a', 'z');
48 var integer = zero.Or(digit9.Cat(digit.EClosure()));
49 var integer = zero.Or(digit9.Cat(digit.EClosure()));
49 var frac = dot.Cat(digit.Closure());
50 var frac = dot.Cat(digit.Closure());
50 var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
51 var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
51 var quote = SymbolToken('"');
52 var quote = SymbolToken('"');
52 var backSlash = SymbolToken('\\');
53 var backSlash = SymbolToken('\\');
53 var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
54 var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
54 var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
55 var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
55 var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
56 var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
56 var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
57 var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
57 var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
58 var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
58 var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
59 var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
59 var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
60 var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
60 var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
61 var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
61 var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);
62 var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);
62
63
63 var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
64 var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
64 var literal = letters.Closure();
65 var literal = letters.Closure();
65 var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
66 var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
66
67
67 var jsonExpression =
68 var jsonExpression =
68 number.Tag(TokenType.Number)
69 number.Tag(TokenType.Number)
69 .Or(literal.Tag(TokenType.Literal))
70 .Or(literal.Tag(TokenType.Literal))
70 .Or(quote.Tag(TokenType.StringBound))
71 .Or(quote.Tag(TokenType.StringBound))
71 .Or(beginObject.Tag(TokenType.BeginObject))
72 .Or(beginObject.Tag(TokenType.BeginObject))
72 .Or(endObject.Tag(TokenType.EndObject))
73 .Or(endObject.Tag(TokenType.EndObject))
73 .Or(beginArray.Tag(TokenType.BeginArray))
74 .Or(beginArray.Tag(TokenType.BeginArray))
74 .Or(endArray.Tag(TokenType.EndArray))
75 .Or(endArray.Tag(TokenType.EndArray))
75 .Or(nameSep.Tag(TokenType.NameSeparator))
76 .Or(nameSep.Tag(TokenType.NameSeparator))
76 .Or(valueSep.Tag(TokenType.ValueSeparator));
77 .Or(valueSep.Tag(TokenType.ValueSeparator))
78 .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace));
77
79
78
80
79 var jsonStringExpression =
81 var jsonStringExpression =
80 quote.Tag(TokenType.StringBound)
82 quote.Tag(TokenType.StringBound)
81 .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
83 .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
82 .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
84 .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
83 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
85 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
84
86
85
87
86 m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
88 m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
87 m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
89 m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
88
90
89
91
90 }
92 }
91
93
92 protected override IAlphabetBuilder<char> AlphabetBuilder {
94 protected override IAlphabetBuilder<char> AlphabetBuilder {
93 get {
95 get {
94 return m_defaultAlphabet;
96 return m_defaultAlphabet;
95 }
97 }
96 }
98 }
97
99
98 public ScannerContext<TokenType> JsonExpression {
100 public ScannerContext<TokenType> JsonExpression {
99 get {
101 get {
100 return m_jsonExpression;
102 return m_jsonExpression;
101 }
103 }
102 }
104 }
103
105
104 public ScannerContext<TokenType> JsonStringExpression {
106 public ScannerContext<TokenType> JsonStringExpression {
105 get {
107 get {
106 return m_stringExpression;
108 return m_stringExpression;
107 }
109 }
108 }
110 }
109
111
110 Token SymbolRangeToken(char start, char stop) {
112 Token SymbolRangeToken(char start, char stop) {
111 return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
113 return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
112 }
114 }
113
115
114 protected override IndexedAlphabetBase<char> CreateAlphabet() {
116 protected override IndexedAlphabetBase<char> CreateAlphabet() {
115 return new CharAlphabet();
117 return new CharAlphabet();
116 }
118 }
117
119
118 }
120 }
119 }
121 }
@@ -1,107 +1,109
1 using System;
1 using System;
2 using System.Globalization;
2 using System.Globalization;
3 using Implab.Automaton;
3 using Implab.Automaton;
4 using System.Text;
4 using System.Text;
5 using Implab.Components;
5 using Implab.Components;
6 using System.IO;
6 using System.IO;
7
7
8 namespace Implab.Formats.JSON {
8 namespace Implab.Formats.JSON {
9 /// <summary>
9 /// <summary>
10 /// Сканнер (лексер), разбивающий поток символов на токены JSON.
10 /// Сканнер (лексер), разбивающий поток символов на токены JSON.
11 /// </summary>
11 /// </summary>
12 public class JSONScanner : Disposable {
12 public class JSONScanner : Disposable {
13 readonly StringBuilder m_builder = new StringBuilder();
13 readonly StringBuilder m_builder = new StringBuilder();
14
14
15 readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonExpression;
15 readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonExpression;
16 readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringExpression;
16 readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringExpression;
17
17
18
18
19 readonly TextScanner m_scanner;
19 readonly TextScanner m_scanner;
20
20
21 /// <summary>
21 /// <summary>
22 /// Создает новый экземпляр сканнера
22 /// Создает новый экземпляр сканнера
23 /// </summary>
23 /// </summary>
24 public JSONScanner(string text) {
24 public JSONScanner(string text) {
25 Safe.ArgumentNotEmpty(text, "text");
25 Safe.ArgumentNotEmpty(text, "text");
26
26
27 m_scanner = new StringScanner(text);
27 m_scanner = new StringScanner(text);
28 }
28 }
29
29
30 public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
30 public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
31 Safe.ArgumentNotNull(reader, "reader");
31 Safe.ArgumentNotNull(reader, "reader");
32
32
33 m_scanner = new ReaderScanner(reader, bufferMax, chunkSize);
33 m_scanner = new ReaderScanner(reader, bufferMax, chunkSize);
34 }
34 }
35
35
36 public JSONScanner(TextReader reader) : this(reader, 1024*1024, 1024){
36 public JSONScanner(TextReader reader) : this(reader, 1024*1024, 1024){
37 }
37 }
38
38
39 /// <summary>
39 /// <summary>
40 /// Читает следующий лексический элемент из входных данных.
40 /// Читает следующий лексический элемент из входных данных.
41 /// </summary>
41 /// </summary>
42 /// <param name="tokenValue">Возвращает значение прочитанного токена.</param>
42 /// <param name="tokenValue">Возвращает значение прочитанного токена.</param>
43 /// <param name="tokenType">Возвращает тип прочитанного токена.</param>
43 /// <param name="tokenType">Возвращает тип прочитанного токена.</param>
44 /// <returns><c>true</c> - чтение произведено успешно. <c>false</c> - достигнут конец входных данных</returns>
44 /// <returns><c>true</c> - чтение произведено успешно. <c>false</c> - достигнут конец входных данных</returns>
45 /// <remarks>В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е.
45 /// <remarks>В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е.
46 /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks>
46 /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks>
47 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
47 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
48 JSONGrammar.TokenType[] tag;
48 JSONGrammar.TokenType[] tag;
49 if (m_jsonContext.Execute(m_scanner, out tag)) {
49 while (m_jsonContext.Execute(m_scanner, out tag)) {
50 switch (tag[0]) {
50 switch (tag[0]) {
51 case JSONGrammar.TokenType.StringBound:
51 case JSONGrammar.TokenType.StringBound:
52 tokenValue = ReadString();
52 tokenValue = ReadString();
53 tokenType = JsonTokenType.String;
53 tokenType = JsonTokenType.String;
54 break;
54 break;
55 case JSONGrammar.TokenType.Number:
55 case JSONGrammar.TokenType.Number:
56 tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture);
56 tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture);
57 tokenType = JsonTokenType.Number;
57 tokenType = JsonTokenType.Number;
58 break;
58 break;
59 case JSONGrammar.TokenType.Whitespace:
60 continue;
59 default:
61 default:
60 tokenType = (JsonTokenType)tag[0];
62 tokenType = (JsonTokenType)tag[0];
61 tokenValue = m_scanner.GetTokenValue();
63 tokenValue = m_scanner.GetTokenValue();
62 break;
64 break;
63 }
65 }
64 return true;
66 return true;
65 }
67 }
66 tokenValue = null;
68 tokenValue = null;
67 tokenType = JsonTokenType.None;
69 tokenType = JsonTokenType.None;
68 return false;
70 return false;
69 }
71 }
70
72
71 string ReadString() {
73 string ReadString() {
72 int pos = 0;
74 int pos = 0;
73 var buf = new char[6]; // the buffer for unescaping chars
75 var buf = new char[6]; // the buffer for unescaping chars
74
76
75 JSONGrammar.TokenType[] tag;
77 JSONGrammar.TokenType[] tag;
76 m_builder.Clear();
78 m_builder.Clear();
77
79
78 while (m_stringContext.Execute(m_scanner, out tag)) {
80 while (m_stringContext.Execute(m_scanner, out tag)) {
79 switch (tag[0]) {
81 switch (tag[0]) {
80 case JSONGrammar.TokenType.StringBound:
82 case JSONGrammar.TokenType.StringBound:
81 return m_builder.ToString();
83 return m_builder.ToString();
82 case JSONGrammar.TokenType.UnescapedChar:
84 case JSONGrammar.TokenType.UnescapedChar:
83 m_scanner.CopyTokenTo(m_builder);
85 m_scanner.CopyTokenTo(m_builder);
84 break;
86 break;
85 case JSONGrammar.TokenType.EscapedUnicode: // \xXXXX - unicode escape sequence
87 case JSONGrammar.TokenType.EscapedUnicode: // \xXXXX - unicode escape sequence
86 m_scanner.CopyTokenTo(buf, 0);
88 m_scanner.CopyTokenTo(buf, 0);
87 m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2));
89 m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2));
88 pos++;
90 pos++;
89 break;
91 break;
90 case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence
92 case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence
91 m_scanner.CopyTokenTo(buf, 0);
93 m_scanner.CopyTokenTo(buf, 0);
92 m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
94 m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
93 break;
95 break;
94 }
96 }
95
97
96 }
98 }
97
99
98 throw new ParserException("Unexpected end of data");
100 throw new ParserException("Unexpected end of data");
99 }
101 }
100
102
101 protected override void Dispose(bool disposing) {
103 protected override void Dispose(bool disposing) {
102 if (disposing)
104 if (disposing)
103 Safe.Dispose(m_scanner);
105 Safe.Dispose(m_scanner);
104 base.Dispose(disposing);
106 base.Dispose(disposing);
105 }
107 }
106 }
108 }
107 }
109 }
@@ -1,30 +1,30
1 using System;
1 using System;
2 using System.IO;
2 using System.IO;
3
3
4 namespace Implab.Formats {
4 namespace Implab.Formats {
5 public class ReaderScanner: TextScanner {
5 public class ReaderScanner: TextScanner {
6 const int CHUNK_SIZE = 1024;
6 const int CHUNK_SIZE = 1024*4;
7 const int BUFFER_MAX = CHUNK_SIZE*1024;
7 const int BUFFER_MAX = CHUNK_SIZE*1024;
8
8
9 readonly TextReader m_reader;
9 readonly TextReader m_reader;
10
10
11 public ReaderScanner(TextReader reader, int limit, int chunk) : base(limit, chunk) {
11 public ReaderScanner(TextReader reader, int limit, int chunk) : base(limit, chunk) {
12 Safe.ArgumentNotNull(reader, "reader");
12 Safe.ArgumentNotNull(reader, "reader");
13 m_reader = reader;
13 m_reader = reader;
14 }
14 }
15
15
16 public ReaderScanner(TextReader reader) : this(reader, BUFFER_MAX, CHUNK_SIZE) {
16 public ReaderScanner(TextReader reader) : this(reader, BUFFER_MAX, CHUNK_SIZE) {
17 }
17 }
18
18
19 protected override int Read(char[] buffer, int offset, int size) {
19 protected override int Read(char[] buffer, int offset, int size) {
20 return m_reader.Read(buffer, offset, size);
20 return m_reader.Read(buffer, offset, size);
21 }
21 }
22
22
23 protected override void Dispose(bool disposing) {
23 protected override void Dispose(bool disposing) {
24 if (disposing)
24 if (disposing)
25 Safe.Dispose(m_reader);
25 Safe.Dispose(m_reader);
26 base.Dispose(disposing);
26 base.Dispose(disposing);
27 }
27 }
28 }
28 }
29 }
29 }
30
30
@@ -1,47 +1,53
1 <?xml version="1.0" encoding="utf-8"?>
1 <?xml version="1.0" encoding="utf-8"?>
2 <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
2 <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 <PropertyGroup>
3 <PropertyGroup>
4 <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
4 <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
5 <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
5 <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
6 <ProductVersion>8.0.30703</ProductVersion>
6 <ProductVersion>8.0.30703</ProductVersion>
7 <SchemaVersion>2.0</SchemaVersion>
7 <SchemaVersion>2.0</SchemaVersion>
8 <ProjectGuid>{15DD7123-D504-4627-8B4F-D00C7F04D033}</ProjectGuid>
8 <ProjectGuid>{15DD7123-D504-4627-8B4F-D00C7F04D033}</ProjectGuid>
9 <OutputType>Exe</OutputType>
9 <OutputType>Exe</OutputType>
10 <RootNamespace>MonoPlay</RootNamespace>
10 <RootNamespace>MonoPlay</RootNamespace>
11 <AssemblyName>MonoPlay</AssemblyName>
11 <AssemblyName>MonoPlay</AssemblyName>
12 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
12 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
13 <ReleaseVersion>0.2</ReleaseVersion>
13 <ReleaseVersion>0.2</ReleaseVersion>
14 </PropertyGroup>
14 </PropertyGroup>
15 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
15 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
16 <DebugSymbols>true</DebugSymbols>
16 <DebugSymbols>true</DebugSymbols>
17 <DebugType>full</DebugType>
17 <DebugType>full</DebugType>
18 <Optimize>false</Optimize>
18 <Optimize>false</Optimize>
19 <OutputPath>bin\Debug</OutputPath>
19 <OutputPath>bin\Debug</OutputPath>
20 <DefineConstants>DEBUG;TRACE;</DefineConstants>
20 <DefineConstants>DEBUG;TRACE;</DefineConstants>
21 <ErrorReport>prompt</ErrorReport>
21 <ErrorReport>prompt</ErrorReport>
22 <WarningLevel>4</WarningLevel>
22 <WarningLevel>4</WarningLevel>
23 <ConsolePause>false</ConsolePause>
23 <ConsolePause>false</ConsolePause>
24 </PropertyGroup>
24 </PropertyGroup>
25 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
25 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
26 <DebugType>full</DebugType>
26 <DebugType>full</DebugType>
27 <Optimize>true</Optimize>
27 <Optimize>true</Optimize>
28 <OutputPath>bin\Release</OutputPath>
28 <OutputPath>bin\Release</OutputPath>
29 <ErrorReport>prompt</ErrorReport>
29 <ErrorReport>prompt</ErrorReport>
30 <WarningLevel>4</WarningLevel>
30 <WarningLevel>4</WarningLevel>
31 <ConsolePause>false</ConsolePause>
31 <ConsolePause>false</ConsolePause>
32 </PropertyGroup>
32 </PropertyGroup>
33 <ItemGroup>
33 <ItemGroup>
34 <Reference Include="System" />
34 <Reference Include="System" />
35 <Reference Include="System.Text.Json">
36 <HintPath>..\packages\System.Text.Json.2.0.0.11\lib\net40\System.Text.Json.dll</HintPath>
37 </Reference>
35 </ItemGroup>
38 </ItemGroup>
36 <ItemGroup>
39 <ItemGroup>
37 <Compile Include="Program.cs" />
40 <Compile Include="Program.cs" />
38 <Compile Include="Properties\AssemblyInfo.cs" />
41 <Compile Include="Properties\AssemblyInfo.cs" />
39 </ItemGroup>
42 </ItemGroup>
40 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
43 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
41 <ItemGroup>
44 <ItemGroup>
42 <ProjectReference Include="..\Implab\Implab.csproj">
45 <ProjectReference Include="..\Implab\Implab.csproj">
43 <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project>
46 <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project>
44 <Name>Implab</Name>
47 <Name>Implab</Name>
45 </ProjectReference>
48 </ProjectReference>
46 </ItemGroup>
49 </ItemGroup>
50 <ItemGroup>
51 <None Include="packages.config" />
52 </ItemGroup>
47 </Project> No newline at end of file
53 </Project>
@@ -1,37 +1,45
1 using System;
1 using System;
2 using Implab;
2 using Implab;
3 using System.Threading.Tasks;
3 using System.Threading.Tasks;
4 using Implab.Formats.JSON;
5 using System.IO;
6 using System.Text.Json;
4
7
5 namespace MonoPlay {
8 namespace MonoPlay {
6 class MainClass {
9 class MainClass {
7
10
8
11
9 public static void Main(string[] args) {
12 public static void Main(string[] args) {
10 if (args == null)
13 if (args == null)
11 throw new ArgumentNullException("args");
14 throw new ArgumentNullException("args");
12
15 int t1, t2;
13 var t1 = Environment.TickCount;
14
16
15 DoWork().GetAwaiter().GetResult();
17 for (int i = 0; i < 2; i++) {
18 t1 = Environment.TickCount;
19 int elements =0;
20 using (var reader = new JSONParser(File.OpenText("/home/sergey/temp/citylots.json"))) {
21 while (reader.Read())
22 elements++;
23 }
16
24
17 var t2 = Environment.TickCount;
25 t2 = Environment.TickCount;
18 Console.WriteLine("done: {0} ms, {1:.00} Mb, {2} GC", t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0) );
26 Console.WriteLine("attempt {0} done: {1} ms, {2:.00} Mb, {3} GC, Elements: {4}",i+1, t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0), elements );
19
20 }
27 }
21
28
22 static IPromise<int> DoItem(int x) {
29 Console.WriteLine("Syste.Text.Json");
23 //return Promise<int>.FromResult(x + 1);
30 var paraser = new JsonParser();
24 var p = new Promise<int>();
31 for (int i = 0; i < 2; i++) {
25 p.Resolve(x+1);
32 t1 = Environment.TickCount;
26 return p;
33 using (var reader = File.OpenText("/home/sergey/temp/citylots.json")) {
34 paraser.Parse(reader);
27 }
35 }
28
36
29 static async Task<int> DoWork() {
37 t2 = Environment.TickCount;
30 var c = 0;
38 Console.WriteLine("attempt {0} done: {1} ms, {2:.00} Mb, {3} GC, ",i+1, t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0));
31 for (int i = 0; i < 10000000; i++)
39 }
32 c = await DoItem(c);
40
33 return c;
41
34 }
42 }
35
43
36 }
44 }
37 }
45 }
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
General Comments 0
You need to be logged in to leave comments. Login now