##// END OF EJS Templates
JsonReader optimizations
cin -
r236:302ca905c19e v2
parent child
Show More
@@ -1,147 +1,173
1 1 using NUnit.Framework;
2 2 using System;
3 3 using Implab.Automaton;
4 4 using Implab.Xml;
5 5 using System.Xml;
6 6 using Implab.Formats;
7 7 using Implab.Formats.Json;
8 8 using System.IO;
9 9
10 10 namespace Implab.Format.Test {
11 11 [TestFixture]
12 public class JsonTests {
13
12 public class JsonTests {
13
/// <summary>
/// Feeds the scanner a line of valid JSON tokens and checks that every token is
/// reported with the expected type and text, and that nothing follows the last one.
/// </summary>
[Test]
public void TestScannerValidTokens() {
    using (var scanner = JsonStringScanner.Create(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) {

        Tuple<JsonTokenType, object>[] expected = {
            new Tuple<JsonTokenType,object>(JsonTokenType.Number, "9123"),
            new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.Number, "-123"),
            new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.Number, "0"),
            new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.Number, "0.1"),
            new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.Number, "-0.2"),
            new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.Number, "-0.1e3"),
            new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.Number, "1.3E-3"),
            new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null),
            // The input holds the escape \u0020 followed by a literal space, so the
            // decoded text has two spaces between the line feed and "text".
            new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n  text"),
            new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal"),
            new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, null),
            new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, null)
        };

        string value;
        JsonTokenType tokenType;
        for (var i = 0; i < expected.Length; i++) {

            Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
            Assert.AreEqual(expected[i].Item1, tokenType);
            Assert.AreEqual(expected[i].Item2, value);
        }

        // the input must be exhausted after the last expected token
        Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
    }
}
55 55
/// <summary>
/// Runs the scanner over inputs that are not a single valid token. An input is
/// accepted as "bad" when reading throws <c>ParserException</c> or when the token
/// that was read differs from the whole input; the test fails only if the
/// scanner returns the input unchanged as one token.
/// </summary>
[Test]
public void TestScannerBadTokens() {
    var malformed = new[] {
        " 1",
        " literal",
        " \"",
        "\"unclosed string",
        "1.bad",
        "001", // should be read as three numbers
        "--10",
        "+10",
        "1.0.0",
        "1e1.0",
        "l1teral0",
        ".123",
        "-.123"
    };

    foreach (var input in malformed) {
        using (var scanner = JsonStringScanner.Create(input)) {
            string text;
            JsonTokenType type;

            try {
                scanner.ReadToken(out text, out type);
            } catch (ParserException error) {
                // rejected by the scanner, which is the expected outcome
                Console.WriteLine(error.Message);
                continue;
            }

            if (Object.Equals(text, input))
                Assert.Fail("Token '{0}' shouldn't pass", input);

            // only a part of the input was consumed as a token
            Console.WriteLine("'{0}' is read as {1}", input, text is String ? String.Format("'{0}'", text) : text);
        }
    }
}
91 91
/// <summary>
/// Smoke test: converts several JSON documents to XML through the dump helpers
/// and prints the result to the console. Nothing is asserted here; the test
/// fails only if reading or writing throws.
/// </summary>
[Test]
public void JsonXmlReaderSimpleTest() {
    DumpJsonParse("{\"one\":1, \"two\":2}");
    DumpJsonParse("[1,\"\",2,3]");
    DumpJsonParse("[{\"info\": [7,8,9]}]");
    DumpJsonFlatParse("[1,2,\"\",[3,4],{\"info\": [5,6]},{\"num\": [7,8,null]}, null,[null]]");
}
117
117
/// <summary>
/// Rough timing comparison of JsonXmlReader, JsonReader and the framework
/// XmlReader, each reading a large local file to the end. The data files are
/// machine specific, so the test is ignored when they are not present instead
/// of failing.
/// </summary>
[Test]
public void JsonBenchmark() {
    const string jsonPath = "e:\\citylots.json";
    const string xmlPath = "e:\\citylots.xml";

    if (!File.Exists(jsonPath) || !File.Exists(xmlPath))
        Assert.Ignore($"Benchmark data files '{jsonPath}' and '{xmlPath}' are not available");

    var t = Environment.TickCount;
    using (var reader = new JsonXmlReader(JsonReader.Create(jsonPath), new JsonXmlReaderOptions { NamespaceUri = "XmlReaderSimpleTest", RootName = "data" })) {
        while (reader.Read()) {
        }
    }
    Console.WriteLine($"JsonXmlReader: {Environment.TickCount - t} ms");

    t = Environment.TickCount;
    using (var reader = JsonReader.Create(jsonPath)) {
        while (reader.Read()) {
        }
    }

    Console.WriteLine($"JsonReader: {Environment.TickCount - t} ms");

    t = Environment.TickCount;
    using (var reader = XmlReader.Create("file:///" + xmlPath)) {
        while (reader.Read()) {
        }
    }

    Console.WriteLine($"XmlReader: {Environment.TickCount - t} ms");
}
143
/// <summary>
/// Advances the reader by one node, prints the reader indented by its depth and
/// checks that the node has the expected type.
/// </summary>
/// <param name="reader">The reader to advance.</param>
/// <param name="expected">The node type the reader must be positioned on after the read.</param>
void AssertRead(XmlReader reader, XmlNodeType expected) {
    var advanced = reader.Read();
    Assert.IsTrue(advanced);

    var indent = new string(' ', reader.Depth * 2);
    Console.WriteLine($"{indent}{reader}");

    Assert.AreEqual(expected, reader.NodeType);
}
123 149
/// <summary>
/// Prints the JSON text and then every node produced by a JsonXmlReader over it,
/// one node per line, indented by the node depth.
/// </summary>
/// <param name="json">The JSON document to parse.</param>
void DumpJsonParse(string json) {
    Console.WriteLine($"JSON: {json}");
    Console.WriteLine("XML");

    var source = JsonReader.ParseString(json);
    var options = new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "json" };

    using (var xmlReader = new JsonXmlReader(source, options)) {
        while (xmlReader.Read()) {
            var indent = new string(' ', xmlReader.Depth * 2);
            Console.WriteLine($"{indent}{xmlReader}");
        }
    }
}
132 158
/// <summary>
/// Prints the JSON text and then the XML document obtained by copying a
/// JsonXmlReader (created with FlattenArrays enabled) into an indenting
/// XmlWriter that writes to the console.
/// </summary>
/// <param name="json">The JSON document to parse.</param>
void DumpJsonFlatParse(string json) {
    Console.WriteLine($"JSON: {json}");
    Console.WriteLine("XML");

    var settings = new XmlWriterSettings {
        Indent = true,
        CloseOutput = false, // the console stream must stay open after the writer is disposed
        ConformanceLevel = ConformanceLevel.Document
    };

    using (var xmlWriter = XmlWriter.Create(Console.Out, settings)) {
        var source = JsonReader.ParseString(json);
        var options = new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "", FlattenArrays = true };

        using (var xmlReader = new JsonXmlReader(source, options)) {
            xmlWriter.WriteNode(xmlReader, false);
        }
    }
}
145 171 }
146 172 }
147 173
@@ -1,182 +1,92
1 1 using Implab.Formats.Json;
2 2 using Implab.Parallels;
3 3 using Implab.Xml;
4 4 using System;
5 5 using System.Collections.Concurrent;
6 6 using System.Collections.Generic;
7 7 using System.IO;
8 8 using System.Linq;
9 9 using System.Text;
10 10 using System.Threading;
11 11 using System.Threading.Tasks;
12 12 using System.Xml;
13 13 using System.Xml.Serialization;
14 14
15 15 namespace Implab.Playground {
16 16 public class Program {
17 17
/// <summary>
/// Enqueues <paramref name="len"/> items of <paramref name="data"/> starting at
/// <paramref name="offset"/>, one item at a time and in array order.
/// </summary>
static void EnqueueRange<T>(ConcurrentQueue<T> q, T[] data, int offset, int len) {
    var end = offset + len;
    var index = offset;
    while (index < end) {
        q.Enqueue(data[index]);
        index++;
    }
}
22 22
/// <summary>
/// Dequeues up to <paramref name="len"/> items into <paramref name="buffer"/>
/// starting at <paramref name="offset"/>.
/// </summary>
/// <param name="actual">Receives the number of items stored in the buffer.</param>
/// <returns><c>true</c> if at least one item was dequeued.</returns>
static bool TryDequeueRange<T>(ConcurrentQueue<T> q, T[] buffer, int offset, int len, out int actual) {
    actual = 0;
    T item;
    // The remaining capacity is checked before an item is taken, so a request
    // for zero (or a negative number of) items removes nothing from the queue.
    while (actual < len && q.TryDequeue(out item)) {
        buffer[offset + actual] = item;
        actual++;
    }
    return actual != 0;
}
34 34
/// <summary>
/// Enqueues <paramref name="len"/> items of <paramref name="data"/> starting at
/// <paramref name="offset"/>, one item at a time and in array order.
/// </summary>
static void EnqueueRange<T>(SimpleAsyncQueue<T> q, T[] data, int offset, int len) {
    var end = offset + len;
    var index = offset;
    while (index < end) {
        q.Enqueue(data[index]);
        index++;
    }
}
39 39
/// <summary>
/// Dequeues up to <paramref name="len"/> items into <paramref name="buffer"/>
/// starting at <paramref name="offset"/>.
/// </summary>
/// <param name="actual">Receives the number of items stored in the buffer.</param>
/// <returns><c>true</c> if at least one item was dequeued.</returns>
static bool TryDequeueRange<T>(SimpleAsyncQueue<T> q, T[] buffer, int offset, int len, out int actual) {
    actual = 0;
    T item;
    // The remaining capacity is checked before an item is taken, so a request
    // for zero (or a negative number of) items removes nothing from the queue.
    while (actual < len && q.TryDequeue(out item)) {
        buffer[offset + actual] = item;
        actual++;
    }
    return actual != 0;
}
51 51
/// <summary>
/// Enqueues <paramref name="len"/> items of <paramref name="data"/> starting at
/// <paramref name="offset"/>, one item at a time and in array order.
/// </summary>
static void EnqueueRange<T>(AsyncQueue<T> q, T[] data, int offset, int len) {
    var end = offset + len;
    var index = offset;
    while (index < end) {
        q.Enqueue(data[index]);
        index++;
    }
}
56 56
/// <summary>
/// Dequeues up to <paramref name="len"/> items into <paramref name="buffer"/>
/// starting at <paramref name="offset"/>.
/// </summary>
/// <param name="actual">Receives the number of items stored in the buffer.</param>
/// <returns><c>true</c> if at least one item was dequeued.</returns>
static bool TryDequeueRange<T>(AsyncQueue<T> q, T[] buffer, int offset, int len, out int actual) {
    actual = 0;
    T item;
    // The remaining capacity is checked before an item is taken, so a request
    // for zero (or a negative number of) items removes nothing from the queue.
    while (actual < len && q.TryDequeue(out item)) {
        buffer[offset + actual] = item;
        actual++;
    }
    return actual != 0;
}
68 68
69 69
70 70 /*static void EnqueueRange<T>(AsyncQueue<T> q, T[] data, int offset, int len) {
71 71 q.EnqueueRange(data, offset, len);
72 72 }
73 73
74 74 static bool TryDequeueRange<T>(AsyncQueue<T> q, T[] buffer, int offset, int len, out int actual) {
75 75 return q.TryDequeueRange(buffer, offset, len, out actual);
76 76 }*/
77 77
78 78
/// <summary>
/// Reads a JSON file to the end with JsonReader and prints the elapsed time.
/// </summary>
/// <param name="args">
/// Optional. The first argument is the path of the JSON file to read; when it is
/// omitted the original default location e:\citylots.json is used.
/// </param>
static void Main(string[] args) {
    var path = args != null && args.Length > 0 ? args[0] : "e:\\citylots.json";

    var t = Environment.TickCount;
    using (var reader = JsonReader.Create(path)) {
        while (reader.Read()) {
        }
    }

    Console.WriteLine($"JsonReader: {Environment.TickCount - t} ms");

    Console.WriteLine("done");
}
181 91 }
182 92 }
@@ -1,9 +1,9
1 1
2 2 namespace Implab.Automaton {
/// <summary>
/// Well-known values shared by the automaton tables and alphabets.
/// </summary>
public static class AutomatonConst {
    /// <summary>
    /// Marks a transition table cell for which no transition is defined.
    /// </summary>
    public const int UnreachableState = -1;

    /// <summary>
    /// The input class reported for symbols that are not mapped by an alphabet.
    /// </summary>
    public const int UnclassifiedInput = 0;
}
8 8 }
9 9
@@ -1,348 +1,348
1 1 using Implab;
2 2 using System;
3 3 using System.Collections.Generic;
4 4 using System.Linq;
5 5 using System.Diagnostics;
6 6 using System.IO;
7 7 using System.CodeDom.Compiler;
8 8 using System.CodeDom;
9 9
10 10 namespace Implab.Automaton {
11 11 public class DFATable : IDFATableBuilder {
12 12 int m_stateCount;
13 13 int m_symbolCount;
14 14 int m_initialState;
15 15
16 16 readonly HashSet<int> m_finalStates = new HashSet<int>();
17 17 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
18 18
19 19
20 20 #region IDFADefinition implementation
21 21
/// <summary>
/// Tells whether the specified state has been marked as final.
/// </summary>
/// <param name="s">The state; validated with Safe.ArgumentInRange against 0 and the current state count.</param>
public bool IsFinalState(int s) {
    Safe.ArgumentInRange(s, 0, m_stateCount, "s");

    var marked = m_finalStates.Contains(s);
    return marked;
}
27 27
/// <summary>The states marked as final (the live internal set, not a copy).</summary>
public IEnumerable<int> FinalStates => m_finalStates;
33 33
/// <summary>The number of states: one more than the highest state index registered so far.</summary>
public int StateCount => m_stateCount;
37 37
/// <summary>The number of input symbols: one more than the highest symbol index registered so far.</summary>
public int AlphabetSize => m_symbolCount;
41 41
/// <summary>The state set by the last call to SetInitialState.</summary>
public int InitialState => m_initialState;
45 45
46 46 #endregion
47 47
/// <summary>
/// Sets the initial state and grows the state count so that it covers the state.
/// </summary>
/// <param name="s">The initial state, must not be negative.</param>
public void SetInitialState(int s) {
    Safe.ArgumentAssert(s >= 0, "s");

    m_initialState = s;

    var required = s + 1;
    if (required > m_stateCount)
        m_stateCount = required;
}
53 53
/// <summary>
/// Marks the state as final and grows the state count so that it covers the state.
/// </summary>
/// <param name="state">The state to mark, must not be negative.</param>
public void MarkFinalState(int state) {
    // Same validation as SetInitialState/Add: a negative state would be stored
    // in the final set and later used as an index by CreateFinalStateTable.
    Safe.ArgumentAssert(state >= 0, "state");

    m_stateCount = Math.Max(m_stateCount, state + 1);
    m_finalStates.Add(state);
}
58 58
/// <summary>
/// Adds a transition and grows the state and symbol counts so that they cover
/// both states and the edge of the transition.
/// </summary>
/// <param name="item">The transition; its states and edge must not be negative.</param>
public void Add(AutomatonTransition item) {
    Safe.ArgumentAssert(item.s1 >= 0, "item");
    Safe.ArgumentAssert(item.s2 >= 0, "item");
    Safe.ArgumentAssert(item.edge >= 0, "item");

    var highestState = Math.Max(item.s1, item.s2);
    m_stateCount = Math.Max(m_stateCount, highestState + 1);
    m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);

    m_transitions.Add(item);
}
69 69
/// <summary>
/// Removes all transitions and final states and resets the state count, the
/// symbol count and the initial state.
/// </summary>
public void Clear() {
    m_stateCount = 0;
    m_symbolCount = 0;
    // reset together with the counters, otherwise InitialState keeps pointing
    // at a state of the discarded automaton
    m_initialState = 0;
    m_finalStates.Clear();
    m_transitions.Clear();
}
76 76
/// <summary>Tells whether the transition is present in the table.</summary>
public bool Contains(AutomatonTransition item) => m_transitions.Contains(item);
80 80
/// <summary>Copies the transitions to <paramref name="array"/> starting at <paramref name="arrayIndex"/>.</summary>
public void CopyTo(AutomatonTransition[] array, int arrayIndex) => m_transitions.CopyTo(array, arrayIndex);
84 84
/// <summary>
/// Removes the transition from the table. The state and symbol counts are not reduced.
/// </summary>
/// <returns>The result of removing the item from the underlying set.</returns>
public bool Remove(AutomatonTransition item) => m_transitions.Remove(item);
88 88
/// <summary>The number of transitions in the table.</summary>
public int Count => m_transitions.Count;
94 94
/// <summary>Always <c>false</c>: the table can be modified.</summary>
public bool IsReadOnly => false;
100 100
/// <summary>Enumerates the transitions of the table.</summary>
public IEnumerator<AutomatonTransition> GetEnumerator() => m_transitions.GetEnumerator();
104 104
/// <summary>Non-generic enumeration, forwards to the generic enumerator.</summary>
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() => GetEnumerator();
108 108
/// <summary>
/// Registers an input symbol by growing the symbol count so that it covers the symbol.
/// </summary>
/// <param name="symbol">The symbol index, must not be negative.</param>
public void AddSymbol(int symbol) {
    Safe.ArgumentAssert(symbol >= 0, "symbol");

    var required = symbol + 1;
    m_symbolCount = Math.Max(required, m_symbolCount);
}
113 113
/// <summary>
/// Builds a [state, symbol] lookup table of target states.
/// </summary>
/// <returns>
/// A StateCount x AlphabetSize table in which every cell without a transition
/// holds <c>AutomatonConst.UnreachableState</c>.
/// </returns>
public int[,] CreateTransitionTable() {
    var table = new int[StateCount, AlphabetSize];

    for (int i = 0; i < StateCount; i++)
        for (int j = 0; j < AlphabetSize; j++)
            table[i, j] = AutomatonConst.UnreachableState;

    // The target state is stored as is; narrowing it to a byte would silently
    // wrap state numbers above 255.
    foreach (var t in this)
        table[t.s1, t.edge] = t.s2;

    return table;
}
126 126
/// <summary>
/// Builds a lookup table indexed by state.
/// </summary>
/// <returns>An array of StateCount flags, <c>true</c> for every final state.</returns>
public bool[] CreateFinalStateTable() {
    var flags = new bool[StateCount];

    foreach (var finalState in FinalStates) {
        flags[finalState] = true;
    }

    return flags;
}
135 135
/// <summary>Produces the sets into which a group of final states is divided.</summary>
/// <remarks>
/// Optimize calls this method for every partition that contains final states
/// and replaces the partition with the returned sets. The default
/// implementation keeps the group together and returns it as a single set.
/// Override it when distinct final states must stay distinguishable in the
/// optimized automaton.
/// </remarks>
/// <param name="states">The states of one partition.</param>
/// <returns>The final states.</returns>
protected virtual IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
    var whole = new HashSet<int>(states);
    return new [] { whole };
}
147 147
/// <summary>
/// Builds a reduced automaton from this table: states are merged by partition
/// refinement, then input symbols that behave identically are merged into classes.
/// </summary>
/// <param name="optimalDFA">The builder that receives the symbols, the initial state, the final states and the transitions of the result.</param>
/// <param name="alphabetMap">Receives the mapping from an original input symbol to its class in the result.</param>
/// <param name="stateMap">Receives the mapping from an original state to its state in the result.</param>
protected void Optimize(
    IDFATableBuilder optimalDFA,
    IDictionary<int,int> alphabetMap,
    IDictionary<int,int> stateMap
) {
    Safe.ArgumentNotNull(optimalDFA, "optimalDFA");
    Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
    Safe.ArgumentNotNull(stateMap, "stateMap");


    // Sets are compared by content. The hash is an order independent sum which
    // is allowed to wrap around (Enumerable.Sum would throw on overflow).
    var setComparer = new CustomEqualityComparer<HashSet<int>>(
        (x, y) => x.SetEquals(y),
        s => s.Aggregate(0, (hash, x) => unchecked(hash + x.GetHashCode()))
    );

    var optimalStates = new HashSet<HashSet<int>>(setComparer);
    var queue = new HashSet<HashSet<int>>(setComparer);

    // the initial partition: final states and all the others
    optimalStates.Add(new HashSet<int>(FinalStates));

    var state = new HashSet<int>(
        Enumerable
            .Range(0, m_stateCount)
            .Where(i => !m_finalStates.Contains(i))
    );

    optimalStates.Add(state);
    queue.Add(state);

    // reverse map: target state -> (symbol -> source states)
    var rmap = m_transitions
        .GroupBy(t => t.s2)
        .ToDictionary(
            g => g.Key, // s2
            g => g.ToLookup(t => t.edge, t => t.s1)
        );

    while (queue.Count > 0) {
        var stateA = queue.First();
        queue.Remove(stateA);

        for (int c = 0; c < m_symbolCount; c++) {
            var stateX = new HashSet<int>();
            foreach(var a in stateA.Where(rmap.ContainsKey))
                stateX.UnionWith(rmap[a][c]); // all states from which the symbol 'c' leads to the state 'a'

            // iterate over a snapshot since the partition is modified in the loop
            var tmp = optimalStates.ToArray();
            foreach (var stateY in tmp) {
                var stateR1 = new HashSet<int>(stateY);
                var stateR2 = new HashSet<int>(stateY);

                stateR1.IntersectWith(stateX);
                stateR2.ExceptWith(stateX);

                if (stateR1.Count > 0 && stateR2.Count > 0) {


                    optimalStates.Remove(stateY);
                    optimalStates.Add(stateR1);
                    optimalStates.Add(stateR2);

                    if (queue.Contains(stateY)) {
                        queue.Remove(stateY);
                        queue.Add(stateR1);
                        queue.Add(stateR2);
                    } else {
                        queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
                    }
                }
            }
        }
    }

    // additionally split the partitions which contain final states
    foreach (var final in optimalStates.Where(s => s.Overlaps(m_finalStates)).ToArray()) {
        optimalStates.Remove(final);
        foreach (var split in SplitFinalStates(final))
            optimalStates.Add(split);
    }


    // map every original state to the optimal state it belongs to
    var nextState = 0;
    foreach (var item in optimalStates) {
        var id = nextState++;
        foreach (var s in item)
            stateMap[s] = id;
    }

    // Build the minimal alphabet.
    // Input symbols are indistinguishable if Move(s,a1) == Move(s,a2) for every s.
    // A clustering approach is used: at first all symbols are considered
    // indistinguishable.

    var minClasses = new HashSet<HashSet<int>>(setComparer);
    var alphaQueue = new Queue<HashSet<int>>();
    alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));

    // for every state each class is checked for distinguishability,
    // i.e. symbols are distinguishable if they lead to different states
    for (int s = 0 ; s < optimalStates.Count; s++) {
        var newQueue = new Queue<HashSet<int>>();

        foreach (var A in alphaQueue) {
            // a class of a single symbol can't be split any further,
            // it goes straight to the resulting alphabet
            if (A.Count == 1) {
                minClasses.Add(A);
                continue;
            }

            // separate the symbols which lead to different optimal states
            // optimalState -> alphaClass
            var classes = new Dictionary<int, HashSet<int>>();

            foreach (var term in A) {
                // the target of the optimal state 's' for the symbol 'term', -1 when there is none
                var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();

                HashSet<int> a2;
                if (!classes.TryGetValue(s2, out a2)) {
                    a2 = new HashSet<int>();
                    newQueue.Enqueue(a2);
                    classes[s2] = a2;
                }
                a2.Add(term);
            }
        }

        if (newQueue.Count == 0)
            break;
        alphaQueue = newQueue;
    }

    // when the loop is over the queue holds the minimal distinguishable
    // classes of the input symbols
    foreach (var A in alphaQueue)
        minClasses.Add(A);

    // Build the mapping of the input alphabet.
    // AutomatonConst.UnclassifiedInput may have a special meaning, therefore
    // the class which contains this symbol keeps the same number.

    var nextCls = 0;
    foreach (var item in minClasses) {
        if (nextCls == AutomatonConst.UnclassifiedInput)
            nextCls++;

        // preserve AutomatonConst.UnclassifiedInput
        var cls = item.Contains(AutomatonConst.UnclassifiedInput) ? AutomatonConst.UnclassifiedInput : nextCls++;
        optimalDFA.AddSymbol(cls);

        foreach (var a in item)
            alphabetMap[a] = cls;
    }

    // build the automaton
    optimalDFA.SetInitialState(stateMap[m_initialState]);

    foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
        optimalDFA.MarkFinalState(sf);

    foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
        optimalDFA.Add(t);
}
313 313
/// <summary>
/// Renders the automaton as a "digraph dfa" text: one box-shaped node line per
/// final state and one labelled edge line per transition.
/// </summary>
/// <param name="inputAlphabet">Translates an edge class back to the input symbols shown in the label.</param>
/// <param name="stateAlphabet">Translates a state back to the symbols used as the node name.</param>
/// <returns>The lines of the graph joined with "\n".</returns>
protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
    Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
    Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");

    var lines = new List<string> { "digraph dfa {" };

    foreach (var final in m_finalStates) {
        var node = String.Join("", stateAlphabet.GetSymbols(final));
        lines.Add(String.Format("{0} [shape=box];", node));
    }

    foreach (var t in m_transitions) {
        var source = String.Join("", stateAlphabet.GetSymbols(t.s1));

        // the unclassified input is shown as '@'
        var symbols = t.edge == AutomatonConst.UnclassifiedInput
            ? new [] { "@" }
            : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString());
        // the label is converted to a literal twice, as in the original output
        var label = ToLiteral(ToLiteral(String.Join("", symbols)));

        var target = String.Join("", stateAlphabet.GetSymbols(t.s2));

        lines.Add(String.Format("{0} -> {2} [label={1}];", source, label, target));
    }

    lines.Add("}");
    return String.Join("\n", lines);
}
335 335
/// <summary>
/// Converts the text to a C# string literal using the CodeDom C# provider.
/// </summary>
/// <param name="input">The text to convert.</param>
/// <returns>The source code of the literal as written by the provider.</returns>
static string ToLiteral(string input)
{
    using (var writer = new StringWriter())
    using (var provider = CodeDomProvider.CreateProvider("CSharp"))
    {
        var literal = new CodePrimitiveExpression(input);
        provider.GenerateCodeFromExpression(literal, writer, null);
        return writer.ToString();
    }
}
347 347 }
348 348 }
@@ -1,84 +1,84
1 1 using System;
2 2 using System.Collections.Generic;
3 3 using System.Linq;
4 4
5 5 namespace Implab.Automaton {
/// <summary>
/// An alphabet backed by a dictionary which maps symbols to class numbers.
/// When unclassified input is supported, class numbering starts at 1 and unknown
/// symbols are translated to <c>AutomatonConst.UnclassifiedInput</c>.
/// </summary>
public class MapAlphabet<T> : IAlphabetBuilder<T> {
    readonly Dictionary<T,int> m_map;
    int m_nextCls;
    readonly bool m_supportUnclassified;

    /// <summary>Creates an empty alphabet.</summary>
    /// <param name="supportUnclassified">Whether symbols outside of the alphabet are accepted by Translate.</param>
    /// <param name="comparer">The comparer for symbols, <c>null</c> selects the default one.</param>
    public MapAlphabet(bool supportUnclassified, IEqualityComparer<T> comparer) {
        if (comparer != null)
            m_map = new Dictionary<T, int>(comparer);
        else
            m_map = new Dictionary<T,int>();

        m_supportUnclassified = supportUnclassified;
        m_nextCls = supportUnclassified ? 1 : 0;
    }

    #region IAlphabetBuilder implementation

    /// <summary>Returns the class of a known symbol or assigns the next free class to a new one.</summary>
    public int DefineSymbol(T symbol) {
        int cls;
        if (m_map.TryGetValue(symbol, out cls))
            return cls;

        return DefineSymbol(symbol, m_nextCls);
    }

    /// <summary>Adds a new symbol with the specified class; the underlying dictionary rejects a symbol that is already defined.</summary>
    public int DefineSymbol(T symbol, int cls) {
        Safe.ArgumentAssert(cls >= 0, "cls");

        // the next free class is always above every class used so far
        m_nextCls = Math.Max(cls + 1, m_nextCls);
        m_map.Add(symbol, cls);
        return cls;
    }

    /// <summary>Puts all the symbols into the next free class.</summary>
    public int DefineClass(IEnumerable<T> symbols) {
        return DefineClass(symbols, m_nextCls);
    }

    /// <summary>Puts all the symbols into the specified class, replacing their previous classes.</summary>
    public int DefineClass(IEnumerable<T> symbols, int cls) {
        Safe.ArgumentAssert(cls >= 0, "cls");
        Safe.ArgumentNotNull(symbols, "symbols");

        m_nextCls = Math.Max(cls + 1, m_nextCls);

        foreach (var member in symbols) {
            m_map[member] = cls;
        }

        return cls;
    }

    #endregion

    #region IAlphabet implementation

    /// <summary>Returns the class of the symbol.</summary>
    /// <exception cref="ArgumentOutOfRangeException">The symbol is unknown and unclassified input isn't supported.</exception>
    public int Translate(T symbol) {
        int cls;
        var known = m_map.TryGetValue(symbol, out cls);

        if (known)
            return cls;

        if (m_supportUnclassified)
            return AutomatonConst.UnclassifiedInput;

        throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet");
    }

    /// <summary>The next free class number, i.e. the number of classes including the reserved one.</summary>
    public int Count => m_nextCls;

    /// <summary>Tells whether the symbol can be translated.</summary>
    public bool Contains(T symbol) {
        if (m_supportUnclassified)
            return true;

        return m_map.ContainsKey(symbol);
    }


    /// <summary>Lazily enumerates the symbols mapped to the class.</summary>
    public IEnumerable<T> GetSymbols(int cls) {
        Safe.ArgumentAssert(!m_supportUnclassified || cls > 0, "cls");

        return
            from pair in m_map
            where pair.Value == cls
            select pair.Key;
    }
    #endregion

    /// <summary>The symbol to class pairs (the live internal dictionary).</summary>
    public IEnumerable<KeyValuePair<T,int>> Mappings => m_map;
}
83 83 }
84 84
@@ -1,212 +1,212
1 1 using Implab;
2 2 using System;
3 3 using System.Collections.Generic;
4 4 using System.Diagnostics;
5 5 using System.Linq;
6 6
7 7 namespace Implab.Automaton.RegularExpressions {
8 8 /// <summary>
9 9 /// Используется для построения ДКА по регулярному выражению, сначала обходит
10 10 /// регулярное выражение и вычисляет followpos, затем используется метод
11 11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
12 12 /// </summary>
13 13 public class RegularExpressionVisitor : IVisitor {
14 14 int m_idx;
15 15 Token m_root;
16 16 HashSet<int> m_firstpos;
17 17 HashSet<int> m_lastpos;
18 18
19 19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
20 20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
21 21 readonly HashSet<int> m_ends = new HashSet<int>();
22 22
23 23 readonly IDFATableBuilder m_builder;
24 24 readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
25 25 false,
26 26 new CustomEqualityComparer<HashSet<int>>(
27 27 (x, y) => x.SetEquals(y),
28 28 x => x.Sum(n => n.GetHashCode())
29 29 )
30 30 );
31 31
/// <summary>Creates a visitor which reports the automaton to the builder.</summary>
/// <param name="builder">Receives the states and transitions produced by BuildDFA, must not be null.</param>
public RegularExpressionVisitor(IDFATableBuilder builder) {
    Safe.ArgumentNotNull(builder, "builder");
    m_builder = builder;
}
37 37
/// <summary>
/// Returns the followpos set of the position, creating and storing an empty
/// one on first access.
/// </summary>
HashSet<int> Followpos(int pos) {
    HashSet<int> followers;
    if (!m_followpos.TryGetValue(pos, out followers)) {
        followers = new HashSet<int>();
        m_followpos[pos] = followers;
    }
    return followers;
}
42 42
/// <summary>
/// Tells whether the expression node can match the empty input: empty and star
/// nodes always can, an alternative when either side can, a concatenation when
/// both sides can, any other node can't.
/// </summary>
bool Nullable(object n) {
    if (n is EmptyToken)
        return true;
    if (n is StarToken)
        return true;

    var alternative = n as AltToken;
    if (alternative != null)
        return Nullable(alternative.Left) || Nullable(alternative.Right);

    var sequence = n as CatToken;
    return sequence != null
        ? Nullable(sequence.Left) && Nullable(sequence.Right)
        : false;
}
54 54
/// <summary>The number of the position assigned last (positions are counted from 1).</summary>
protected int Index => m_idx;
58 58
/// <summary>
/// Visits both branches of an alternative; its firstpos and lastpos are the
/// unions of the corresponding sets of the branches.
/// </summary>
public void Visit(AltToken token) {
    if (m_root == null)
        m_root = token;

    var first = new HashSet<int>();
    var last = new HashSet<int>();

    // each visit leaves the sets of the visited branch in m_firstpos/m_lastpos
    foreach (var branch in new[] { token.Left, token.Right }) {
        branch.Accept(this);
        first.UnionWith(m_firstpos);
        last.UnionWith(m_lastpos);
    }

    m_firstpos = first;
    m_lastpos = last;
}
76 76
/// <summary>
/// Visits the repeated expression; firstpos and lastpos stay those of the inner
/// expression, and every last position may be followed by every first position.
/// </summary>
public void Visit(StarToken token) {
    if (m_root == null)
        m_root = token;

    token.Token.Accept(this);

    foreach (var pos in m_lastpos) {
        var followers = Followpos(pos);
        followers.UnionWith(m_firstpos);
    }
}
85 85
/// <summary>
/// Visits both parts of a concatenation, computes its firstpos and lastpos and
/// lets every last position of the left part be followed by the first positions
/// of the right part.
/// </summary>
public void Visit(CatToken token) {
    if (m_root == null)
        m_root = token;

    var first = new HashSet<int>();
    var last = new HashSet<int>();

    token.Left.Accept(this);
    var leftFirst = m_firstpos;
    var leftLast = m_lastpos;
    first.UnionWith(leftFirst);

    token.Right.Accept(this);
    var rightFirst = m_firstpos;
    var rightLast = m_lastpos;
    last.UnionWith(rightLast);

    // a nullable part lets the positions of the other part show through
    if (Nullable(token.Left))
        first.UnionWith(rightFirst);

    if (Nullable(token.Right))
        last.UnionWith(leftLast);

    m_firstpos = first;
    m_lastpos = last;

    foreach (var pos in leftLast) {
        Followpos(pos).UnionWith(rightFirst);
    }
}
113 113
/// <summary>
/// Visits the empty expression, which has no positions: both firstpos and
/// lastpos become empty sets.
/// </summary>
public void Visit(EmptyToken token) {
    if (m_root == null)
        m_root = token;

    // The enclosing Alt/Cat visitor reads m_firstpos and m_lastpos right after
    // this call; without resetting them it would pick up the sets left by the
    // previously visited node (or null when nothing was visited yet).
    m_firstpos = new HashSet<int>();
    m_lastpos = new HashSet<int>();
}
118 118
/// <summary>
/// Assigns the next position to the symbol and remembers the input value of
/// that position; firstpos and lastpos consist of this single position.
/// </summary>
public void Visit(SymbolToken token) {
    if (m_root == null)
        m_root = token;

    var position = ++m_idx;
    m_indexes[position] = token.Value;

    m_firstpos = new HashSet<int> { position };
    m_lastpos = new HashSet<int> { position };
}
127 127
/// <summary>
/// Assigns the next position to the end marker, maps it to the unclassified
/// input, registers an (initially empty) followpos set for it and records the
/// position as an end position.
/// </summary>
public virtual void Visit(EndToken token) {
    if (m_root == null)
        m_root = token;

    var position = ++m_idx;
    m_indexes[position] = AutomatonConst.UnclassifiedInput;

    m_firstpos = new HashSet<int> { position };
    m_lastpos = new HashSet<int> { position };

    // makes sure the followpos entry of the end marker exists
    Followpos(position);
    m_ends.Add(position);
}
138 138
/// <summary>
/// Builds the automaton from the position sets collected while visiting the
/// expression. States are sets of positions; the initial state is the firstpos
/// of the whole expression, and a state is final when it contains an end position.
/// </summary>
public void BuildDFA() {
    var initial = m_firstpos;

    AddState(initial);
    SetInitialState(initial);

    if (IsFinal(initial))
        MarkFinalState(initial);

    var lastInput = m_indexes.Values.Max();
    var pending = new Queue<HashSet<int>>();

    pending.Enqueue(initial);

    while (pending.Count > 0) {
        var source = pending.Dequeue();

        for (int input = 0; input <= lastInput; input++) {
            // the target state is the union of followpos of all positions of
            // the source state which are labelled with the current input
            var target = new HashSet<int>();
            foreach (var pos in source) {
                if (m_indexes[pos] != input)
                    continue;
                target.UnionWith(Followpos(pos));
            }

            if (target.Count == 0)
                continue;

            if (!HasState(target)) {
                AddState(target);
                if (IsFinal(target))
                    MarkFinalState(target);

                pending.Enqueue(target);
            }

            DefineTransition(source, target, input);
        }
    }
}
176 176
/// <summary>Tells whether the set of positions is already registered as a state.</summary>
protected bool HasState(HashSet<int> state) => m_states.Contains(state);
180 180
/// <summary>Registers a new state; the state must not be registered yet (checked in debug builds).</summary>
protected void AddState(HashSet<int> state) {
    Debug.Assert(!m_states.Contains(state));
    m_states.DefineSymbol(state);
}
186 186
/// <summary>Returns the number of a registered state (registration is checked in debug builds).</summary>
protected int Translate(HashSet<int> state) {
    Debug.Assert(m_states.Contains(state));
    var number = m_states.Translate(state);
    return number;
}
192 192
/// <summary>Reports the number of the state to the builder as the initial state.</summary>
protected virtual void SetInitialState(HashSet<int> state) {
    var number = Translate(state);
    m_builder.SetInitialState(number);
}
196 196
/// <summary>Reports the number of the state to the builder as a final state.</summary>
protected virtual void MarkFinalState(HashSet<int> state) {
    var number = Translate(state);
    m_builder.MarkFinalState(number);
}
200 200
/// <summary>Adds the transition between two registered states on the input <paramref name="ch"/> to the builder.</summary>
protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {
    var source = Translate(s1);
    var target = Translate(s2);
    m_builder.Add(new AutomatonTransition(source, target, ch));
}
205 205
/// <summary>Tells whether the state contains at least one end position.</summary>
bool IsFinal(IEnumerable<int> state) {
    Debug.Assert(state != null);
    return state.Any(position => m_ends.Contains(position));
}
210 210
211 211 }
212 212 }
@@ -1,36 +1,36
1 1 using System.Collections.Generic;
2 2 using System.Linq;
3 3 using Implab.Automaton;
4 4 using System;
5 5
6 6 namespace Implab.Formats {
/// <summary>
/// Alphabet over <c>char</c> in which the index of a symbol is its numeric character code.
/// </summary>
public class CharAlphabet : IndexedAlphabetBase<char> {

    /// <summary>
    /// Returns the index of the symbol, which is the numeric value of the character.
    /// </summary>
    public override int GetSymbolIndex(char symbol) {
        return symbol;
    }

    /// <summary>
    /// Enumerates every <c>char</c> value from <c>char.MinValue</c> to <c>char.MaxValue</c> inclusive.
    /// </summary>
    public IEnumerable<char> InputSymbols {
        get {
            // Enumerable.Range takes a count (not an upper bound), and Cast<char>() would try
            // to unbox boxed ints as chars and throw InvalidCastException, so the values are
            // converted explicitly.
            return Enumerable
                .Range(char.MinValue, char.MaxValue - char.MinValue + 1)
                .Select(code => (char)code);
        }
    }

    /// <summary>
    /// Creates a compact <see cref="CharMap"/> covering the range between the smallest and
    /// the largest mapped symbol.
    /// </summary>
    /// <returns>
    /// A map whose slots hold the class of each mapped symbol; slots of symbols without
    /// a mapping are left at 0.
    /// </returns>
    public CharMap CreateCharMap() {
        var map = new Dictionary<int, int>();

        int max = 0, min = char.MaxValue;
        foreach (var p in Mappings) {
            var index = GetSymbolIndex(p.Key);
            max = Math.Max(max, index);
            min = Math.Min(min, index);
            map[index] = p.Value;
        }

        // Without mappings (max - min + 1) is negative and the array allocation would throw.
        // Return a single slot holding 0, the same value unmapped slots receive below.
        if (map.Count == 0)
            return new CharMap(char.MinValue, new int[1]);

        var result = new int[max - min + 1];

        // TryGetValue leaves default(int) in the slots of symbols which have no mapping
        for (int i = 0; i < result.Length; i++)
            map.TryGetValue(min + i, out result[i]);

        return new CharMap((char)min, result);
    }
}
36 36 }
@@ -1,42 +1,42
1 1 using Implab.Automaton;
2 2 using System;
3 3 using System.Collections.Generic;
4 4 using System.Linq;
5 5 using System.Runtime.CompilerServices;
6 6 using System.Text;
7 7 using System.Threading.Tasks;
8 8
9 9 namespace Implab.Formats {
/// <summary>
/// Table-based alphabet for <c>char</c>: maps a contiguous range of characters, starting
/// at a minimal character, to class indexes.
/// </summary>
public class CharMap : IAlphabet<char> {
    readonly char m_min;
    readonly int[] m_map;

    /// <summary>
    /// Creates the map.
    /// </summary>
    /// <param name="min">The character which corresponds to the first slot of <paramref name="map"/>.</param>
    /// <param name="map">Class indexes of the characters <c>min</c>, <c>min + 1</c>, and so on.</param>
    public CharMap(char min, int[] map) {
        Safe.ArgumentNotNull(map, nameof(map));
        Count = map.Max() + 1;
        m_min = min;
        m_map = map;
    }

    /// <summary>
    /// The largest class index stored in the map plus one.
    /// </summary>
    public int Count {
        get; private set;
    }

    /// <summary>
    /// Checks whether the symbol lies inside the mapped range and its class differs from
    /// <c>AutomatonConst.UnclassifiedInput</c>.
    /// </summary>
    public bool Contains(char symbol) {
        // The slot index is checked against the array length. The previous upper bound
        // (min + map.Length) was one past the last slot and could also wrap around char.
        var index = symbol - m_min;
        return index >= 0 && index < m_map.Length && m_map[index] != AutomatonConst.UnclassifiedInput;
    }

    /// <summary>
    /// Enumerates the characters of the mapped range which belong to the specified class.
    /// </summary>
    public IEnumerable<char> GetSymbols(int cls) {
        for (var i = 0; i < m_map.Length; i++)
            if (m_map[i] == cls)
                yield return (char)(i + m_min);
    }

    /// <summary>
    /// Returns the class of the symbol, or <c>AutomatonConst.UnclassifiedInput</c> when the
    /// symbol lies outside the mapped range.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int Translate(char symbol) {
        var index = symbol - m_min;
        return index >= 0 && index < m_map.Length ? m_map[index] : AutomatonConst.UnclassifiedInput;
    }
}
42 42 }
@@ -1,73 +1,73
1 1 using Implab;
2 2 using System;
3 3 using System.Collections.Generic;
4 4 using System.Linq;
5 5 using Implab.Automaton;
6 6 using Implab.Automaton.RegularExpressions;
7 7
8 8 namespace Implab.Formats {
/// <summary>
/// Abstract base class of a grammar. Provides helpers which build expression tokens
/// over an alphabet of <typeparamref name="TSymbol"/>.
/// </summary>
public abstract class Grammar<TSymbol> {

    /// <summary>
    /// The alphabet which translates symbols to indexes and registers new symbols.
    /// </summary>
    protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { get; }

    /// <summary>
    /// Creates a token for <c>AutomatonConst.UnclassifiedInput</c>.
    /// </summary>
    protected SymbolToken UnclassifiedToken() {
        var token = new SymbolToken(AutomatonConst.UnclassifiedInput);
        return token;
    }

    /// <summary>
    /// Defines every symbol of the sequence in the alphabet.
    /// </summary>
    protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
        Safe.ArgumentNotNull(alphabet, "alphabet");

        foreach (var symbol in alphabet) {
            AlphabetBuilder.DefineSymbol(symbol);
        }
    }

    /// <summary>
    /// Creates a token for a single symbol; the symbol is defined in the alphabet when
    /// it is not classified yet.
    /// </summary>
    protected Token SymbolToken(TSymbol symbol) {
        var index = TranslateOrAdd(symbol);
        return Token.New(index);
    }

    /// <summary>
    /// Creates a token for a set of symbols; symbols which are not classified yet are
    /// defined in the alphabet.
    /// </summary>
    protected Token SymbolToken(IEnumerable<TSymbol> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        var indexes = TranslateOrAdd(symbols).ToArray();
        return Token.New(indexes);
    }

    /// <summary>
    /// Same as <see cref="SymbolToken(IEnumerable{TSymbol})"/>, the symbols are listed as arguments.
    /// </summary>
    protected Token SymbolSetToken(params TSymbol[] set) {
        return SymbolToken(set);
    }

    // translates the symbol, an unclassified symbol is added to the alphabet
    int TranslateOrAdd(TSymbol ch) {
        var index = AlphabetBuilder.Translate(ch);
        return index == AutomatonConst.UnclassifiedInput
            ? AlphabetBuilder.DefineSymbol(ch)
            : index;
    }

    // lazily translates distinct symbols, see TranslateOrAdd(TSymbol)
    IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
        return symbols.Distinct().Select(TranslateOrAdd);
    }

    // translates the symbol, an unclassified symbol is an error
    int TranslateOrDie(TSymbol ch) {
        var index = AlphabetBuilder.Translate(ch);
        if (index != AutomatonConst.UnclassifiedInput)
            return index;

        throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
    }

    // lazily translates distinct symbols, see TranslateOrDie(TSymbol)
    IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
        return symbols.Distinct().Select(TranslateOrDie);
    }

    /// <summary>
    /// Creates a token for all indexes from 0 to <c>AlphabetBuilder.Count - 1</c> except
    /// the indexes of the specified symbols. Every specified symbol must be classified already.
    /// </summary>
    protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        var all = Enumerable.Range(0, AlphabetBuilder.Count);
        var excluded = TranslateOrDie(symbols);
        return Token.New(all.Except(excluded).ToArray());
    }
}
71 71
72 72
73 73 }
@@ -1,84 +1,84
1 1 using Implab.Automaton;
2 2 using System;
3 3 using System.Collections.Generic;
4 4 using System.Linq;
5 5 using System.Runtime.CompilerServices;
6 6 using System.Text;
7 7 using System.Threading.Tasks;
8 8
9 9 namespace Implab.Formats {
/// <summary>
/// Table-driven scanner: feeds characters through a DFA transition table and remembers
/// the last reached state and the position where the scan has stopped.
/// </summary>
/// <typeparam name="TTag">The type of the tags attached to the DFA states.</typeparam>
public class InputScanner<TTag> {
    readonly TTag[] m_tags;
    readonly int m_initialState;
    readonly int[,] m_dfa;
    readonly CharMap m_alphabet;
    readonly bool[] m_final;

    int m_position;
    int m_state;

    /// <summary>
    /// Creates the scanner.
    /// </summary>
    /// <param name="dfaTable">Transition table indexed by state and by input class.</param>
    /// <param name="finalStates">Flags of the final states, indexed by state.</param>
    /// <param name="tags">Tags of the states, indexed by state.</param>
    /// <param name="initialState">The state in which every scan starts.</param>
    /// <param name="alphabet">Translates characters to input classes.</param>
    public InputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, CharMap alphabet) {
        Safe.ArgumentNotNull(dfaTable, nameof(dfaTable));
        Safe.ArgumentNotNull(finalStates, nameof(finalStates));
        Safe.ArgumentNotNull(tags, nameof(tags));
        Safe.ArgumentNotNull(alphabet, nameof(alphabet));

        m_dfa = dfaTable;
        m_final = finalStates;
        m_tags = tags;
        m_initialState = initialState;
        m_alphabet = alphabet;

        // Start in the initial state. Otherwise the scanner would stay in state 0 until
        // ResetState() is called, whatever the initial state of the automaton is.
        m_state = initialState;
    }

    /// <summary>
    /// The tag of the current state.
    /// </summary>
    public TTag Tag {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get {
            return m_tags[m_state];
        }
    }

    /// <summary>
    /// The offset at which the last call to <see cref="Scan"/> has stopped.
    /// </summary>
    public int Position {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get {
            return m_position;
        }
    }

    /// <summary>
    /// Indicates whether the current state is final.
    /// </summary>
    public bool IsFinal {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get {
            return m_final[m_state];
        }
    }

    /// <summary>
    /// Returns the scanner to the initial state; the position is left untouched.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void ResetState() {
        m_state = m_initialState;
    }

    /// <summary>
    /// Creates a scanner which shares the tables with this one and has the same state and position.
    /// </summary>
    public InputScanner<TTag> Clone() {
        var clone = new InputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState, m_alphabet);
        clone.m_state = m_state;
        clone.m_position = m_position;
        return clone;
    }

    /// <summary>
    /// Scans characters of <paramref name="data"/> from <paramref name="offset"/> up to,
    /// but not including, <paramref name="max"/>, continuing from the current state.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the whole range was consumed; <c>false</c> when a character has no
    /// transition from the current state, in which case <see cref="Position"/> is the offset
    /// of that character and the state is the last one reached.
    /// </returns>
    public bool Scan(char[] data, int offset, int max) {
        var next = m_state;

        while (offset < max) {
            next = m_dfa[next, m_alphabet.Translate(data[offset])];
            if (next == AutomatonConst.UnreachableState) {
                // scanner stops on the next position after last recognized symbol
                m_position = offset;
                return false;
            }
            m_state = next;
            offset++;
        }
        m_position = offset;
        return true;
    }
}
84 84 }
@@ -1,148 +1,148
1 1 using System.Linq;
2 2 using Implab.Automaton.RegularExpressions;
3 3 using System;
4 4 using Implab.Automaton;
5 5 using Implab.Components;
6 6
7 7 namespace Implab.Formats.Json {
/// <summary>
/// Lexical grammar of JSON. Builds two scanners: one for the JSON tokens and one for
/// the contents of string literals.
/// </summary>
public class JsonGrammar : Grammar<char> {
    /// <summary>
    /// Tags of the scanner states. The first group is assigned by the JSON expression
    /// scanner, the second group (after <c>Whitespace</c>) by the string scanner;
    /// <c>StringBound</c> is used by both.
    /// </summary>
    public enum TokenType {
        None,
        BeginObject,
        EndObject,
        BeginArray,
        EndArray,
        String,
        Number,
        Literal,
        NameSeparator,
        ValueSeparator,
        Whitespace,

        StringBound,
        EscapedChar,
        UnescapedChar,
        EscapedUnicode
    }

    static LazyAndWeak<JsonGrammar> _instance = new LazyAndWeak<JsonGrammar>(() => new JsonGrammar());

    /// <summary>
    /// The shared grammar instance provided by the <c>LazyAndWeak</c> holder.
    /// </summary>
    public static JsonGrammar Instance {
        get { return _instance.Value; }
    }

    readonly FastInputScanner<TokenType> m_jsonExpression;
    readonly FastInputScanner<TokenType> m_stringExpression;
    readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();

    /// <summary>
    /// The alphabet used to define the grammar.
    /// </summary>
    public CharAlphabet DefaultAlphabet {
        get { return m_defaultAlphabet; }
    }

    /// <summary>
    /// Defines the grammar and builds both scanners.
    /// </summary>
    public JsonGrammar() {
        // The order of the statements below is significant: symbols are registered in the
        // alphabet on first use, and SymbolTokenExcept works only with the symbols which
        // are already registered.
        DefineAlphabet(Enumerable.Range(0, 0x20).Select(code => (char)code));

        var hexDigit = SymbolRangeToken('a', 'f')
            .Or(SymbolRangeToken('A', 'F'))
            .Or(SymbolRangeToken('0', '9'));
        var digit9 = SymbolRangeToken('1', '9');
        var zero = SymbolToken('0');
        var digit = zero.Or(digit9);
        var dot = SymbolToken('.');
        var minus = SymbolToken('-');
        var sign = SymbolSetToken('-', '+');
        var expSign = SymbolSetToken('e', 'E');
        var letters = SymbolRangeToken('a', 'z');
        var integer = zero.Or(digit9.Cat(digit.EClosure()));
        var frac = dot.Cat(digit.Closure());
        var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
        var quote = SymbolToken('"');
        var backSlash = SymbolToken('\\');
        var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
        var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
        var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();

        // structural characters may be surrounded by optional whitespace
        var beginObject = Padded(whitespace, '{');
        var endObject = Padded(whitespace, '}');
        var beginArray = Padded(whitespace, '[');
        var endArray = Padded(whitespace, ']');
        var nameSep = Padded(whitespace, ':');
        var valueSep = Padded(whitespace, ',');

        var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
        var literal = letters.Closure();
        var unescaped = SymbolTokenExcept(
            Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(code => (char)code)
        );

        var jsonExpression = number.Tag(TokenType.Number)
            .Or(literal.Tag(TokenType.Literal))
            .Or(quote.Tag(TokenType.StringBound))
            .Or(beginObject.Tag(TokenType.BeginObject))
            .Or(endObject.Tag(TokenType.EndObject))
            .Or(beginArray.Tag(TokenType.BeginArray))
            .Or(endArray.Tag(TokenType.EndArray))
            .Or(nameSep.Tag(TokenType.NameSeparator))
            .Or(valueSep.Tag(TokenType.ValueSeparator))
            .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace));

        var jsonStringExpression = quote.Tag(TokenType.StringBound)
            .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
            .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
            .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));

        m_jsonExpression = BuildFastScanner(jsonExpression);
        m_stringExpression = BuildFastScanner(jsonStringExpression);
    }

    /// <summary>
    /// Returns a clone of the scanner for the JSON tokens.
    /// </summary>
    public static FastInputScanner<TokenType> CreateJsonExpressionScanner() {
        var grammar = Instance;
        return grammar.m_jsonExpression.Clone();
    }

    /// <summary>
    /// Returns a clone of the scanner for the contents of string literals.
    /// </summary>
    public static FastInputScanner<TokenType> CreateStringExpressionScanner() {
        var grammar = Instance;
        return grammar.m_stringExpression.Clone();
    }

    protected override IAlphabetBuilder<char> AlphabetBuilder {
        get { return m_defaultAlphabet; }
    }

    // the symbol surrounded by the specified padding expression
    Token Padded(Token padding, char symbol) {
        return padding.Cat(SymbolToken(symbol)).Cat(padding);
    }

    // a token for all characters from start to stop inclusive
    Token SymbolRangeToken(char start, char stop) {
        var length = stop - start + 1;
        return SymbolToken(Enumerable.Range(start, length).Select(code => (char)code));
    }

    /// <summary>
    /// Builds a scanner for the specified tagged expression.
    /// </summary>
    /// <param name="regexp">The expression to build the scanner for.</param>
    /// <exception cref="ApplicationException">The initial state of the built DFA is final.</exception>
    public FastInputScanner<TokenType> BuildFastScanner(Token regexp) {
        var dfa = new RegularDFA<char, TokenType>(AlphabetBuilder);

        var visitor = new RegularExpressionVisitor<TokenType>(dfa);
        regexp.Accept(visitor);
        visitor.BuildDFA();

        if (dfa.IsFinalState(dfa.InitialState))
            throw new ApplicationException("The specified language contains empty token");

        var ab = new CharAlphabet();
        var optimal = dfa.Optimize(ab);

        var transitions = optimal.CreateTransitionTable();
        var finals = optimal.CreateFinalStateTable();
        var tags = NormalizeTags(optimal.CreateTagTable());
        var initial = optimal.InitialState;

        return new FastInputScanner<TokenType>(transitions, finals, tags, initial, ab.GetTranslationMap());
    }

    // Reduces per-state tag lists to a single tag per state: states without tags get
    // the default tag, a state with several tags is an error.
    static TokenType[] NormalizeTags(TokenType[][] tags) {
        var result = new TokenType[tags.Length];

        for (var i = 0; i < tags.Length; i++) {
            var stateTags = tags[i];

            if (stateTags == null || stateTags.Length == 0) {
                result[i] = default(TokenType);
                continue;
            }

            if (stateTags.Length != 1)
                throw new Exception($"Ambigous state tags {string.Join(", ", stateTags)}");

            result[i] = stateTags[0];
        }

        return result;
    }

}
148 148 }
@@ -1,318 +1,318
1 1 using System;
2 2 using System.Diagnostics;
3 3 using System.IO;
4 4 using Implab.Automaton;
5 5 using Implab.Automaton.RegularExpressions;
6 6 using System.Linq;
7 7 using Implab.Components;
8 8 using System.Collections.Generic;
9 9 using System.Text;
10 10 using System.Globalization;
11 11
12 12 namespace Implab.Formats.Json {
/// <summary>
/// Pull parser of JSON data.
/// </summary>
/// <remarks>
/// Note the interpretation of the <see cref="Level"/> property: it is the current
/// nesting depth of objects and arrays, however the closing element of an object or
/// an array has a level less than the object itself.
/// <code>
/// { // Level = 1
/// "name" : "Peter", // Level = 1
/// "address" : { // Level = 2
/// city : "Stern" // Level = 2
/// } // Level = 1
/// } // Level = 0
/// </code>
/// </remarks>
public class JsonReader : Disposable {

    enum MemberContext {
        MemberName,
        MemberValue
    }

    #region Parser rules
    // The state of the syntax automaton for one element (root, object or array).
    // This is a mutable struct: copies pushed to the stack keep their own state.
    struct ParserContext {
        readonly int[,] m_dfa;
        int m_state;

        readonly JsonElementContext m_elementContext;

        public ParserContext(int[,] dfa, int state, JsonElementContext context) {
            m_dfa = dfa;
            m_state = state;
            m_elementContext = context;
        }

        // Moves to the next state by the token; returns false and keeps the state
        // when the token isn't allowed here.
        public bool Move(JsonTokenType token) {
            var next = m_dfa[m_state, (int)token];
            if (next == AutomatonConst.UnreachableState)
                return false;
            m_state = next;
            return true;
        }

        public JsonElementContext ElementContext {
            get { return m_elementContext; }
        }
    }

    static readonly ParserContext _jsonContext;
    static readonly ParserContext _objectContext;
    static readonly ParserContext _arrayContext;

    static JsonReader() {

        var valueExpression = MakeToken(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String);
        var memberExpression = MakeToken(JsonTokenType.String).Cat(MakeToken(JsonTokenType.NameSeparator)).Cat(valueExpression);

        // members separated by the value separator, then the end of the object
        var objectExpression = memberExpression
            .Cat(
                MakeToken(JsonTokenType.ValueSeparator)
                .Cat(memberExpression)
                .EClosure()
            )
            .Optional()
            .Cat(MakeToken(JsonTokenType.EndObject))
            .End();

        // values separated by the value separator, then the end of the array
        var arrayExpression = valueExpression
            .Cat(
                MakeToken(JsonTokenType.ValueSeparator)
                .Cat(valueExpression)
                .EClosure()
            )
            .Optional()
            .Cat(MakeToken(JsonTokenType.EndArray))
            .End();

        var jsonExpression = valueExpression.End();

        _jsonContext = CreateParserContext(jsonExpression, JsonElementContext.None);
        _objectContext = CreateParserContext(objectExpression, JsonElementContext.Object);
        _arrayContext = CreateParserContext(arrayExpression, JsonElementContext.Array);
    }

    static Token MakeToken(params JsonTokenType[] input) {
        return Token.New( input.Select(t => (int)t).ToArray() );
    }

    static ParserContext CreateParserContext(Token expr, JsonElementContext context) {

        var dfa = new DFATable();
        var builder = new RegularExpressionVisitor(dfa);
        expr.Accept(builder);
        builder.BuildDFA();

        return new ParserContext(dfa.CreateTransitionTable(), dfa.InitialState, context);
    }

    #endregion

    readonly JsonScanner m_scanner;
    // json starts from the value context and may content even a single literal
    MemberContext m_memberContext = MemberContext.MemberValue;

    JsonElementType m_elementType;
    string m_elementValue;
    string m_memberName = String.Empty;

    readonly Stack<ParserContext> m_stack = new Stack<ParserContext>();
    ParserContext m_context = _jsonContext;

    /// <summary>
    /// Creates a new reader which takes tokens from the specified scanner.
    /// </summary>
    /// <param name="scanner">The scanner to read tokens from; it is disposed together with the reader.</param>
    JsonReader(JsonScanner scanner) {
        m_scanner = scanner;
    }

    /// <summary>
    /// The current nesting depth, see the remarks of the class.
    /// </summary>
    public int Level {
        get { return m_stack.Count; }
    }

    /// <summary>
    /// The type of the element the reader is currently positioned at.
    /// </summary>
    public JsonElementType ElementType {
        get { return m_elementType; }
    }

    /// <summary>
    /// The name of the element, i.e. the name of the property of the parent object.
    /// Always an empty string for array items and for the root element.
    /// </summary>
    public string ElementName {
        get { return m_memberName; }
    }

    /// <summary>
    /// The value of the element as text. Set only for elements of type
    /// <see cref="JsonElementType.Value"/>: strings are unescaped, numbers and literals
    /// are passed as they were read, the <c>null</c> literal yields <c>null</c>.
    /// For other elements it is <c>null</c>.
    /// </summary>
    public string ElementValue {
        get { return m_elementValue; }
    }

    /// <summary>
    /// Reads the next element from the input.
    /// </summary>
    /// <returns><c>true</c> - the element has been read, <c>false</c> - the end of the data</returns>
    /// <exception cref="ParserException">An unexpected token is met or the data ends inside an object or an array.</exception>
    public bool Read() {
        string tokenValue;
        JsonTokenType tokenType;

        m_memberName = String.Empty;

        while (m_scanner.ReadToken(out tokenValue, out tokenType)) {
            if(!m_context.Move(tokenType))
                UnexpectedToken(tokenValue, tokenType);

            switch (tokenType) {
                case JsonTokenType.BeginObject:
                    m_stack.Push(m_context);
                    m_context = _objectContext;

                    m_elementValue = null;
                    m_memberContext = MemberContext.MemberName;
                    m_elementType = JsonElementType.BeginObject;
                    return true;
                case JsonTokenType.EndObject:
                    if (m_stack.Count == 0)
                        UnexpectedToken(tokenValue, tokenType);
                    m_context = m_stack.Pop();

                    m_elementValue = null;
                    m_elementType = JsonElementType.EndObject;
                    return true;
                case JsonTokenType.BeginArray:
                    m_stack.Push(m_context);
                    m_context = _arrayContext;

                    m_elementValue = null;
                    m_memberContext = MemberContext.MemberValue;
                    m_elementType = JsonElementType.BeginArray;
                    return true;
                case JsonTokenType.EndArray:
                    if (m_stack.Count == 0)
                        UnexpectedToken(tokenValue, tokenType);
                    m_context = m_stack.Pop();

                    m_elementValue = null;
                    m_elementType = JsonElementType.EndArray;
                    return true;
                case JsonTokenType.String:
                    if (m_memberContext == MemberContext.MemberName) {
                        // the name of a member, keep it and continue to the value
                        m_memberName = tokenValue;
                        break;
                    }
                    m_elementType = JsonElementType.Value;
                    m_elementValue = tokenValue;
                    return true;
                case JsonTokenType.Number:
                    m_elementType = JsonElementType.Value;
                    m_elementValue = tokenValue;
                    return true;
                case JsonTokenType.Literal:
                    m_elementType = JsonElementType.Value;
                    m_elementValue = tokenValue == "null" ? null : tokenValue;
                    return true;
                case JsonTokenType.NameSeparator:
                    m_memberContext = MemberContext.MemberValue;
                    break;
                case JsonTokenType.ValueSeparator:
                    // inside an object the next string is a member name, inside an array it's a value
                    m_memberContext = m_context.ElementContext == JsonElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue;
                    break;
                default:
                    UnexpectedToken(tokenValue, tokenType);
                    break;
            }
        }
        if (m_context.ElementContext != JsonElementContext.None)
            throw new ParserException("Unexpected end of data");

        Eof = true;

        return false;
    }

    void UnexpectedToken(object value, JsonTokenType tokenType) {
        throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value));
    }


    /// <summary>
    /// Indicates that the end of the data has been reached.
    /// </summary>
    public bool Eof {
        get;
        private set;
    }

    protected override void Dispose(bool disposing) {
        if (disposing)
            m_scanner.Dispose();
    }

    /// <summary>
    /// Moves to the end of the current object or array.
    /// </summary>
    public void SeekElementEnd() {
        var level = Level - 1;

        Debug.Assert(level >= 0);

        // Read() returns false at the end of the data. Without this check a call made at
        // the root level would spin forever in release builds, since Level never reaches -1.
        while (Level != level) {
            if (!Read())
                break;
        }
    }

    public static JsonReader Create(string file, Encoding encoding) {
        return new JsonReader(JsonTextScanner.Create(file, encoding));
    }

    public static JsonReader Create(string file) {
        return new JsonReader(JsonTextScanner.Create(file));
    }

    public static JsonReader Create(Stream stream, Encoding encoding) {
        return new JsonReader(JsonTextScanner.Create(stream, encoding));
    }

    public static JsonReader Create(Stream stream) {
        return new JsonReader(JsonTextScanner.Create(stream));
    }

    public static JsonReader Create(TextReader reader) {
        return new JsonReader(JsonTextScanner.Create(reader));
    }

    public static JsonReader ParseString(string data) {
        return new JsonReader(JsonStringScanner.Create(data));
    }

    public static JsonReader ParseString(string data, int offset, int length) {
        return new JsonReader(JsonStringScanner.Create(data, offset, length));
    }

    public static JsonReader ParseString(char[] data, int offset, int lenght) {
        return new JsonReader(JsonStringScanner.Create(data, offset, lenght));
    }
}
317 317
318 318 }
@@ -1,190 +1,190
1 1 using System;
2 2 using System.Globalization;
3 3 using Implab.Automaton;
4 4 using System.Text;
5 5 using Implab.Components;
6 6 using System.IO;
7 7
8 8 namespace Implab.Formats.Json {
/// <summary>
/// Scanner (lexer) which splits a stream of characters into JSON tokens.
/// </summary>
public abstract class JsonScanner : Disposable {
    readonly FastInputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
    readonly FastInputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

    readonly char[] m_unescapeBuf = new char[4];
    readonly char[] m_buffer;
    int m_length;
    int m_pos;
    readonly StringBuilder m_tokenBuilder = new StringBuilder();

    /// <summary>
    /// Creates the scanner over the specified buffer.
    /// </summary>
    /// <param name="buffer">The buffer which holds the data and receives the next portions of it.</param>
    /// <param name="pos">The offset of the first unread character in the buffer.</param>
    /// <param name="length">The offset just past the last available character in the buffer.</param>
    protected JsonScanner(char[] buffer, int pos, int length) {
        m_buffer = buffer;
        m_pos = pos;
        m_length = length;
    }

    // Reads one token of the JSON expression. The text of the token is accumulated in
    // m_tokenBuilder. Returns false when the data has ended before the token has started.
    bool ReadChunk(FastInputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
        scanner.ResetState();

        while(scanner.Scan(m_buffer, m_pos, m_length)) {
            // scanner requests new data

            if (m_pos != m_length) // capture results for the future
                m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);

            // read next data
            m_length = Read(m_buffer, 0, m_buffer.Length);

            if (m_length == 0) {
                // no data is read
                if (scanner.Position == m_pos) {
                    // scanned hasn't moved, that's the end
                    m_pos = 0;
                    tokenType = JsonGrammar.TokenType.None;
                    return false;
                }

                if (scanner.IsFinal) {
                    m_pos = 0;
                    tokenType = scanner.Tag;
                    return true;
                } else {
                    throw new ParserException("Unexpected EOF");
                }
            }

            m_pos = 0;
        }
        var scannerPos = scanner.Position;

        // The scanner stops at scannerPos, which is the offset of the character it
        // couldn't accept, so this is the character to report.
        if (!scanner.IsFinal)
            throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");

        tokenType = scanner.Tag;

        // Only numbers and literals carry a value. The parentheses are required since
        // '&&' binds tighter than '||'.
        if (scannerPos != m_pos && (tokenType == JsonGrammar.TokenType.Number || tokenType == JsonGrammar.TokenType.Literal))
            m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);

        m_pos = scannerPos;
        return true;
    }

    // Reads one chunk of a string literal (a run of plain characters, an escape sequence
    // or the closing quote) and appends its text to m_tokenBuilder.
    bool ReadStringChunk(FastInputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
        scanner.ResetState();

        while (scanner.Scan(m_buffer, m_pos, m_length)) {
            // scanner requests new data

            if (m_pos != m_length) // capture results for the future
                m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);

            // read next data
            m_length = Read(m_buffer, 0, m_buffer.Length);

            if (m_length == 0) {
                // no data is read
                if (scanner.Position == m_pos) {
                    // scanned hasn't moved, that's the end
                    m_pos = 0;
                    tokenType = JsonGrammar.TokenType.None;
                    return false;
                }

                if (scanner.IsFinal) {
                    m_pos = 0;
                    tokenType = scanner.Tag;
                    return true;
                } else {
                    throw new ParserException("Unexpected EOF");
                }
            }

            m_pos = 0;
        }
        var scannerPos = scanner.Position;

        // scanner stops at scannerPos
        if (!scanner.IsFinal)
            throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");

        if (scannerPos != m_pos) {
            m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
            m_pos = scannerPos;
        }
        tokenType = scanner.Tag;
        return true;
    }

    /// <summary>
    /// Reads the next portion of the data to the buffer.
    /// </summary>
    /// <returns>The number of characters read; the scanner treats 0 as the end of the data.</returns>
    protected abstract int Read(char[] buffer, int offset, int size);


    /// <summary>
    /// Reads the next lexical element from the input data.
    /// </summary>
    /// <param name="tokenValue">Receives the value of the token which has been read.</param>
    /// <param name="tokenType">Receives the type of the token which has been read.</param>
    /// <returns><c>true</c> - the token has been read. <c>false</c> - the end of the input data has been reached</returns>
    /// <remarks>If a token isn't recognized an exception is thrown. Escape sequences in
    /// strings are translated; numbers and literals are returned as their text; other
    /// tokens have no value. Whitespace tokens are skipped.</remarks>
    public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();
        while (ReadChunk(m_jsonContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    tokenValue = ReadString();
                    tokenType = JsonTokenType.String;
                    break;
                case JsonGrammar.TokenType.Number:
                    tokenValue = m_tokenBuilder.ToString();
                    tokenType = JsonTokenType.Number;
                    break;
                case JsonGrammar.TokenType.Literal:
                    tokenType = JsonTokenType.Literal;
                    tokenValue = m_tokenBuilder.ToString();
                    break;
                case JsonGrammar.TokenType.Whitespace:
                    m_tokenBuilder.Clear();
                    continue;
                default:
                    tokenType = (JsonTokenType)tag;
                    tokenValue = null;
                    break;
            }
            return true;
        }
        tokenValue = null;
        tokenType = JsonTokenType.None;
        return false;
    }

    // Reads the contents of a string literal up to the closing quote; the opening quote
    // has already been consumed by the caller.
    string ReadString() {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();

        while (ReadStringChunk(m_stringContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // drop the closing quote
                    m_tokenBuilder.Length--;
                    return m_tokenBuilder.ToString();
                case JsonGrammar.TokenType.UnescapedChar:
                    break;
                case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                    // take the four hex digits, then replace all six characters of the sequence
                    m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                    m_tokenBuilder.Length -= 6;
                    m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                    break;
                case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                    // replace the backslash and the character after it
                    var ch = m_tokenBuilder[m_tokenBuilder.Length-1];
                    m_tokenBuilder.Length -= 2;
                    m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                    break;
            }
        }

        throw new ParserException("Unexpected end of data");
    }
}
190 190 }
@@ -1,188 +1,189
1 1 <?xml version="1.0" encoding="utf-8"?>
2 2 <Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 3 <PropertyGroup>
4 4 <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
5 5 <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
6 6 <ProjectGuid>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</ProjectGuid>
7 7 <OutputType>Library</OutputType>
8 8 <RootNamespace>Implab</RootNamespace>
9 9 <AssemblyName>Implab</AssemblyName>
10 10 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
11 11 <TargetFrameworkProfile />
12 12 </PropertyGroup>
13 13 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
14 14 <DebugSymbols>true</DebugSymbols>
15 15 <DebugType>full</DebugType>
16 16 <Optimize>false</Optimize>
17 17 <OutputPath>bin\Debug</OutputPath>
18 18 <DefineConstants>TRACE;DEBUG;NET_4_5</DefineConstants>
19 19 <ErrorReport>prompt</ErrorReport>
20 20 <WarningLevel>4</WarningLevel>
21 21 <ConsolePause>false</ConsolePause>
22 22 <RunCodeAnalysis>true</RunCodeAnalysis>
23 23 </PropertyGroup>
24 24 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
25 25 <DebugType>full</DebugType>
26 26 <Optimize>true</Optimize>
27 27 <OutputPath>bin\Release</OutputPath>
28 28 <DefineConstants>NET_4_5</DefineConstants>
29 29 <ErrorReport>prompt</ErrorReport>
30 30 <WarningLevel>4</WarningLevel>
31 31 <ConsolePause>false</ConsolePause>
32 32 </PropertyGroup>
33 33 <PropertyGroup>
34 34 <SignAssembly>true</SignAssembly>
35 35 </PropertyGroup>
36 36 <PropertyGroup>
37 37 <AssemblyOriginatorKeyFile>implab.snk</AssemblyOriginatorKeyFile>
38 38 </PropertyGroup>
39 39 <ItemGroup>
40 40 <Reference Include="System" />
41 41 <Reference Include="System.Xml" />
42 42 <Reference Include="mscorlib" />
43 43 <Reference Include="System.Xml.Linq" />
44 44 </ItemGroup>
45 45 <ItemGroup>
46 46 <Compile Include="Components\StateChangeEventArgs.cs" />
47 47 <Compile Include="CustomEqualityComparer.cs" />
48 48 <Compile Include="Diagnostics\ConsoleTraceListener.cs" />
49 49 <Compile Include="Diagnostics\LogChannel.cs" />
50 50 <Compile Include="Diagnostics\LogicalOperation.cs" />
51 51 <Compile Include="Diagnostics\TextFileListener.cs" />
52 52 <Compile Include="Diagnostics\Trace.cs" />
53 53 <Compile Include="Diagnostics\TraceLog.cs" />
54 54 <Compile Include="Diagnostics\TraceEvent.cs" />
55 55 <Compile Include="Diagnostics\TraceEventType.cs" />
56 56 <Compile Include="Diagnostics\TraceSourceAttribute.cs" />
57 57 <Compile Include="Formats\CharMap.cs" />
58 <Compile Include="Formats\FastInpurScanner.cs" />
58 59 <Compile Include="Formats\InputScanner.cs" />
59 60 <Compile Include="Formats\Json\JsonStringScanner.cs" />
60 61 <Compile Include="Formats\Json\JsonTextScanner.cs" />
61 62 <Compile Include="ICancellable.cs" />
62 63 <Compile Include="IProgressHandler.cs" />
63 64 <Compile Include="IProgressNotifier.cs" />
64 65 <Compile Include="IPromiseT.cs" />
65 66 <Compile Include="IPromise.cs" />
66 67 <Compile Include="IServiceLocator.cs" />
67 68 <Compile Include="ITaskController.cs" />
68 69 <Compile Include="Parallels\DispatchPool.cs" />
69 70 <Compile Include="Parallels\ArrayTraits.cs" />
70 71 <Compile Include="Parallels\SimpleAsyncQueue.cs" />
71 72 <Compile Include="Parallels\WorkerPool.cs" />
72 73 <Compile Include="ProgressInitEventArgs.cs" />
73 74 <Compile Include="Properties\AssemblyInfo.cs" />
74 75 <Compile Include="Parallels\AsyncPool.cs" />
75 76 <Compile Include="Safe.cs" />
76 77 <Compile Include="SyncContextPromise.cs" />
77 78 <Compile Include="ValueEventArgs.cs" />
78 79 <Compile Include="PromiseExtensions.cs" />
79 80 <Compile Include="SyncContextPromiseT.cs" />
80 81 <Compile Include="Diagnostics\OperationContext.cs" />
81 82 <Compile Include="Diagnostics\TraceContext.cs" />
82 83 <Compile Include="Diagnostics\LogEventArgs.cs" />
83 84 <Compile Include="Diagnostics\LogEventArgsT.cs" />
84 85 <Compile Include="Diagnostics\Extensions.cs" />
85 86 <Compile Include="PromiseEventType.cs" />
86 87 <Compile Include="Parallels\AsyncQueue.cs" />
87 88 <Compile Include="PromiseT.cs" />
88 89 <Compile Include="IDeferred.cs" />
89 90 <Compile Include="IDeferredT.cs" />
90 91 <Compile Include="Promise.cs" />
91 92 <Compile Include="PromiseTransientException.cs" />
92 93 <Compile Include="Parallels\Signal.cs" />
93 94 <Compile Include="Parallels\SharedLock.cs" />
94 95 <Compile Include="Diagnostics\ILogWriter.cs" />
95 96 <Compile Include="Diagnostics\ListenerBase.cs" />
96 97 <Compile Include="Parallels\BlockingQueue.cs" />
97 98 <Compile Include="AbstractEvent.cs" />
98 99 <Compile Include="AbstractPromise.cs" />
99 100 <Compile Include="AbstractPromiseT.cs" />
100 101 <Compile Include="FuncTask.cs" />
101 102 <Compile Include="FuncTaskBase.cs" />
102 103 <Compile Include="FuncTaskT.cs" />
103 104 <Compile Include="ActionChainTaskBase.cs" />
104 105 <Compile Include="ActionChainTask.cs" />
105 106 <Compile Include="ActionChainTaskT.cs" />
106 107 <Compile Include="FuncChainTaskBase.cs" />
107 108 <Compile Include="FuncChainTask.cs" />
108 109 <Compile Include="FuncChainTaskT.cs" />
109 110 <Compile Include="ActionTaskBase.cs" />
110 111 <Compile Include="ActionTask.cs" />
111 112 <Compile Include="ActionTaskT.cs" />
112 113 <Compile Include="ICancellationToken.cs" />
113 114 <Compile Include="SuccessPromise.cs" />
114 115 <Compile Include="SuccessPromiseT.cs" />
115 116 <Compile Include="PromiseAwaiterT.cs" />
116 117 <Compile Include="PromiseAwaiter.cs" />
117 118 <Compile Include="Components\ComponentContainer.cs" />
118 119 <Compile Include="Components\Disposable.cs" />
119 120 <Compile Include="Components\DisposablePool.cs" />
120 121 <Compile Include="Components\ObjectPool.cs" />
121 122 <Compile Include="Components\ServiceLocator.cs" />
122 123 <Compile Include="Components\IInitializable.cs" />
123 124 <Compile Include="TaskController.cs" />
124 125 <Compile Include="Components\App.cs" />
125 126 <Compile Include="Components\IRunnable.cs" />
126 127 <Compile Include="Components\ExecutionState.cs" />
127 128 <Compile Include="Components\RunnableComponent.cs" />
128 129 <Compile Include="Components\IFactory.cs" />
129 130 <Compile Include="Automaton\IAlphabet.cs" />
130 131 <Compile Include="Automaton\ParserException.cs" />
131 132 <Compile Include="Automaton\IndexedAlphabetBase.cs" />
132 133 <Compile Include="Automaton\IAlphabetBuilder.cs" />
133 134 <Compile Include="Automaton\RegularExpressions\AltToken.cs" />
134 135 <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" />
135 136 <Compile Include="Automaton\RegularExpressions\CatToken.cs" />
136 137 <Compile Include="Automaton\RegularExpressions\StarToken.cs" />
137 138 <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" />
138 139 <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" />
139 140 <Compile Include="Automaton\RegularExpressions\Token.cs" />
140 141 <Compile Include="Automaton\RegularExpressions\IVisitor.cs" />
141 142 <Compile Include="Automaton\AutomatonTransition.cs" />
142 143 <Compile Include="Formats\Json\JsonElementContext.cs" />
143 144 <Compile Include="Formats\Json\JsonElementType.cs" />
144 145 <Compile Include="Formats\Json\JsonGrammar.cs" />
145 146 <Compile Include="Formats\Json\JsonReader.cs" />
146 147 <Compile Include="Formats\Json\JsonScanner.cs" />
147 148 <Compile Include="Formats\Json\JsonTokenType.cs" />
148 149 <Compile Include="Formats\Json\JsonWriter.cs" />
149 150 <Compile Include="Formats\Json\StringTranslator.cs" />
150 151 <Compile Include="Automaton\MapAlphabet.cs" />
151 152 <Compile Include="Formats\CharAlphabet.cs" />
152 153 <Compile Include="Formats\ByteAlphabet.cs" />
153 154 <Compile Include="Automaton\IDFATable.cs" />
154 155 <Compile Include="Automaton\IDFATableBuilder.cs" />
155 156 <Compile Include="Automaton\DFATable.cs" />
156 157 <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" />
157 158 <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" />
158 159 <Compile Include="Formats\Grammar.cs" />
159 160 <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" />
160 161 <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
161 162 <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitorT.cs" />
162 163 <Compile Include="Automaton\AutomatonConst.cs" />
163 164 <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" />
164 165 <Compile Include="Components\LazyAndWeak.cs" />
165 166 <Compile Include="AbstractTask.cs" />
166 167 <Compile Include="AbstractTaskT.cs" />
167 168 <Compile Include="FailedPromise.cs" />
168 169 <Compile Include="FailedPromiseT.cs" />
169 170 <Compile Include="Components\PollingComponent.cs" />
170 171 <Compile Include="Xml\JsonXmlReader.cs" />
171 172 <Compile Include="Xml\JsonXmlReaderOptions.cs" />
172 173 <Compile Include="Xml\JsonXmlReaderPosition.cs" />
173 174 <Compile Include="Xml\SerializationHelpers.cs" />
174 175 <Compile Include="Xml\SerializersPool.cs" />
175 176 <Compile Include="Xml\XmlSimpleAttribute.cs" />
176 177 <Compile Include="Xml\XmlNameContext.cs" />
177 178 </ItemGroup>
178 179 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
179 180 <ItemGroup>
180 181 <None Include="Implab.nuspec">
181 182 <SubType>Designer</SubType>
182 183 </None>
183 184 <None Include="implab.snk" />
184 185 </ItemGroup>
185 186 <ItemGroup>
186 187 <Content Include="license.txt" />
187 188 </ItemGroup>
188 189 </Project> No newline at end of file
@@ -1,626 +1,610
1 1 using Implab.Formats.Json;
2 2 using System;
3 3 using System.Collections.Generic;
4 4 using System.Globalization;
5 5 using System.Linq;
6 6 using System.Xml;
7 7
8 8 namespace Implab.Xml {
9 9 public class JsonXmlReader : XmlReader {
// Naming state of an enclosing JSON scope: pushed by SaveJsonName, restored by LeaveJsonScope.
10 10 struct JsonContext {
// value of m_jsonLocalName at the time the scope was entered
11 11 public string localName;
// value of m_jsonSkip at the time the scope was entered
12 12 public bool skip;
13 13 }
14 14
// source of JSON events and the options this reader was created with
15 15 JsonReader m_parser;
16 16 JsonXmlReaderOptions m_options;
// current state of the Read() state machine
17 17 JsonXmlReaderPosition m_position = JsonXmlReaderPosition.Initial;
18 18 XmlNameTable m_nameTable;
19 19
// values taken from the options in the constructor
20 20 readonly string m_jsonRootName;
21 21 readonly string m_jsonNamespace;
22 22 readonly string m_jsonPrefix;
23 23 readonly bool m_jsonFlattenArrays;
24 24 readonly string m_jsonArrayItemName;
25 25
// element name used for the current JSON scope and for the value element being emitted
26 26 string m_jsonLocalName;
27 27 string m_jsonValueName;
28 28 bool m_jsonSkip; // indicates whether the start and end tags of the current object or array are suppressed
29 29
// saved (localName, skip) pairs of the enclosing JSON scopes
30 30 readonly Stack<JsonContext> m_jsonNameStack = new Stack<JsonContext>();
31 31
// element state saved by MoveToAttributeImpl and restored by MoveToElement
32 32 XmlQualifiedName m_elementQName;
33 33 string m_elementPrefix;
34 34 int m_elementDepth;
35 35 bool m_elementIsEmpty;
36 36
// state of the current node
37 37 XmlQualifiedName m_qName;
38 38 string m_prefix;
39 39 int m_xmlDepth;
40 40
41 41 XmlSimpleAttribute[] m_attributes;
// the next two lines are the removed and the added side of the same declaration in this diff
42 object m_value;
42 string m_value;
43 43 bool m_isEmpty;
44 44
45 45 XmlNodeType m_nodeType = XmlNodeType.None;
46 46
47 47 bool m_isAttribute; // indicates that we are reading attribute nodes
// index of the current attribute and whether ReadAttributeValue has already returned its value
48 48 int m_currentAttribute;
49 49 bool m_currentAttributeRead;
50 50
51 51
// namespace prefix scope in effect for the current node
52 52 XmlNameContext m_context;
53 53
// name-table entries for the xmlns and xsi prefixes and namespaces
54 54 readonly string m_xmlnsPrefix;
55 55 readonly string m_xmlnsNamespace;
56 56 readonly string m_xsiPrefix;
57 57 readonly string m_xsiNamespace;
58 58
59 59
/// <summary>
/// Creates a reader that exposes the JSON read by <paramref name="parser"/> as XML nodes.
/// </summary>
/// <param name="parser">JSON reader to take elements from; checked with Safe.ArgumentNotNull.</param>
/// <param name="options">Reader options; when null a default <see cref="JsonXmlReaderOptions"/> instance is used.</param>
60 60 public JsonXmlReader(JsonReader parser, JsonXmlReaderOptions options) {
61 61 Safe.ArgumentNotNull(parser, nameof(parser));
62 62 m_parser = parser;
63 63
64 64 m_options = options ?? new JsonXmlReaderOptions();
65 65
66 66 m_jsonFlattenArrays = m_options.FlattenArrays;
67 67 m_nameTable = m_options.NameTable ?? new NameTable();
68 68
// every name used by the reader is added to the name table; unset options fall back to
// "data" (root element), "item" (array item) and empty namespace/prefix
69 69 m_jsonRootName = m_nameTable.Add(string.IsNullOrEmpty(m_options.RootName) ? "data" : m_options.RootName);
70 70 m_jsonArrayItemName = m_nameTable.Add(string.IsNullOrEmpty(m_options.ArrayItemName) ? "item" : m_options.ArrayItemName);
71 71 m_jsonNamespace = m_nameTable.Add(m_options.NamespaceUri ?? string.Empty);
72 72 m_jsonPrefix = m_nameTable.Add(m_options.NodesPrefix ?? string.Empty);
73 73 m_xmlnsPrefix = m_nameTable.Add(XmlNameContext.XmlnsPrefix);
74 74 m_xmlnsNamespace = m_nameTable.Add(XmlNameContext.XmlnsNamespace);
75 75 m_xsiPrefix = m_nameTable.Add(XmlNameContext.XsiPrefix);
76 76 m_xsiNamespace = m_nameTable.Add(XmlNameContext.XsiNamespace);
77 77
78 78 // TODO validate m_jsonRootName, m_jsonArrayItemName
79 79
// root naming context: no parent, depth 0
80 80 m_context = new XmlNameContext(null, 0);
81 81 }
82 82
/// <summary>
/// Gets the number of attributes in the current attribute list; 0 when there is no list.
/// </summary>
83 83 public override int AttributeCount {
84 84 get {
85 85 return m_attributes == null ? 0 : m_attributes.Length;
86 86 }
87 87 }
88 88
/// <summary>
/// Always returns an empty string.
/// </summary>
89 89 public override string BaseURI {
90 90 get {
91 91 return string.Empty;
92 92 }
93 93 }
94 94
/// <summary>
/// Gets the depth recorded for the current node.
/// </summary>
95 95 public override int Depth {
96 96 get {
97 97 return m_xmlDepth;
98 98 }
99 99 }
100 100
/// <summary>
/// Gets whether the reader position is <see cref="JsonXmlReaderPosition.Eof"/>.
/// </summary>
101 101 public override bool EOF {
102 102 get {
103 103 return m_position == JsonXmlReaderPosition.Eof;
104 104 }
105 105 }
106 106
/// <summary>
/// Gets the empty-element flag of the current node.
/// </summary>
107 107 public override bool IsEmptyElement {
108 108 get { return m_isEmpty; }
109 109 }
110 110
111 111
/// <summary>
/// Gets the local part of the current node's qualified name.
/// </summary>
112 112 public override string LocalName {
113 113 get {
114 114 return m_qName.Name;
115 115 }
116 116 }
117 117
/// <summary>
/// Gets the namespace part of the current node's qualified name.
/// </summary>
118 118 public override string NamespaceURI {
119 119 get {
120 120 return m_qName.Namespace;
121 121 }
122 122 }
123 123
/// <summary>
/// Gets the name table chosen in the constructor (from the options, or a new <see cref="System.Xml.NameTable"/>).
/// </summary>
124 124 public override XmlNameTable NameTable {
125 125 get {
126 126 return m_nameTable;
127 127 }
128 128 }
129 129
/// <summary>
/// Gets the type of the current node; <see cref="XmlNodeType.None"/> until a node has been produced.
/// </summary>
130 130 public override XmlNodeType NodeType {
131 131 get {
132 132 return m_nodeType;
133 133 }
134 134 }
135 135
/// <summary>
/// Gets the namespace prefix recorded for the current node.
/// </summary>
136 136 public override string Prefix {
137 137 get {
138 138 return m_prefix;
139 139 }
140 140 }
141 141
/// <summary>
/// Maps the internal reader position to a <see cref="System.Xml.ReadState"/>: Initial, Eof,
/// Closed and Error map to their counterparts, every other position is reported as Interactive.
/// </summary>
142 142 public override ReadState ReadState {
143 143 get {
144 144 switch (m_position) {
145 145 case JsonXmlReaderPosition.Initial:
146 146 return ReadState.Initial;
147 147 case JsonXmlReaderPosition.Eof:
148 148 return ReadState.EndOfFile;
149 149 case JsonXmlReaderPosition.Closed:
150 150 return ReadState.Closed;
151 151 case JsonXmlReaderPosition.Error:
152 152 return ReadState.Error;
153 153 default:
154 154 return ReadState.Interactive;
155 155 };
156 156 }
157 157 }
158 158
/// <summary>
/// Gets the text value of the current node, or an empty string when the node has no value.
/// </summary>
159 159 public override string Value {
160 160 get {
161 return ConvertValueToString(m_value);
// m_value is null on element and end-element nodes; the removed ConvertValueToString
// returned an empty string in that case, so the same result is kept here
161 return m_value ?? string.Empty;
162 162 }
163 163 }
164
165 static string ConvertValueToString(object value) {
166 if (value == null)
167 return string.Empty;
168
169 switch (Convert.GetTypeCode(value)) {
170 case TypeCode.Double:
171 return ((double)value).ToString(CultureInfo.InvariantCulture);
172 case TypeCode.String:
173 return (string)value;
174 case TypeCode.Boolean:
175 return (bool)value ? "true" : "false";
176 default:
177 return value.ToString();
178 }
179 }
180
164
/// <summary>
/// Gets the value of the attribute at the given index.
/// </summary>
/// <param name="i">Zero-based attribute index; checked with Safe.ArgumentInRange against 0..AttributeCount - 1.</param>
/// <returns>The value stored in the attribute.</returns>
181 165 public override string GetAttribute(int i) {
182 166 Safe.ArgumentInRange(i, 0, AttributeCount - 1, nameof(i));
// removed line of the diff followed by its replacement
183 return ConvertValueToString(m_attributes[i].Value);
167 return m_attributes[i].Value;
184 168 }
185 169
/// <summary>
/// Gets the value of the attribute whose qualified name equals the name resolved from
/// <paramref name="name"/> by the current naming context.
/// </summary>
/// <param name="name">Attribute name passed to m_context.Resolve.</param>
/// <returns>The attribute value; null when there are no attributes, no match, or the value is an empty string.</returns>
186 170 public override string GetAttribute(string name) {
187 171 if (m_attributes == null)
188 172 return null;
189 173 var qName = m_context.Resolve(name);
190 174 var attr = Array.Find(m_attributes, x => x.QName == qName);
// removed line of the diff followed by its replacement
191 var value = ConvertValueToString(attr?.Value);
175 var value = attr?.Value;
// an empty value is reported the same way as a missing attribute
192 176 return value == string.Empty ? null : value;
193 177 }
194 178
/// <summary>
/// Gets the value of the attribute with the given local name and namespace URI.
/// </summary>
/// <param name="name">Local name of the attribute.</param>
/// <param name="namespaceURI">Namespace URI of the attribute.</param>
/// <returns>The attribute value; null when there are no attributes, no match, or the value is an empty string.</returns>
195 179 public override string GetAttribute(string name, string namespaceURI) {
196 180 if (m_attributes == null)
197 181 return null;
198 182 var qName = new XmlQualifiedName(name, namespaceURI);
199 183 var attr = Array.Find(m_attributes, x => x.QName == qName);
// removed line of the diff followed by its replacement
200 var value = ConvertValueToString(attr?.Value);
184 var value = attr?.Value;
// an empty value is reported the same way as a missing attribute
201 185 return value == string.Empty ? null : value;
202 186 }
203 187
/// <summary>
/// Resolves a namespace prefix through the current naming context.
/// </summary>
/// <param name="prefix">Prefix to resolve.</param>
/// <returns>Whatever m_context.ResolvePrefix returns for the prefix.</returns>
204 188 public override string LookupNamespace(string prefix) {
205 189 return m_context.ResolvePrefix(prefix);
206 190 }
207 191
/// <summary>
/// Moves to the attribute whose qualified name equals the name resolved from
/// <paramref name="name"/> by the current naming context.
/// </summary>
/// <param name="name">Attribute name passed to m_context.Resolve.</param>
/// <returns>true if the attribute was found and became the current node; otherwise false.</returns>
208 192 public override bool MoveToAttribute(string name) {
209 193 if (m_attributes == null || m_attributes.Length == 0)
210 194 return false;
211 195
212 196 var qName = m_context.Resolve(name);
213 197 var index = Array.FindIndex(m_attributes, x => x.QName == qName);
214 198 if (index >= 0) {
215 199 MoveToAttributeImpl(index);
216 200 return true;
217 201 }
218 202 return false;
219 203 }
220 204
/// <summary>
/// Moves to the attribute with the given local name and namespace URI.
/// </summary>
/// <param name="name">Local name of the attribute.</param>
/// <param name="ns">Namespace URI of the attribute.</param>
/// <returns>true if the attribute was found and became the current node; otherwise false.</returns>
221 205 public override bool MoveToAttribute(string name, string ns) {
222 206 if (m_attributes == null || m_attributes.Length == 0)
223 207 return false;
224 208
// match on both the local name and the namespace, the same way GetAttribute(name, namespaceURI)
// does; resolving `name` through the naming context ignored `ns` altogether
225 209 var qName = new XmlQualifiedName(name, ns);
226 210 var index = Array.FindIndex(m_attributes, x => x.QName == qName);
227 211 if (index >= 0) {
228 212 MoveToAttributeImpl(index);
229 213 return true;
230 214 }
231 215 return false;
232 216 }
233 217
// Makes the attribute at index i the current node. On the first move away from the element
// its name, depth, prefix and empty flag are saved so that MoveToElement can restore them.
234 218 void MoveToAttributeImpl(int i) {
235 219 if (!m_isAttribute) {
// still positioned on the element: remember its state
236 220 m_elementQName = m_qName;
237 221 m_elementDepth = m_xmlDepth;
238 222 m_elementPrefix = m_prefix;
239 223 m_elementIsEmpty = m_isEmpty;
240 224 m_isAttribute = true;
241 225 }
242 226
243 227 var attr = m_attributes[i];
244 228
245 229
246 230 m_currentAttribute = i;
// the value of this attribute has not been returned by ReadAttributeValue yet
247 231 m_currentAttributeRead = false;
248 232 m_nodeType = XmlNodeType.Attribute;
249 233
// attributes are reported one level below their element
250 234 m_xmlDepth = m_elementDepth + 1;
251 235 m_qName = attr.QName;
252 236 m_value = attr.Value;
253 237 m_prefix = attr.Prefix;
254 238 }
255 239
/// <summary>
/// Returns from an attribute to the element that owns it, restoring the element state
/// saved by MoveToAttributeImpl.
/// </summary>
/// <returns>true if the reader was positioned on an attribute; otherwise false (nothing changes).</returns>
256 240 public override bool MoveToElement() {
257 241 if (m_isAttribute) {
258 242 m_value = null;
259 243 m_nodeType = XmlNodeType.Element;
260 244 m_xmlDepth = m_elementDepth;
261 245 m_prefix = m_elementPrefix;
262 246 m_qName = m_elementQName;
263 247 m_isEmpty = m_elementIsEmpty;
264 248 m_isAttribute = false;
265 249 return true;
266 250 }
267 251 return false;
268 252 }
269 253
/// <summary>
/// Moves to the first attribute of the current attribute list.
/// </summary>
/// <returns>true if there is at least one attribute; otherwise false.</returns>
270 254 public override bool MoveToFirstAttribute() {
271 255 if (m_attributes != null && m_attributes.Length > 0) {
272 256 MoveToAttributeImpl(0);
273 257 return true;
274 258 }
275 259 return false;
276 260 }
277 261
/// <summary>
/// Moves to the attribute following the current one, or to the first attribute when the
/// reader is not positioned on an attribute.
/// </summary>
/// <returns>true if the reader moved to an attribute; otherwise false.</returns>
278 262 public override bool MoveToNextAttribute() {
279 263 if (m_isAttribute) {
280 264 var next = m_currentAttribute + 1;
281 265 if (next < AttributeCount) {
282 266 MoveToAttributeImpl(next);
283 267 return true;
284 268 }
285 269 return false;
286 270 } else {
287 271 return MoveToFirstAttribute();
288 272 }
289 273
290 274 }
291 275
/// <summary>
/// Exposes the value of the current attribute as a text node.
/// </summary>
/// <returns>
/// true the first time it is called for an attribute; false when the reader is not on an
/// attribute or the value of the current attribute has already been returned.
/// </returns>
292 276 public override bool ReadAttributeValue() {
293 277 if (!m_isAttribute || m_currentAttributeRead)
294 278 return false;
295 279
// ValueNode resets m_attributes to null; keep the attribute list so that MoveToNextAttribute,
// GetAttribute and the element restored by MoveToElement still see the attributes afterwards
var attrs = m_attributes;
296 280 ValueNode(attrs[m_currentAttribute].Value);
m_attributes = attrs;
297 281 m_currentAttributeRead = true;
298 282 return true;
299 283 }
300 284
/// <summary>
/// Does nothing.
/// </summary>
285 269 public override void ResolveEntity() {
286 270 /* do nothing */
287 271 }
304 288
305 289 /// <summary>
306 290 /// Determines whether the next node is a sibling of the current node, i.e. whether the depth must not be increased for it
307 291 /// </summary>
308 292 /// <returns>false when the next node is nested in the current one (callers then increase the depth); otherwise true</returns>
309 293 public bool IsSibling() {
310 294 switch (m_nodeType) {
311 295 case XmlNodeType.None: // start document
312 296 case XmlNodeType.Attribute: // after an attribute only its content can be iterated with the ReadAttributeValue method
313 297 return false;
314 298 case XmlNodeType.Element:
315 299 // if the element is empty the next element will be its sibling
316 300 return m_isEmpty;
317 301 default:
318 302 return true;
319 303 }
320 304 }
321 305
// Makes a text node with the given value the current node.
// The first signature line is the removed side of the diff, the second its replacement.
322 void ValueNode(object value) {
306 void ValueNode(string value) {
307 if (!IsSibling()) // the node is nested
324 308 m_xmlDepth++;
325 309
326 310 m_qName = XmlQualifiedName.Empty;
327 311 m_nodeType = XmlNodeType.Text;
328 312 m_prefix = string.Empty;
329 313 m_value = value;
330 314 m_isEmpty = false;
// a text node carries no attributes; note that this also clears the list when called from ReadAttributeValue
331 315 m_attributes = null;
332 316 }
333 317
// Makes a start-element node the current node.
//   name, ns - local name and namespace of the element
//   attrs    - attributes of the element, may be null; namespace declarations among them are
//              registered in the naming context
//   empty    - whether the element is reported as an empty element
// Namespaces without a usable prefix (for the element or for its attributes) get a prefix from
// CreateNamespacePrefix and a matching declaration attribute is put in front of the attribute list.
334 318 void ElementNode(string name, string ns, XmlSimpleAttribute[] attrs, bool empty) {
335 319 if (!IsSibling()) // the node is nested
336 320 m_xmlDepth++;
337 321
// `context` is replaced by a child context (copy-on-first-write) once a declaration has to be recorded
338 322 var context = m_context;
339 323 List<XmlSimpleAttribute> definedAttrs = null;
340 324
341 325 // define new namespaces
342 326 if (attrs != null) {
343 327 foreach (var attr in attrs) {
344 328 if (attr.QName.Name == "xmlns") {
// default namespace declaration (empty prefix)
345 329 if (context == m_context)
346 330 context = new XmlNameContext(m_context, m_xmlDepth);
// removed line of the diff followed by its replacement
347 context.DefinePrefix(ConvertValueToString(attr.Value), string.Empty);
331 context.DefinePrefix(attr.Value, string.Empty);
348 332 } else if (attr.Prefix == m_xmlnsPrefix) {
// prefixed namespace declaration: the attribute's local name is the prefix being defined
349 333 if (context == m_context)
350 334 context = new XmlNameContext(m_context, m_xmlDepth);
// removed line of the diff followed by its replacement
351 context.DefinePrefix(ConvertValueToString(attr.Value), attr.QName.Name);
335 context.DefinePrefix(attr.Value, attr.QName.Name);
352 336 } else {
353 337 string attrPrefix;
// attributes without a namespace need no prefix
354 338 if (string.IsNullOrEmpty(attr.QName.Namespace))
355 339 continue;
356 340
357 341 // auto-define prefixes
358 342 if (!context.LookupNamespacePrefix(attr.QName.Namespace, out attrPrefix) || string.IsNullOrEmpty(attrPrefix)) {
359 343 // new namespace prefix added
// NOTE(review): unlike the other branches, `context` is not replaced by a child context
// before CreateNamespacePrefix is called here, so the call may act on m_context itself - confirm this is intended
360 344 attrPrefix = context.CreateNamespacePrefix(attr.QName.Namespace);
361 345 attr.Prefix = attrPrefix;
362 346
363 347 if (definedAttrs == null)
364 348 definedAttrs = new List<XmlSimpleAttribute>();
365 349
366 350 definedAttrs.Add(new XmlSimpleAttribute(attrPrefix, m_xmlnsNamespace, m_xmlnsPrefix, attr.QName.Namespace));
367 351 }
368 352 }
369 353 }
370 354 }
371 355
372 356 string p;
373 357 // auto-define prefixes
374 358 if (!context.LookupNamespacePrefix(ns, out p)) {
375 359 if (context == m_context)
376 360 context = new XmlNameContext(m_context, m_xmlDepth);
377 361 p = context.CreateNamespacePrefix(ns);
378 362 if (definedAttrs == null)
379 363 definedAttrs = new List<XmlSimpleAttribute>();
380 364
381 365 definedAttrs.Add(new XmlSimpleAttribute(p, m_xmlnsNamespace, m_xmlnsPrefix, ns));
382 366 }
383 367
// generated declarations come first, followed by the attributes passed by the caller
384 368 if (definedAttrs != null) {
385 369 if (attrs != null)
386 370 definedAttrs.AddRange(attrs);
387 371 attrs = definedAttrs.ToArray();
388 372 }
389 373
// the new context becomes current only for non-empty elements
// (presumably because no end element will follow an empty one to leave the context - confirm)
390 374 if (!empty)
391 375 m_context = context;
392 376
393 377 m_nodeType = XmlNodeType.Element;
394 378 m_qName = new XmlQualifiedName(name, ns);
395 379 m_prefix = p;
396 380 m_value = null;
397 381 m_isEmpty = empty;
398 382 m_attributes = attrs;
399 383 }
400 384
// Makes an end-element node with the given name and namespace the current node.
// Throws an Exception when the naming context has no prefix for the namespace.
401 385 void EndElementNode(string name, string ns) {
402 386 if (IsSibling()) {
403 387 // closing the element which has children
404 388 m_xmlDepth--;
405 389 }
406 390
407 391 string p;
408 392 if (!m_context.LookupNamespacePrefix(ns, out p))
409 393 throw new Exception($"Failed to lookup namespace '{ns}'");
410 394
// a context created at the depth of the element being closed is left here
411 395 if (m_context.Depth == m_xmlDepth)
412 396 m_context = m_context.ParentContext;
413 397
414 398 m_nodeType = XmlNodeType.EndElement;
415 399 m_prefix = p;
416 400 m_qName = new XmlQualifiedName(name, ns);
417 401 m_value = null;
418 402 m_attributes = null;
419 403 m_isEmpty = false;
420 404 }
421 405
// Makes an XML declaration node named "xml" with the value "version='1.0'" the current node.
406 void XmlDeclaration() {
423 407 if (!IsSibling()) // the node is nested
424 408 m_xmlDepth++;
425 409 m_nodeType = XmlNodeType.XmlDeclaration;
426 410 m_qName = new XmlQualifiedName("xml");
427 411 m_value = "version='1.0'";
428 412 m_prefix = string.Empty;
429 413 m_attributes = null;
430 414 m_isEmpty = false;
431 415 }
432 416
/// <summary>
/// Advances to the next XML node. The first node is the XML declaration; after that the
/// nodes are produced from the elements returned by the JSON parser. A JSON value yields
/// a start element, an optional text node and an end element on consecutive calls.
/// </summary>
/// <returns>true if a node was produced; false at the end of the input or when the reader is at Eof, Closed or Error.</returns>
/// <remarks>Any exception switches the reader to the Error position and is rethrown.</remarks>
433 417 public override bool Read() {
434 418 try {
435 419 string elementName;
436 420 XmlSimpleAttribute[] elementAttrs = null;
// leave the attribute the caller may be positioned on
437 421 MoveToElement();
438 422
// positions not listed in this switch go straight to the parser loop below
439 423 switch (m_position) {
440 424 case JsonXmlReaderPosition.Initial:
// first call: report the XML declaration
441 425 m_jsonLocalName = m_jsonRootName;
442 426 m_jsonSkip = false;
443 427 XmlDeclaration();
444 428 m_position = JsonXmlReaderPosition.Declaration;
445 429 return true;
446 430 case JsonXmlReaderPosition.Declaration:
// the first element after the declaration carries the namespace declarations:
// the xsi prefix and either the default namespace or the configured prefix for JSON nodes
447 431 elementAttrs = new[] {
448 432 new XmlSimpleAttribute(m_xsiPrefix, m_xmlnsNamespace, m_xmlnsPrefix, m_xsiNamespace),
449 433 string.IsNullOrEmpty(m_jsonPrefix) ?
450 434 new XmlSimpleAttribute(m_xmlnsPrefix, string.Empty, string.Empty, m_jsonNamespace) :
451 435 new XmlSimpleAttribute(m_jsonPrefix, m_xmlnsNamespace, m_xmlnsPrefix, m_jsonNamespace)
452 436 };
453 437 break;
454 438 case JsonXmlReaderPosition.ValueElement:
// the start element of a value was reported by the previous call
455 439 if (!m_isEmpty) {
// report the text of the value, or go directly to the end element when there is no text
456 440 if (m_parser.ElementValue != null && !m_parser.ElementValue.Equals(string.Empty))
457 441 ValueNode(m_parser.ElementValue);
458 442 else
459 443 goto case JsonXmlReaderPosition.ValueContent;
460 444 m_position = JsonXmlReaderPosition.ValueContent;
461 445 return true;
462 446 } else {
// an empty element has neither content nor an end element: continue with the parser
463 447 m_position = JsonXmlReaderPosition.ValueEndElement;
464 448 break;
465 449 }
466 450 case JsonXmlReaderPosition.ValueContent:
// close the value element
467 451 EndElementNode(m_jsonValueName, m_jsonNamespace);
468 452 m_position = JsonXmlReaderPosition.ValueEndElement;
469 453 return true;
470 454 case JsonXmlReaderPosition.Eof:
471 455 case JsonXmlReaderPosition.Closed:
472 456 case JsonXmlReaderPosition.Error:
473 457 return false;
474 458 }
475 459
// translate JSON elements until one of them produces an XML node;
// `continue` is taken when the Enter*/Leave*/Visit* helper reports that nothing is emitted
476 460 while (m_parser.Read()) {
477 461 var jsonName = m_nameTable.Add(m_parser.ElementName);
478 462
479 463 switch (m_parser.ElementType) {
480 464 case JsonElementType.BeginObject:
481 465 if (!EnterJsonObject(jsonName, out elementName))
482 466 continue;
483 467
484 468 m_position = JsonXmlReaderPosition.BeginObject;
485 469 ElementNode(elementName, m_jsonNamespace, elementAttrs, false);
486 470 break;
487 471 case JsonElementType.EndObject:
488 472 if (!LeaveJsonScope(out elementName))
489 473 continue;
490 474
491 475 m_position = JsonXmlReaderPosition.EndObject;
492 476 EndElementNode(elementName, m_jsonNamespace);
493 477 break;
494 478 case JsonElementType.BeginArray:
495 479 if (!EnterJsonArray(jsonName, out elementName))
496 480 continue;
497 481
498 482 m_position = JsonXmlReaderPosition.BeginArray;
499 483 ElementNode(elementName, m_jsonNamespace, elementAttrs, false);
500 484 break;
501 485 case JsonElementType.EndArray:
502 486 if (!LeaveJsonScope(out elementName))
503 487 continue;
504 488
505 489 m_position = JsonXmlReaderPosition.EndArray;
506 490 EndElementNode(elementName, m_jsonNamespace);
507 491 break;
508 492 case JsonElementType.Value:
509 493 if (!VisitJsonValue(jsonName, out m_jsonValueName))
510 494 continue;
511 495
512 496 m_position = JsonXmlReaderPosition.ValueElement;
513 497 if (m_parser.ElementValue == null)
514 498 // generate empty element with xsi:nil="true" attribute
515 499 ElementNode(
516 500 m_jsonValueName,
517 501 m_jsonNamespace,
518 502 new[] {
// removed line of the diff followed by its replacement
519 new XmlSimpleAttribute("nil", m_xsiNamespace, m_xsiPrefix, true)
503 new XmlSimpleAttribute("nil", m_xsiNamespace, m_xsiPrefix, "true")
520 504 },
521 505 true
522 506 );
523 507 else
// a value equal to the empty string is reported as an empty element
524 508 ElementNode(m_jsonValueName, m_jsonNamespace, elementAttrs, m_parser.ElementValue.Equals(string.Empty));
525 509 break;
526 510 default:
527 511 throw new Exception($"Unexpected JSON element {m_parser.ElementType}: {m_parser.ElementName}");
528 512 }
529 513 return true;
530 514 }
531 515
// the parser has no more elements
532 516 m_position = JsonXmlReaderPosition.Eof;
533 517 return false;
534 518 } catch {
535 519 m_position = JsonXmlReaderPosition.Error;
536 520 throw;
537 521 }
538 522 }
539 523
// Pushes the current element name and skip flag onto the name stack; LeaveJsonScope pops them.
540 524 void SaveJsonName() {
541 525 m_jsonNameStack.Push(new JsonContext {
542 526 skip = m_jsonSkip,
543 527 localName = m_jsonLocalName
544 528 });
545 529
546 530 }
547 531
/// <summary>
/// Called when the JSON parser visits a BeginObject element; saves the current naming
/// state and selects the element name for the object.
/// </summary>
/// <param name="name">Property name of the object, empty or null when it has none</param>
/// <param name="elementName">Receives the element name to use for the object</param>
/// <returns>Always true, objects are never skipped</returns>
548 532 bool EnterJsonObject(string name, out string elementName) {
549 533 SaveJsonName();
550 534 m_jsonSkip = false;
551 535
552 536 if (string.IsNullOrEmpty(name)) {
// unnamed object: unless it is the first saved scope (Count == 1) or arrays are flattened,
// it is named as an array item; otherwise the current name is kept
553 537 if (m_jsonNameStack.Count != 1 && !m_jsonFlattenArrays)
554 538 m_jsonLocalName = m_jsonArrayItemName;
555 539 } else {
556 540 m_jsonLocalName = name;
557 541 }
558 542
559 543 elementName = m_jsonLocalName;
560 544 return true;
561 545 }
562 546
563 547 /// <summary>
564 548 /// Called when JSON parser visits BeginArray ('[') element.
565 549 /// </summary>
566 550 /// <param name="name">Optional property name if the array is the member of an object</param>
/// <param name="elementName">Receives the element name selected for the array</param>
567 551 /// <returns>true if element should be emitted, false otherwise</returns>
568 552 bool EnterJsonArray(string name, out string elementName) {
569 553 SaveJsonName();
570 554
571 555 if (string.IsNullOrEmpty(name)) {
572 556 // m_jsonNameStack.Count == 1 means the root node
573 557 if (m_jsonNameStack.Count != 1 && !m_jsonFlattenArrays)
574 558 m_jsonLocalName = m_jsonArrayItemName;
575 559
576 560 m_jsonSkip = false; // we should not flatten arrays inside arrays or in the document root
577 561 } else {
// a named array is skipped when arrays are flattened
578 562 m_jsonLocalName = name;
579 563 m_jsonSkip = m_jsonFlattenArrays;
580 564 }
581 565 elementName = m_jsonLocalName;
582 566
583 567 return !m_jsonSkip;
584 568 }
585 569
/// <summary>
/// Called when the JSON parser visits a Value element; selects the element name for the value.
/// </summary>
/// <param name="name">Property name of the value, empty or null when it has none</param>
/// <param name="elementName">Receives the element name to use for the value</param>
/// <returns>Always true</returns>
586 570 bool VisitJsonValue(string name, out string elementName) {
587 571 if (string.IsNullOrEmpty(name)) {
588 572 // m_jsonNameStack.Count == 0 means that JSON document consists from simple value
589 573 elementName = (m_jsonNameStack.Count == 0 || m_jsonFlattenArrays) ? m_jsonLocalName : m_jsonArrayItemName;
590 574 } else {
591 575 elementName = name;
592 576 }
593 577 return true;
594 578 }
595 579
/// <summary>
/// Called when the JSON parser leaves an object or an array; restores the naming state
/// saved by SaveJsonName.
/// </summary>
/// <param name="elementName">Receives the element name of the scope being left</param>
/// <returns>true if an end element should be emitted, false if the scope was skipped</returns>
596 580 bool LeaveJsonScope(out string elementName) {
// read the state of the scope being left before it is overwritten by the saved one
597 581 elementName = m_jsonLocalName;
598 582 var skip = m_jsonSkip;
599 583
600 584 var prev = m_jsonNameStack.Pop();
601 585 m_jsonLocalName = prev.localName;
602 586 m_jsonSkip = prev.skip;
603 587
604 588 return !skip;
605 589 }
606 590
/// <summary>
/// Returns a short textual rendering of the current node, chosen by its node type.
/// </summary>
607 591 public override string ToString() {
608 592 switch (NodeType) {
609 593 case XmlNodeType.Element:
// start tag with all attributes; removed line of the diff followed by its replacement
610 return $"<{Name} {string.Join(" ", (m_attributes ?? new XmlSimpleAttribute[0]).Select(x => $"{x.Prefix}{(string.IsNullOrEmpty(x.Prefix) ? "" : ":")}{x.QName.Name}='{ConvertValueToString(x.Value)}'"))} {(IsEmptyElement ? "/" : "")}>";
594 return $"<{Name} {string.Join(" ", (m_attributes ?? new XmlSimpleAttribute[0]).Select(x => $"{x.Prefix}{(string.IsNullOrEmpty(x.Prefix) ? "" : ":")}{x.QName.Name}='{x.Value}'"))} {(IsEmptyElement ? "/" : "")}>";
611 595 case XmlNodeType.Attribute:
612 596 return $"@{Name}";
613 597 case XmlNodeType.Text:
614 598 return $"{Value}";
615 599 case XmlNodeType.CDATA:
616 600 return $"<![CDATA[{Value}]]>";
617 601 case XmlNodeType.EntityReference:
618 602 return $"&{Name};";
619 603 case XmlNodeType.EndElement:
620 604 return $"</{Name}>";
621 605 default:
622 606 return $".{NodeType} {Name} {Value}";
623 607 }
624 608 }
625 609 }
626 610 }
@@ -1,22 +1,22
1 1 using System;
2 2 using System.Collections.Generic;
3 3 using System.Linq;
4 4 using System.Text;
5 5 using System.Threading.Tasks;
6 6 using System.Xml;
7 7
8 8 namespace Implab.Xml {
/// <summary>
/// Simple attribute record: a qualified name, a prefix and a value.
/// </summary>
9 9 public class XmlSimpleAttribute {
/// <summary>
/// Creates an attribute.
/// </summary>
/// <param name="name">Local name; combined with <paramref name="ns"/> into <see cref="QName"/>.</param>
/// <param name="ns">Namespace of the attribute name.</param>
/// <param name="prefix">Prefix stored in <see cref="Prefix"/>.</param>
/// <param name="value">Value stored in <see cref="Value"/>.</param>
// the two signature lines are the removed and the added side of the same line in this diff
10 public XmlSimpleAttribute(string name, string ns, string prefix, object value) {
10 public XmlSimpleAttribute(string name, string ns, string prefix, string value) {
11 11 QName = new XmlQualifiedName(name, ns);
12 12 Prefix = prefix;
13 13 Value = value;
14 14 }
15 15
/// <summary>Qualified name (local name and namespace) of the attribute.</summary>
16 16 public XmlQualifiedName QName { get; set; }
17 17
/// <summary>Namespace prefix of the attribute.</summary>
18 18 public string Prefix { get; set; }
19 19
/// <summary>Attribute value.</summary>
// removed line of the diff followed by its replacement
20 public object Value { get; set; }
20 public string Value { get; set; }
21 21 }
22 22 }
General Comments 3
Under Review
author

Auto status change to "Under Review"

Approved
author

ok, latest stable version should be in default

You need to be logged in to leave comments. Login now