Auto status change to "Under Review"
@@ -1,147 +1,173 | |||
|
1 | 1 | using NUnit.Framework; |
|
2 | 2 | using System; |
|
3 | 3 | using Implab.Automaton; |
|
4 | 4 | using Implab.Xml; |
|
5 | 5 | using System.Xml; |
|
6 | 6 | using Implab.Formats; |
|
7 | 7 | using Implab.Formats.Json; |
|
8 | 8 | using System.IO; |
|
9 | 9 | |
|
10 | 10 | namespace Implab.Format.Test { |
|
11 | 11 | [TestFixture] |
|
12 | public class JsonTests { | |
|
13 | ||
|
12 | public class JsonTests { | |
|
13 | ||
|
/// <summary>
/// Feeds the scanner a string containing every supported token kind and checks
/// that the tokens are reported in order with the expected type and text.
/// </summary>
[Test]
public void TestScannerValidTokens() {
    // Expected (type, text) pairs in the order they occur in the input below.
    var expectedTokens = new Tuple<JsonTokenType, object>[] {
        new Tuple<JsonTokenType, object>(JsonTokenType.Number, "9123"),
        new Tuple<JsonTokenType, object>(JsonTokenType.ValueSeparator, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.Number, "-123"),
        new Tuple<JsonTokenType, object>(JsonTokenType.ValueSeparator, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.Number, "0"),
        new Tuple<JsonTokenType, object>(JsonTokenType.ValueSeparator, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.Number, "0.1"),
        new Tuple<JsonTokenType, object>(JsonTokenType.ValueSeparator, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.Number, "-0.2"),
        new Tuple<JsonTokenType, object>(JsonTokenType.ValueSeparator, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.Number, "-0.1e3"),
        new Tuple<JsonTokenType, object>(JsonTokenType.ValueSeparator, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.Number, "1.3E-3"),
        new Tuple<JsonTokenType, object>(JsonTokenType.ValueSeparator, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.String, "some \t\n text"),
        new Tuple<JsonTokenType, object>(JsonTokenType.ValueSeparator, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.Literal, "literal"),
        new Tuple<JsonTokenType, object>(JsonTokenType.BeginArray, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.EndArray, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.BeginObject, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.EndObject, null),
        new Tuple<JsonTokenType, object>(JsonTokenType.NameSeparator, null)
    };

    const string input = @"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:";

    using (var scanner = JsonStringScanner.Create(input)) {
        string text;
        JsonTokenType kind;

        foreach (var expected in expectedTokens) {
            Assert.IsTrue(scanner.ReadToken(out text, out kind));
            Assert.AreEqual(expected.Item1, kind);
            Assert.AreEqual(expected.Item2, text);
        }

        // Once all expected tokens are consumed the scanner must report the end of input.
        Assert.IsFalse(scanner.ReadToken(out text, out kind));
    }
}
|
55 | 55 | |
|
/// <summary>
/// Checks that malformed inputs are never returned by the scanner as a single
/// token equal to the whole input: the scanner must either throw a
/// <see cref="ParserException"/> or read something different from the input.
/// </summary>
[Test]
public void TestScannerBadTokens() {
    var malformed = new[] {
        " 1",
        " literal",
        " \"",
        "\"unclosed string",
        "1.bad",
        "001", // should be read as three numbers
        "--10",
        "+10",
        "1.0.0",
        "1e1.0",
        "l1teral0",
        ".123",
        "-.123"
    };

    foreach (var json in malformed) {
        using (var scanner = JsonStringScanner.Create(json)) {
            try {
                string text;
                JsonTokenType kind;
                scanner.ReadToken(out text, out kind);

                // The only failing outcome is the whole input coming back as one token.
                if (Object.Equals(text, json))
                    Assert.Fail("Token '{0}' shouldn't pass", json);

                var shown = text is String ? String.Format("'{0}'", text) : text;
                Console.WriteLine("'{0}' is read as {1}", json, shown);
            } catch (ParserException e) {
                // Rejection by the scanner is an accepted outcome, just report it.
                Console.WriteLine(e.Message);
            }
        }
    }
}
|
91 | 91 | |
|
/// <summary>
/// Smoke test: converts several JSON documents to XML and dumps the result to
/// the console. The test makes no assertions about the produced XML; it fails
/// only if reading throws.
/// </summary>
[Test]
public void JsonXmlReaderSimpleTest() {
    // Documents dumped node by node through DumpJsonParse.
    var samples = new[] {
        "{\"one\":1, \"two\":2}",
        "[1,\"\",2,3]",
        "[{\"info\": [7,8,9]}]"
    };

    foreach (var sample in samples)
        DumpJsonParse(sample);

    // Nested arrays, objects and nulls written out with the FlattenArrays option enabled.
    DumpJsonFlatParse("[1,2,\"\",[3,4],{\"info\": [5,6]},{\"num\": [7,8,null]}, null,[null]]");
}
|
117 | ||
|
117 | ||
|
/// <summary>
/// Rough timing comparison of <c>JsonXmlReader</c>, <c>JsonReader</c> and the
/// framework <see cref="XmlReader"/> over the same large data set. The elapsed
/// time of each reader is printed to the console; nothing is asserted.
/// </summary>
/// <remarks>
/// The data files live at fixed local paths. When they are missing the test is
/// reported as ignored instead of failing on machines without the data set.
/// </remarks>
[Test]
public void JsonBenchmark() {
    const string jsonPath = "e:\\citylots.json";
    const string xmlPath = "e:\\citylots.xml";

    if (!File.Exists(jsonPath) || !File.Exists(xmlPath))
        Assert.Ignore("Benchmark data files '{0}' and '{1}' are required", jsonPath, xmlPath);

    // Stopwatch is monotonic and high-resolution, unlike Environment.TickCount,
    // which has coarse granularity and wraps around.
    var watch = System.Diagnostics.Stopwatch.StartNew();

    // NOTE(review): JsonReader.Create(string) presumably opens the file at the given path - confirm.
    using (var reader = new JsonXmlReader(JsonReader.Create(jsonPath), new JsonXmlReaderOptions { NamespaceUri = "XmlReaderSimpleTest", RootName = "data" })) {
        while (reader.Read()) {
        }
    }
    Console.WriteLine($"JsonXmlReader: {watch.ElapsedMilliseconds} ms");

    watch.Restart();
    using (var reader = JsonReader.Create(jsonPath)) {
        while (reader.Read()) {
        }
    }
    Console.WriteLine($"JsonReader: {watch.ElapsedMilliseconds} ms");

    watch.Restart();
    using (var reader = XmlReader.Create("file:///" + xmlPath)) {
        while (reader.Read()) {
        }
    }
    Console.WriteLine($"XmlReader: {watch.ElapsedMilliseconds} ms");
}
|
143 | ||
|
/// <summary>
/// Advances <paramref name="reader"/> by one node, prints the reader indented
/// by its current depth and asserts the node type.
/// </summary>
/// <param name="reader">The reader to advance.</param>
/// <param name="expected">The node type the reader must be positioned on after the read.</param>
void AssertRead(XmlReader reader, XmlNodeType expected) {
    var advanced = reader.Read();
    Assert.IsTrue(advanced);

    // Two spaces of indentation per nesting level.
    var indent = new string(' ', reader.Depth * 2);
    Console.WriteLine($"{indent}{reader}");

    Assert.AreEqual(expected, reader.NodeType);
}
|
123 | 149 | |
|
/// <summary>
/// Prints the JSON text and then every node produced by a <c>JsonXmlReader</c>
/// over it, one line per node, indented by the node depth.
/// </summary>
/// <param name="json">The JSON document to parse.</param>
void DumpJsonParse(string json) {
    Console.WriteLine($"JSON: {json}");
    Console.WriteLine("XML");

    var source = JsonReader.ParseString(json);
    var options = new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "json" };

    using (var xmlReader = new JsonXmlReader(source, options)) {
        while (xmlReader.Read()) {
            // Two spaces of indentation per nesting level.
            var indent = new string(' ', xmlReader.Depth * 2);
            Console.WriteLine($"{indent}{xmlReader}");
        }
    }
}
|
132 | 158 | |
|
/// <summary>
/// Prints the JSON text and then the whole XML document produced by a
/// <c>JsonXmlReader</c> with <c>FlattenArrays</c> enabled, written to the
/// console through an indenting <see cref="XmlWriter"/>.
/// </summary>
/// <param name="json">The JSON document to parse.</param>
void DumpJsonFlatParse(string json) {
    Console.WriteLine($"JSON: {json}");
    Console.WriteLine("XML");

    var writerSettings = new XmlWriterSettings {
        Indent = true,
        // Console.Out must stay usable after the writer is disposed.
        CloseOutput = false,
        ConformanceLevel = ConformanceLevel.Document
    };

    using (var xmlWriter = XmlWriter.Create(Console.Out, writerSettings)) {
        var source = JsonReader.ParseString(json);
        var options = new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "", FlattenArrays = true };

        using (var xmlReader = new JsonXmlReader(source, options)) {
            xmlWriter.WriteNode(xmlReader, false);
        }
    }
}
|
145 | 171 | } |
|
146 | 172 | } |
|
147 | 173 |
@@ -1,182 +1,92 | |||
|
1 | 1 | using Implab.Formats.Json; |
|
2 | 2 | using Implab.Parallels; |
|
3 | 3 | using Implab.Xml; |
|
4 | 4 | using System; |
|
5 | 5 | using System.Collections.Concurrent; |
|
6 | 6 | using System.Collections.Generic; |
|
7 | 7 | using System.IO; |
|
8 | 8 | using System.Linq; |
|
9 | 9 | using System.Text; |
|
10 | 10 | using System.Threading; |
|
11 | 11 | using System.Threading.Tasks; |
|
12 | 12 | using System.Xml; |
|
13 | 13 | using System.Xml.Serialization; |
|
14 | 14 | |
|
15 | 15 | namespace Implab.Playground { |
|
16 | 16 | public class Program { |
|
17 | 17 | |
|
/// <summary>
/// Enqueues <paramref name="len"/> elements of <paramref name="data"/>, starting
/// at <paramref name="offset"/>, one by one into the concurrent queue.
/// </summary>
static void EnqueueRange<T>(ConcurrentQueue<T> q, T[] data, int offset, int len) {
    var index = offset;
    while (index < offset + len) {
        q.Enqueue(data[index]);
        index++;
    }
}
|
22 | 22 | |
|
/// <summary>
/// Dequeues up to <paramref name="len"/> items from the concurrent queue into
/// <paramref name="buffer"/>, starting at <paramref name="offset"/>.
/// </summary>
/// <param name="q">The queue to read from.</param>
/// <param name="buffer">The destination array.</param>
/// <param name="offset">Index in <paramref name="buffer"/> of the first stored item.</param>
/// <param name="len">Maximum number of items to dequeue.</param>
/// <param name="actual">Receives the number of items actually dequeued.</param>
/// <returns><c>true</c> if at least one item was dequeued.</returns>
static bool TryDequeueRange<T>(ConcurrentQueue<T> q, T[] buffer, int offset, int len, out int actual) {
    actual = 0;
    T item;

    // The limit is checked before dequeuing so that len <= 0 takes nothing from
    // the queue and never writes outside the requested range.
    while (actual < len && q.TryDequeue(out item)) {
        buffer[offset + actual] = item;
        actual++;
    }

    return actual != 0;
}
|
34 | 34 | |
|
/// <summary>
/// Enqueues <paramref name="len"/> elements of <paramref name="data"/>, starting
/// at <paramref name="offset"/>, one by one into the <c>SimpleAsyncQueue</c>.
/// </summary>
static void EnqueueRange<T>(SimpleAsyncQueue<T> q, T[] data, int offset, int len) {
    var index = offset;
    while (index < offset + len) {
        q.Enqueue(data[index]);
        index++;
    }
}
|
39 | 39 | |
|
/// <summary>
/// Dequeues up to <paramref name="len"/> items from the <c>SimpleAsyncQueue</c>
/// into <paramref name="buffer"/>, starting at <paramref name="offset"/>.
/// </summary>
/// <param name="q">The queue to read from.</param>
/// <param name="buffer">The destination array.</param>
/// <param name="offset">Index in <paramref name="buffer"/> of the first stored item.</param>
/// <param name="len">Maximum number of items to dequeue.</param>
/// <param name="actual">Receives the number of items actually dequeued.</param>
/// <returns><c>true</c> if at least one item was dequeued.</returns>
static bool TryDequeueRange<T>(SimpleAsyncQueue<T> q, T[] buffer, int offset, int len, out int actual) {
    actual = 0;
    T item;

    // The limit is checked before dequeuing so that len <= 0 takes nothing from
    // the queue and never writes outside the requested range.
    while (actual < len && q.TryDequeue(out item)) {
        buffer[offset + actual] = item;
        actual++;
    }

    return actual != 0;
}
|
51 | 51 | |
|
/// <summary>
/// Enqueues <paramref name="len"/> elements of <paramref name="data"/>, starting
/// at <paramref name="offset"/>, one by one into the <c>AsyncQueue</c>.
/// </summary>
static void EnqueueRange<T>(AsyncQueue<T> q, T[] data, int offset, int len) {
    var index = offset;
    while (index < offset + len) {
        q.Enqueue(data[index]);
        index++;
    }
}
|
56 | 56 | |
|
/// <summary>
/// Dequeues up to <paramref name="len"/> items from the <c>AsyncQueue</c> into
/// <paramref name="buffer"/>, starting at <paramref name="offset"/>.
/// </summary>
/// <param name="q">The queue to read from.</param>
/// <param name="buffer">The destination array.</param>
/// <param name="offset">Index in <paramref name="buffer"/> of the first stored item.</param>
/// <param name="len">Maximum number of items to dequeue.</param>
/// <param name="actual">Receives the number of items actually dequeued.</param>
/// <returns><c>true</c> if at least one item was dequeued.</returns>
static bool TryDequeueRange<T>(AsyncQueue<T> q, T[] buffer, int offset, int len, out int actual) {
    actual = 0;
    T item;

    // The limit is checked before dequeuing so that len <= 0 takes nothing from
    // the queue and never writes outside the requested range.
    while (actual < len && q.TryDequeue(out item)) {
        buffer[offset + actual] = item;
        actual++;
    }

    return actual != 0;
}
|
68 | 68 | |
|
69 | 69 | |
|
70 | 70 | /*static void EnqueueRange<T>(AsyncQueue<T> q, T[] data, int offset, int len) { |
|
71 | 71 | q.EnqueueRange(data, offset, len); |
|
72 | 72 | } |
|
73 | 73 | |
|
74 | 74 | static bool TryDequeueRange<T>(AsyncQueue<T> q, T[] buffer, int offset, int len, out int actual) { |
|
75 | 75 | return q.TryDequeueRange(buffer, offset, len, out actual); |
|
76 | 76 | }*/ |
|
77 | 77 | |
|
78 | 78 | |
|
/// <summary>
/// Playground entry point: reads a JSON document to the end with
/// <c>JsonReader</c> and prints the elapsed time.
/// </summary>
/// <param name="args">
/// Optional command line; the first argument overrides the default input
/// <c>e:\citylots.json</c>.
/// </param>
static void Main(string[] args) {
    // NOTE(review): JsonReader.Create(string) presumably opens the file at the given path - confirm.
    var path = args != null && args.Length > 0 ? args[0] : "e:\\citylots.json";

    // Stopwatch is monotonic and high-resolution, unlike Environment.TickCount.
    var watch = System.Diagnostics.Stopwatch.StartNew();
    using (var reader = JsonReader.Create(path)) {
        while (reader.Read()) {
        }
    }

    Console.WriteLine($"JsonReader: {watch.ElapsedMilliseconds} ms");

    Console.WriteLine("done");
}
|
181 | 91 | } |
|
182 | 92 | } |
@@ -1,9 +1,9 | |||
|
1 | 1 | |
|
2 | 2 | namespace Implab.Automaton { |
|
/// <summary>
/// Well-known integer values shared by the automaton classes.
/// </summary>
public static class AutomatonConst {
    /// <summary>
    /// Value stored in transition table cells for which no transition is defined.
    /// </summary>
    public const int UnreachableState = -1;

    /// <summary>
    /// Symbol class used for input symbols which are not mapped to any other class.
    /// </summary>
    public const int UnclassifiedInput = 0;
}
|
8 | 8 | } |
|
9 | 9 |
@@ -1,348 +1,348 | |||
|
1 | 1 | using Implab; |
|
2 | 2 | using System; |
|
3 | 3 | using System.Collections.Generic; |
|
4 | 4 | using System.Linq; |
|
5 | 5 | using System.Diagnostics; |
|
6 | 6 | using System.IO; |
|
7 | 7 | using System.CodeDom.Compiler; |
|
8 | 8 | using System.CodeDom; |
|
9 | 9 | |
|
10 | 10 | namespace Implab.Automaton { |
|
11 | 11 | public class DFATable : IDFATableBuilder { |
|
12 | 12 | int m_stateCount; |
|
13 | 13 | int m_symbolCount; |
|
14 | 14 | int m_initialState; |
|
15 | 15 | |
|
16 | 16 | readonly HashSet<int> m_finalStates = new HashSet<int>(); |
|
17 | 17 | readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>(); |
|
18 | 18 | |
|
19 | 19 | |
|
#region IDFADefinition implementation

/// <summary>
/// Tells whether the state <paramref name="s"/> has been marked as final.
/// </summary>
/// <param name="s">The state index; validated against the current state count.</param>
public bool IsFinalState(int s) {
    Safe.ArgumentInRange(s, 0, m_stateCount, "s");

    var isFinal = m_finalStates.Contains(s);
    return isFinal;
}

/// <summary>The states marked as final.</summary>
public IEnumerable<int> FinalStates {
    get { return m_finalStates; }
}

/// <summary>The number of states: the highest state index seen so far plus one.</summary>
public int StateCount {
    get {
        return m_stateCount;
    }
}

/// <summary>The number of input symbol classes: the highest symbol seen so far plus one.</summary>
public int AlphabetSize {
    get {
        return m_symbolCount;
    }
}

/// <summary>The state set by <see cref="SetInitialState"/>.</summary>
public int InitialState {
    get {
        return m_initialState;
    }
}

#endregion
|
47 | 47 | |
|
/// <summary>
/// Sets the initial state, growing the state count when the state is new.
/// </summary>
/// <param name="s">The non-negative index of the initial state.</param>
public void SetInitialState(int s) {
    Safe.ArgumentAssert(s >= 0, "s");

    // Make sure the state count covers the new initial state.
    if (s + 1 > m_stateCount)
        m_stateCount = s + 1;

    m_initialState = s;
}
|
53 | 53 | |
|
/// <summary>
/// Marks the state as final, growing the state count when the state is new.
/// </summary>
/// <param name="state">The non-negative index of the state.</param>
public void MarkFinalState(int state) {
    // Same validation as in SetInitialState and Add: a negative index would be
    // accepted here and then break CreateFinalStateTable, which indexes by state.
    Safe.ArgumentAssert(state >= 0, "state");

    m_stateCount = Math.Max(m_stateCount, state + 1);
    m_finalStates.Add(state);
}
|
58 | 58 | |
|
/// <summary>
/// Adds a transition and grows the state count and the alphabet size so they
/// cover both states and the edge symbol of the transition.
/// </summary>
/// <param name="item">The transition; its states and edge must be non-negative.</param>
public void Add(AutomatonTransition item) {
    Safe.ArgumentAssert(item.s1 >= 0, "item");
    Safe.ArgumentAssert(item.s2 >= 0, "item");
    Safe.ArgumentAssert(item.edge >= 0, "item");

    var highestState = Math.Max(item.s1, item.s2);
    m_stateCount = Math.Max(m_stateCount, highestState + 1);
    m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);

    m_transitions.Add(item);
}
|
69 | 69 | |
|
/// <summary>
/// Resets the table to the empty state: no states, no symbols, no final
/// states and no transitions.
/// </summary>
public void Clear() {
    m_stateCount = 0;
    m_symbolCount = 0;
    // Reset the initial state as well, otherwise InitialState keeps pointing at
    // a state which no longer exists after the counters are zeroed.
    m_initialState = 0;
    m_finalStates.Clear();
    m_transitions.Clear();
}
|
76 | 76 | |
|
/// <summary>Tells whether the transition is present in the table.</summary>
public bool Contains(AutomatonTransition item) {
    var present = m_transitions.Contains(item);
    return present;
}
|
80 | 80 | |
|
/// <summary>
/// Copies the transitions to <paramref name="array"/> starting at
/// <paramref name="arrayIndex"/>.
/// </summary>
public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
    // Delegates to the underlying transition set.
    m_transitions.CopyTo(array, arrayIndex);
}
|
84 | 84 | |
|
/// <summary>
/// Removes the transition from the table. The state count and the alphabet
/// size are left as they are.
/// </summary>
/// <returns><c>true</c> if the transition was present and has been removed.</returns>
public bool Remove(AutomatonTransition item) {
    var removed = m_transitions.Remove(item);
    return removed;
}
|
88 | 88 | |
|
/// <summary>The number of transitions in the table.</summary>
public int Count {
    get { return m_transitions.Count; }
}
|
94 | 94 | |
|
/// <summary>Always <c>false</c>: the table can be modified.</summary>
public bool IsReadOnly {
    get { return false; }
}
|
100 | 100 | |
|
/// <summary>Returns an enumerator over the transitions of the table.</summary>
public IEnumerator<AutomatonTransition> GetEnumerator() {
    IEnumerator<AutomatonTransition> transitions = m_transitions.GetEnumerator();
    return transitions;
}
|
104 | 104 | |
|
/// <summary>Non-generic enumeration, forwards to the generic <see cref="GetEnumerator"/>.</summary>
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
    System.Collections.IEnumerator transitions = GetEnumerator();
    return transitions;
}
|
108 | 108 | |
|
/// <summary>
/// Registers an input symbol class, growing the alphabet size when the symbol is new.
/// </summary>
/// <param name="symbol">The non-negative symbol class.</param>
public void AddSymbol(int symbol) {
    Safe.ArgumentAssert(symbol >= 0, "symbol");

    // Make sure the alphabet size covers the symbol.
    if (symbol + 1 > m_symbolCount)
        m_symbolCount = symbol + 1;
}
|
113 | 113 | |
|
/// <summary>
/// Builds a dense transition table indexed by [state, symbol]. Cells without a
/// transition contain <see cref="AutomatonConst.UnreachableState"/>.
/// </summary>
/// <returns>A new <c>StateCount</c> x <c>AlphabetSize</c> table of target states.</returns>
public int[,] CreateTransitionTable() {
    var table = new int[StateCount, AlphabetSize];

    for (int i = 0; i < StateCount; i++)
        for (int j = 0; j < AlphabetSize; j++)
            table[i, j] = AutomatonConst.UnreachableState;

    // The target state is stored as is: narrowing it to a byte would silently
    // corrupt every transition into a state with an index above 255.
    foreach (var t in this)
        table[t.s1, t.edge] = t.s2;

    return table;
}
|
126 | 126 | |
|
/// <summary>
/// Builds a lookup table indexed by state where the entries of final states are <c>true</c>.
/// </summary>
/// <returns>A new array of <c>StateCount</c> flags.</returns>
public bool[] CreateFinalStateTable() {
    var isFinal = new bool[StateCount];

    foreach (var finalState in FinalStates) {
        isFinal[finalState] = true;
    }

    return isFinal;
}
|
135 | 135 | |
|
/// <summary>Produces the sets into which a group of final states is split during minimization.</summary>
/// <remarks>
/// While the minimal automaton is built, the states are first divided into final
/// states and all the others, and those subsets are then split into smaller ones.
/// Sometimes final states must be guaranteed to stay distinct; override this
/// method to return the required sets of final states. The default
/// implementation keeps all passed states together in a single set.
/// </remarks>
/// <param name="states">The states to split.</param>
/// <returns>The final states.</returns>
protected virtual IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
    var together = new HashSet<int>(states);
    return new [] { together };
}
|
147 | 147 | |
|
/// <summary>
/// Builds a minimized automaton from this table: equivalent states are merged
/// by partition refinement and input symbols which always lead to the same
/// states are merged into a single class.
/// </summary>
/// <param name="optimalDFA">Receives the symbols, the initial state, the final states and the transitions of the result.</param>
/// <param name="alphabetMap">Filled with the mapping from the symbols of this table to the symbols of the result.</param>
/// <param name="stateMap">Filled with the mapping from the states of this table to the states of the result.</param>
protected void Optimize(
    IDFATableBuilder optimalDFA,
    IDictionary<int,int> alphabetMap,
    IDictionary<int,int> stateMap
) {
    // The reported parameter name matches the actual parameter.
    Safe.ArgumentNotNull(optimalDFA, "optimalDFA");
    Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
    Safe.ArgumentNotNull(stateMap, "stateMap");

    // Sets are compared by content, the hash is the sum of the element hashes.
    var setComparer = new CustomEqualityComparer<HashSet<int>>(
        (x, y) => x.SetEquals(y),
        s => s.Sum(x => x.GetHashCode())
    );

    var optimalStates = new HashSet<HashSet<int>>(setComparer);
    var queue = new HashSet<HashSet<int>>(setComparer);

    // The initial partition: the final states and all the others.
    optimalStates.Add(new HashSet<int>(FinalStates));

    var state = new HashSet<int>(
        Enumerable
            .Range(0, m_stateCount)
            .Where(i => !m_finalStates.Contains(i))
    );

    optimalStates.Add(state);
    queue.Add(state);

    // Reverse transitions: target state -> (symbol -> source states).
    var rmap = m_transitions
        .GroupBy(t => t.s2)
        .ToDictionary(
            g => g.Key, // s2
            g => g.ToLookup(t => t.edge, t => t.s1)
        );

    while (queue.Count > 0) {
        var stateA = queue.First();
        queue.Remove(stateA);

        for (int c = 0; c < m_symbolCount; c++) {
            var stateX = new HashSet<int>();
            foreach(var a in stateA.Where(rmap.ContainsKey))
                stateX.UnionWith(rmap[a][c]); // all states from which the symbol 'c' leads to the state 'a'

            // A snapshot is enumerated because the partition is modified inside the loop.
            var tmp = optimalStates.ToArray();
            foreach (var stateY in tmp) {
                var stateR1 = new HashSet<int>(stateY);
                var stateR2 = new HashSet<int>(stateY);

                stateR1.IntersectWith(stateX);
                stateR2.ExceptWith(stateX);

                // stateY is split only when stateX divides it into two non-empty parts.
                if (stateR1.Count > 0 && stateR2.Count > 0) {

                    optimalStates.Remove(stateY);
                    optimalStates.Add(stateR1);
                    optimalStates.Add(stateR2);

                    if (queue.Contains(stateY)) {
                        queue.Remove(stateY);
                        queue.Add(stateR1);
                        queue.Add(stateR2);
                    } else {
                        queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
                    }
                }
            }
        }
    }

    // additionally split the final states
    foreach (var final in optimalStates.Where(s => s.Overlaps(m_finalStates)).ToArray()) {
        optimalStates.Remove(final);
        foreach (var split in SplitFinalStates(final))
            optimalStates.Add(split);
    }

    // the map from a state of this table to the optimal state which contains it
    var nextState = 0;
    foreach (var item in optimalStates) {
        var id = nextState++;
        foreach (var s in item)
            stateMap[s] = id;
    }

    // build the minimal alphabet:
    // input symbols are indistinguishable if Move(s,a1) == Move(s,a2) for every s,
    // a clustering algorithm is used for this, initially all symbols
    // are considered indistinguishable

    var minClasses = new HashSet<HashSet<int>>(setComparer);
    var alphaQueue = new Queue<HashSet<int>>();
    alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));

    // for every state each class is checked for distinguishability,
    // i.e. symbols are distinguishable if they lead to different states
    for (int s = 0 ; s < optimalStates.Count; s++) {
        var newQueue = new Queue<HashSet<int>>();

        foreach (var A in alphaQueue) {
            // a class of a single symbol can't be split any further, it goes
            // straight to the resulting alphabet
            if (A.Count == 1) {
                minClasses.Add(A);
                continue;
            }

            // separate the symbols which lead to different optimal states
            // optimalState -> alphaClass
            var classes = new Dictionary<int, HashSet<int>>();

            foreach (var term in A) {
                // find the transition of the optimal state 's' by the symbol 'term', -1 when there is none
                var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();

                HashSet<int> a2;
                if (!classes.TryGetValue(s2, out a2)) {
                    a2 = new HashSet<int>();
                    newQueue.Enqueue(a2);
                    classes[s2] = a2;
                }
                a2.Add(term);
            }
        }

        if (newQueue.Count == 0)
            break;
        alphaQueue = newQueue;
    }

    // when the algorithm is finished the queue holds the minimal distinguishable
    // classes of the input symbols
    foreach (var A in alphaQueue)
        minClasses.Add(A);

    // build the mapping between the input alphabets,
    // since the symbol AutomatonConst.UnclassifiedInput may have a special
    // meaning, the minimal class which contains this symbol keeps
    // the same position.

    var nextCls = 0;
    foreach (var item in minClasses) {
        if (nextCls == AutomatonConst.UnclassifiedInput)
            nextCls++;

        // preserve AutomatonConst.UnclassifiedInput
        var cls = item.Contains(AutomatonConst.UnclassifiedInput) ? AutomatonConst.UnclassifiedInput : nextCls++;
        optimalDFA.AddSymbol(cls);

        foreach (var a in item)
            alphabetMap[a] = cls;
    }

    // build the automaton
    optimalDFA.SetInitialState(stateMap[m_initialState]);

    foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
        optimalDFA.MarkFinalState(sf);

    foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
        optimalDFA.Add(t);
}
|
313 | 313 | |
|
/// <summary>
/// Renders the automaton as a <c>digraph</c> description: final states are
/// emitted as box-shaped nodes and every transition as a labeled edge.
/// </summary>
/// <param name="inputAlphabet">Supplies the symbols shown on the edge labels.</param>
/// <param name="stateAlphabet">Supplies the symbols used as node names.</param>
/// <returns>The lines of the graph description joined with '\n'.</returns>
protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
    Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
    Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");

    var lines = new List<string> { "digraph dfa {" };

    foreach (var final in m_finalStates) {
        var node = String.Join("", stateAlphabet.GetSymbols(final));
        lines.Add(String.Format("{0} [shape=box];", node));
    }

    foreach (var t in m_transitions) {
        var source = String.Join("", stateAlphabet.GetSymbols(t.s1));

        // The unclassified input has no symbols of its own and is shown as '@'.
        var edgeSymbols = t.edge == AutomatonConst.UnclassifiedInput
            ? new [] { "@" }
            : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString());

        // The label is converted to a literal twice, as in the original implementation.
        var label = ToLiteral(ToLiteral(String.Join("", edgeSymbols)));
        var target = String.Join("", stateAlphabet.GetSymbols(t.s2));

        lines.Add(String.Format("{0} -> {2} [label={1}];", source, label, target));
    }

    lines.Add("}");
    return String.Join("\n", lines);
}
|
335 | 335 | |
|
/// <summary>
/// Converts the string to the text of a C# string literal using the CodeDom
/// C# code generator.
/// </summary>
/// <param name="input">The string to convert.</param>
/// <returns>The generated literal expression.</returns>
static string ToLiteral(string input) {
    using (var writer = new StringWriter())
    using (var provider = CodeDomProvider.CreateProvider("CSharp")) {
        var expression = new CodePrimitiveExpression(input);
        provider.GenerateCodeFromExpression(expression, writer, null);
        return writer.ToString();
    }
}
|
347 | 347 | } |
|
348 | 348 | } |
@@ -1,84 +1,84 | |||
|
1 | 1 | using System; |
|
2 | 2 | using System.Collections.Generic; |
|
3 | 3 | using System.Linq; |
|
4 | 4 | |
|
5 | 5 | namespace Implab.Automaton { |
|
/// <summary>
/// An alphabet backed by a dictionary which maps symbols to their classes.
/// </summary>
/// <typeparam name="T">The type of the input symbols.</typeparam>
public class MapAlphabet<T> : IAlphabetBuilder<T> {
    readonly Dictionary<T,int> m_map;
    int m_nextCls;
    readonly bool m_supportUnclassified;

    /// <summary>
    /// Creates an empty alphabet.
    /// </summary>
    /// <param name="supportUnclassified">
    /// When <c>true</c> automatic class numbering starts from 1 and unknown
    /// symbols are translated to <see cref="AutomatonConst.UnclassifiedInput"/>.
    /// </param>
    /// <param name="comparer">The comparer for the symbols, <c>null</c> selects the default one.</param>
    public MapAlphabet(bool supportUnclassified, IEqualityComparer<T> comparer) {
        // The dictionary falls back to the default comparer when null is passed.
        m_map = new Dictionary<T, int>(comparer);
        m_supportUnclassified = supportUnclassified;

        if (supportUnclassified)
            m_nextCls = 1;
        else
            m_nextCls = 0;
    }

    #region IAlphabetBuilder implementation

    /// <summary>
    /// Returns the class of the symbol, a new class is assigned to the symbol
    /// when it isn't defined yet.
    /// </summary>
    public int DefineSymbol(T symbol) {
        int existing;
        if (m_map.TryGetValue(symbol, out existing))
            return existing;

        return DefineSymbol(symbol, m_nextCls);
    }

    /// <summary>
    /// Adds a new symbol to the specified class; the underlying dictionary
    /// rejects a symbol which is already defined.
    /// </summary>
    /// <param name="symbol">The symbol to define.</param>
    /// <param name="cls">The non-negative class of the symbol.</param>
    /// <returns>The class of the symbol.</returns>
    public int DefineSymbol(T symbol, int cls) {
        Safe.ArgumentAssert(cls >= 0, "cls");

        // The next automatic class always follows the highest class in use.
        if (cls + 1 > m_nextCls)
            m_nextCls = cls + 1;

        m_map.Add(symbol, cls);
        return cls;
    }

    /// <summary>Assigns all the symbols to a new class.</summary>
    /// <returns>The class of the symbols.</returns>
    public int DefineClass(IEnumerable<T> symbols) {
        var cls = m_nextCls;
        return DefineClass(symbols, cls);
    }

    /// <summary>
    /// Assigns all the symbols to the specified class, symbols which are
    /// already defined are moved to this class.
    /// </summary>
    /// <param name="symbols">The symbols to assign.</param>
    /// <param name="cls">The non-negative class of the symbols.</param>
    /// <returns>The class of the symbols.</returns>
    public int DefineClass(IEnumerable<T> symbols, int cls) {
        Safe.ArgumentAssert(cls >= 0, "cls");
        Safe.ArgumentNotNull(symbols, "symbols");

        if (cls + 1 > m_nextCls)
            m_nextCls = cls + 1;

        foreach (var symbol in symbols) {
            m_map[symbol] = cls;
        }

        return cls;
    }

    #endregion

    #region IAlphabet implementation

    /// <summary>
    /// Translates the symbol to its class.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The symbol isn't defined and unclassified input isn't supported.
    /// </exception>
    public int Translate(T symbol) {
        int cls;
        var known = m_map.TryGetValue(symbol, out cls);

        if (!known) {
            if (!m_supportUnclassified)
                throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet");
            cls = AutomatonConst.UnclassifiedInput;
        }

        return cls;
    }

    /// <summary>The number of classes: the highest class in use plus one.</summary>
    public int Count {
        get { return m_nextCls; }
    }

    /// <summary>
    /// Tells whether the symbol can be translated; always <c>true</c> when
    /// unclassified input is supported.
    /// </summary>
    public bool Contains(T symbol) {
        if (m_supportUnclassified)
            return true;

        return m_map.ContainsKey(symbol);
    }


    /// <summary>
    /// Returns the symbols assigned to the class. When unclassified input is
    /// supported the class must be greater than zero.
    /// </summary>
    public IEnumerable<T> GetSymbols(int cls) {
        Safe.ArgumentAssert(!m_supportUnclassified || cls > 0, "cls");

        return from pair in m_map
               where pair.Value == cls
               select pair.Key;
    }
    #endregion

    /// <summary>All defined symbols paired with their classes.</summary>
    public IEnumerable<KeyValuePair<T,int>> Mappings {
        get { return m_map; }
    }
}
|
83 | 83 | } |
|
84 | 84 |
@@ -1,212 +1,212 | |||
|
1 | 1 | using Implab; |
|
2 | 2 | using System; |
|
3 | 3 | using System.Collections.Generic; |
|
4 | 4 | using System.Diagnostics; |
|
5 | 5 | using System.Linq; |
|
6 | 6 | |
|
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// Builds a DFA from a regular expression. Visiting the expression tree numbers the
    /// symbol positions and computes firstpos, lastpos and followpos; <see cref="BuildDFA"/>
    /// then turns the collected sets into states and transitions of the automaton.
    /// </summary>
    public class RegularExpressionVisitor : IVisitor {
        // number of the last position assigned to a symbol or end token
        int m_idx;
        // the first token visited
        Token m_root;
        // firstpos and lastpos of the most recently visited subexpression
        HashSet<int> m_firstpos;
        HashSet<int> m_lastpos;

        // position -> set of positions which may follow it
        readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
        // position -> input symbol found at that position
        readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
        // positions occupied by end tokens
        readonly HashSet<int> m_ends = new HashSet<int>();

        readonly IDFATableBuilder m_builder;
        // maps sets of positions to DFA state numbers; sets are compared by content
        readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
            false,
            new CustomEqualityComparer<HashSet<int>>(
                (x, y) => x.SetEquals(y),
                x => x.Sum(n => n.GetHashCode())
            )
        );

        /// <summary>
        /// Creates a visitor which reports the resulting automaton to the specified builder.
        /// </summary>
        /// <param name="builder">Receives the initial state, final states and transitions; must not be null.</param>
        public RegularExpressionVisitor(IDFATableBuilder builder) {
            Safe.ArgumentNotNull(builder, "builder");

            m_builder = builder;
        }

        /// <summary>
        /// Returns the followpos set of the position, creating an empty one on first access.
        /// </summary>
        HashSet<int> Followpos(int pos) {
            HashSet<int> set;
            return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
        }

        /// <summary>
        /// Tells whether the expression node can match the empty input.
        /// </summary>
        bool Nullable(object n) {
            if (n is EmptyToken || n is StarToken)
                return true;
            var altToken = n as AltToken;
            if (altToken != null)
                return Nullable(altToken.Left) || Nullable(altToken.Right);
            var catToken = n as CatToken;
            if (catToken != null)
                return Nullable(catToken.Left) && Nullable(catToken.Right);
            return false;
        }

        /// <summary>
        /// The number of the last position assigned so far.
        /// </summary>
        protected int Index {
            get { return m_idx; }
        }

        /// <summary>
        /// Alternation: firstpos and lastpos are the unions of those of both branches.
        /// </summary>
        public void Visit(AltToken token) {
            if (m_root == null)
                m_root = token;
            var firstpos = new HashSet<int>();
            var lastpos = new HashSet<int>();

            token.Left.Accept(this);
            firstpos.UnionWith(m_firstpos);
            lastpos.UnionWith(m_lastpos);

            token.Right.Accept(this);
            firstpos.UnionWith(m_firstpos);
            lastpos.UnionWith(m_lastpos);

            m_firstpos = firstpos;
            m_lastpos = lastpos;
        }

        /// <summary>
        /// Repetition: every last position of the operand may be followed by any of its
        /// first positions; firstpos and lastpos stay those of the operand.
        /// </summary>
        public void Visit(StarToken token) {
            if (m_root == null)
                m_root = token;
            token.Token.Accept(this);

            foreach (var i in m_lastpos)
                Followpos(i).UnionWith(m_firstpos);
        }

        /// <summary>
        /// Concatenation: the last positions of the left part are followed by the first
        /// positions of the right part; firstpos/lastpos extend across a nullable side.
        /// </summary>
        public void Visit(CatToken token) {
            if (m_root == null)
                m_root = token;

            var firstpos = new HashSet<int>();
            var lastpos = new HashSet<int>();
            token.Left.Accept(this);
            firstpos.UnionWith(m_firstpos);
            var leftLastpos = m_lastpos;

            token.Right.Accept(this);
            lastpos.UnionWith(m_lastpos);
            var rightFirstpos = m_firstpos;

            if (Nullable(token.Left))
                firstpos.UnionWith(rightFirstpos);

            if (Nullable(token.Right))
                lastpos.UnionWith(leftLastpos);

            m_firstpos = firstpos;
            m_lastpos = lastpos;

            foreach (var i in leftLastpos)
                Followpos(i).UnionWith(rightFirstpos);

        }

        /// <summary>
        /// Empty expression: it occupies no position, so its firstpos and lastpos are empty.
        /// </summary>
        public void Visit(EmptyToken token) {
            if (m_root == null)
                m_root = token;

            // Reset the sets explicitly: leaving them untouched would hand the parent node
            // the sets of whatever was visited before (or null when nothing was visited yet).
            m_firstpos = new HashSet<int>();
            m_lastpos = new HashSet<int>();
        }

        /// <summary>
        /// Symbol: takes the next position and records the input symbol expected there.
        /// </summary>
        public void Visit(SymbolToken token) {
            if (m_root == null)
                m_root = token;
            m_idx++;
            m_indexes[m_idx] = token.Value;
            m_firstpos = new HashSet<int>(new[] { m_idx });
            m_lastpos = new HashSet<int>(new[] { m_idx });
        }

        /// <summary>
        /// End marker: takes the next position, registers it as an end position and
        /// gives it an (empty) followpos entry.
        /// </summary>
        public virtual void Visit(EndToken token) {
            if (m_root == null)
                m_root = token;
            m_idx++;
            m_indexes[m_idx] = AutomatonConst.UnclassifiedInput;
            m_firstpos = new HashSet<int>(new[] { m_idx });
            m_lastpos = new HashSet<int>(new[] { m_idx });
            Followpos(m_idx);
            m_ends.Add(m_idx);
        }

        /// <summary>
        /// Builds the automaton from the sets collected while visiting the expression.
        /// States are sets of positions; the initial state is firstpos of the whole
        /// expression and a state is final when it contains an end position.
        /// </summary>
        public void BuildDFA() {
            AddState(m_firstpos);
            SetInitialState(m_firstpos);

            if(IsFinal(m_firstpos))
                MarkFinalState(m_firstpos);

            var inputMax = m_indexes.Values.Max();
            var queue = new Queue<HashSet<int>>();

            queue.Enqueue(m_firstpos);

            while (queue.Count > 0) {
                var s1 = queue.Dequeue();

                for (int a = 0; a <= inputMax; a++) {
                    // the target state is the union of followpos of all positions of s1 labeled with a
                    var s2 = new HashSet<int>();
                    foreach (var p in s1) {
                        if (m_indexes[p] == a) {
                            s2.UnionWith(Followpos(p));
                        }
                    }
                    if (s2.Count > 0) {
                        if (!HasState(s2)) {
                            AddState(s2);
                            if (IsFinal(s2))
                                MarkFinalState(s2);

                            queue.Enqueue(s2);
                        }

                        DefineTransition(s1, s2, a);
                    }

                }
            }
        }

        /// <summary>
        /// Tells whether the set of positions is already registered as a state.
        /// </summary>
        protected bool HasState(HashSet<int> state) {
            return m_states.Contains(state);
        }

        /// <summary>
        /// Registers the set of positions as a new state.
        /// </summary>
        protected void AddState(HashSet<int> state) {
            Debug.Assert(!HasState(state));

            m_states.DefineSymbol(state);
        }

        /// <summary>
        /// Returns the state number assigned to the registered set of positions.
        /// </summary>
        protected int Translate(HashSet<int> state) {
            Debug.Assert(HasState(state));

            return m_states.Translate(state);
        }

        /// <summary>
        /// Reports the initial state to the builder.
        /// </summary>
        protected virtual void SetInitialState(HashSet<int> state) {
            m_builder.SetInitialState(Translate(state));
        }

        /// <summary>
        /// Reports a final state to the builder.
        /// </summary>
        protected virtual void MarkFinalState(HashSet<int> state) {
            m_builder.MarkFinalState(Translate(state));
        }

        /// <summary>
        /// Reports the transition from <paramref name="s1"/> to <paramref name="s2"/> on input <paramref name="ch"/> to the builder.
        /// </summary>
        protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {

            m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
        }

        /// <summary>
        /// A state is final when it contains at least one end position.
        /// </summary>
        bool IsFinal(IEnumerable<int> state) {
            Debug.Assert(state != null);
            return state.Any(m_ends.Contains);
        }

    }
}
@@ -1,36 +1,36 | |||
|
1 | 1 | using System.Collections.Generic; |
|
2 | 2 | using System.Linq; |
|
3 | 3 | using Implab.Automaton; |
|
4 | 4 | using System; |
|
5 | 5 | |
|
namespace Implab.Formats {
    /// <summary>
    /// Alphabet over <see cref="char"/> in which the index of a symbol is its numeric value.
    /// </summary>
    public class CharAlphabet : IndexedAlphabetBase<char> {

        /// <summary>
        /// Returns the numeric value of the character.
        /// </summary>
        public override int GetSymbolIndex(char symbol) {
            return symbol;
        }

        /// <summary>
        /// All characters from <see cref="char.MinValue"/> to <see cref="char.MaxValue"/> inclusive.
        /// </summary>
        public IEnumerable<char> InputSymbols {
            get {
                // Enumerable.Cast<char>() cannot be used here: it unboxes each boxed int as a
                // char and throws InvalidCastException. The second argument of Range is a
                // count, so it must be one more than the span to include char.MaxValue.
                return Enumerable
                    .Range(char.MinValue, char.MaxValue - char.MinValue + 1)
                    .Select(code => (char)code);
            }
        }

        /// <summary>
        /// Creates a <see cref="CharMap"/> covering the range from the smallest to the
        /// largest mapped character; characters of the range which have no mapping get 0.
        /// </summary>
        /// <returns>The map built from the current <c>Mappings</c>.</returns>
        public CharMap CreateCharMap() {
            var map = new Dictionary<int, int>();

            int max = 0, min = char.MaxValue;
            foreach (var p in Mappings) {
                var index = GetSymbolIndex(p.Key);
                max = Math.Max(max, index);
                min = Math.Min(min, index);
                map[index] = p.Value;
            }

            // Without any mapping min stays above max and the array length below would be
            // negative; return a one-entry map with nothing classified instead.
            if (map.Count == 0)
                return new CharMap(char.MinValue, new[] { AutomatonConst.UnclassifiedInput });

            var result = new int[max - min + 1];

            for (int i = 0; i < result.Length; i++)
                map.TryGetValue(min + i, out result[i]);

            return new CharMap((char)min, result);
        }
    }
}
@@ -1,42 +1,42 | |||
|
1 | 1 | using Implab.Automaton; |
|
2 | 2 | using System; |
|
3 | 3 | using System.Collections.Generic; |
|
4 | 4 | using System.Linq; |
|
5 | 5 | using System.Runtime.CompilerServices; |
|
6 | 6 | using System.Text; |
|
7 | 7 | using System.Threading.Tasks; |
|
8 | 8 | |
|
namespace Implab.Formats {
    /// <summary>
    /// Read-only alphabet which translates characters of a contiguous range to classes
    /// through a lookup table.
    /// </summary>
    public class CharMap : IAlphabet<char> {
        // the character which corresponds to m_map[0]
        readonly char m_min;
        readonly int[] m_map;

        /// <summary>
        /// Creates the map.
        /// </summary>
        /// <param name="min">The character which corresponds to the first entry of <paramref name="map"/>.</param>
        /// <param name="map">Classes of the characters <c>min</c>, <c>min + 1</c>, ...; must not be null.</param>
        public CharMap(char min, int[] map) {
            Safe.ArgumentNotNull(map, nameof(map));
            Count = map.Max()+1;
            m_min = min;
            m_map = map;
        }

        /// <summary>
        /// The largest class in the table plus one.
        /// </summary>
        public int Count {
            get; private set;
        }

        /// <summary>
        /// Tells whether the character lies inside the table and is mapped to a class
        /// other than <c>AutomatonConst.UnclassifiedInput</c>.
        /// </summary>
        public bool Contains(char symbol) {
            return Translate(symbol) != AutomatonConst.UnclassifiedInput;
        }

        /// <summary>
        /// Enumerates the characters of the table which are mapped to the specified class.
        /// </summary>
        public IEnumerable<char> GetSymbols(int cls) {
            for (var i = 0; i < m_map.Length; i++)
                if (m_map[i] == cls)
                    yield return (char)(i + m_min);
        }

        /// <summary>
        /// Returns the class of the character, or <c>AutomatonConst.UnclassifiedInput</c>
        /// when the character lies outside the table.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int Translate(char symbol) {
            // The range is checked against the table length itself: the last valid
            // character is m_min + Length - 1, and comparing with m_min + Length would
            // read one element past the end of the table.
            var index = symbol - m_min;
            return index >= 0 && index < m_map.Length ? m_map[index] : AutomatonConst.UnclassifiedInput;
        }
    }
}
@@ -1,73 +1,73 | |||
|
1 | 1 | using Implab; |
|
2 | 2 | using System; |
|
3 | 3 | using System.Collections.Generic; |
|
4 | 4 | using System.Linq; |
|
5 | 5 | using Implab.Automaton; |
|
6 | 6 | using Implab.Automaton.RegularExpressions; |
|
7 | 7 | |
|
namespace Implab.Formats {
    /// <summary>
    /// Abstract base class for grammars; provides helpers which turn symbols of type
    /// <typeparamref name="TSymbol"/> into regular expression tokens over the alphabet
    /// supplied by <see cref="AlphabetBuilder"/>.
    /// </summary>
    public abstract class Grammar<TSymbol> {

        /// <summary>
        /// The alphabet used to translate symbols to classes.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { get; }

        /// <summary>
        /// Creates a token for the unclassified input.
        /// </summary>
        protected SymbolToken UnclassifiedToken() {
            return new SymbolToken(AutomatonConst.UnclassifiedInput);
        }

        /// <summary>
        /// Defines every symbol of the sequence in the alphabet.
        /// </summary>
        /// <param name="alphabet">Symbols to define; must not be null.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var item in alphabet) {
                AlphabetBuilder.DefineSymbol(item);
            }
        }

        /// <summary>
        /// Creates a token for the symbol, defining the symbol in the alphabet if it is unclassified.
        /// </summary>
        protected Token SymbolToken(TSymbol symbol) {
            var cls = TranslateOrAdd(symbol);
            return Token.New(cls);
        }

        /// <summary>
        /// Creates a token from the distinct symbols of the sequence, defining the
        /// unclassified ones in the alphabet.
        /// </summary>
        /// <param name="symbols">Symbols of the token; must not be null.</param>
        protected Token SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            var classes = TranslateOrAdd(symbols).ToArray();
            return Token.New(classes);
        }

        /// <summary>
        /// Same as <see cref="SymbolToken(IEnumerable{TSymbol})"/> for symbols listed inline.
        /// </summary>
        protected Token SymbolSetToken(params TSymbol[] set) {
            return SymbolToken(set);
        }

        // Translates the symbol, defining it in the alphabet when it is unclassified.
        int TranslateOrAdd(TSymbol ch) {
            var cls = AlphabetBuilder.Translate(ch);
            return cls == AutomatonConst.UnclassifiedInput
                ? AlphabetBuilder.DefineSymbol(ch)
                : cls;
        }

        // Lazily translates the distinct symbols, defining the unclassified ones.
        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        // Translates the symbol and fails when it is unclassified.
        int TranslateOrDie(TSymbol ch) {
            var cls = AlphabetBuilder.Translate(ch);
            if (cls != AutomatonConst.UnclassifiedInput)
                return cls;

            throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
        }

        // Lazily translates the distinct symbols, failing on the first unclassified one.
        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token from all classes of the alphabet except the classes of the specified symbols.
        /// </summary>
        /// <param name="symbols">Symbols to exclude; must not be null and must be classified.</param>
        protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            var all = Enumerable.Range(0, AlphabetBuilder.Count);
            var excluded = TranslateOrDie(symbols);

            return Token.New(all.Except(excluded).ToArray());
        }
    }


}
@@ -1,84 +1,84 | |||
|
1 | 1 | using Implab.Automaton; |
|
2 | 2 | using System; |
|
3 | 3 | using System.Collections.Generic; |
|
4 | 4 | using System.Linq; |
|
5 | 5 | using System.Runtime.CompilerServices; |
|
6 | 6 | using System.Text; |
|
7 | 7 | using System.Threading.Tasks; |
|
8 | 8 | |
|
namespace Implab.Formats {
    /// <summary>
    /// Runs a table-driven DFA over character buffers. Characters are translated to
    /// input classes with a <see cref="CharMap"/>; each state carries a tag of type
    /// <typeparamref name="TTag"/>.
    /// </summary>
    public class InputScanner<TTag> {
        readonly TTag[] m_tags;
        readonly int m_initialState;
        readonly int[,] m_dfa;
        readonly CharMap m_alphabet;
        readonly bool[] m_final;

        int m_position;
        int m_state;

        /// <summary>
        /// Creates a scanner over the specified tables; none of the reference arguments may be null.
        /// </summary>
        /// <param name="dfaTable">Transition table indexed by state and input class.</param>
        /// <param name="finalStates">Final-state flags indexed by state.</param>
        /// <param name="tags">Tags indexed by state.</param>
        /// <param name="initialState">The state selected by <see cref="ResetState"/>.</param>
        /// <param name="alphabet">Translates characters to input classes.</param>
        public InputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, CharMap alphabet) {
            Safe.ArgumentNotNull(dfaTable, nameof(dfaTable));
            Safe.ArgumentNotNull(finalStates, nameof(finalStates));
            Safe.ArgumentNotNull(tags, nameof(tags));
            Safe.ArgumentNotNull(alphabet, nameof(alphabet));

            m_alphabet = alphabet;
            m_initialState = initialState;
            m_tags = tags;
            m_final = finalStates;
            m_dfa = dfaTable;
        }

        /// <summary>
        /// The tag of the current state.
        /// </summary>
        public TTag Tag {
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get { return m_tags[m_state]; }
        }

        /// <summary>
        /// The offset at which the last <see cref="Scan"/> stopped.
        /// </summary>
        public int Position {
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get { return m_position; }
        }

        /// <summary>
        /// Tells whether the current state is final.
        /// </summary>
        public bool IsFinal {
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get { return m_final[m_state]; }
        }

        /// <summary>
        /// Returns the scanner to the initial state; the position is left unchanged.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void ResetState() {
            m_state = m_initialState;
        }

        /// <summary>
        /// Creates a scanner which shares the tables of this one and starts from the
        /// same state and position.
        /// </summary>
        public InputScanner<TTag> Clone() {
            return new InputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState, m_alphabet) {
                m_state = m_state,
                m_position = m_position
            };
        }

        /// <summary>
        /// Feeds the characters <c>data[offset] .. data[max - 1]</c> to the automaton.
        /// </summary>
        /// <param name="data">Buffer with the input.</param>
        /// <param name="offset">Index of the first character to read.</param>
        /// <param name="max">Index one past the last character to read.</param>
        /// <returns>
        /// <c>true</c> when all characters were consumed; <c>false</c> when a character
        /// has no transition from the current state, in which case <see cref="Position"/>
        /// is the index of that character and the state is the last one reached.
        /// </returns>
        public bool Scan(char[] data, int offset, int max) {
            for (var state = m_state; offset < max; offset++) {
                state = m_dfa[state, m_alphabet.Translate(data[offset])];
                if (state == AutomatonConst.UnreachableState) {
                    // scanner stops on the next position after last recognized symbol
                    m_position = offset;
                    return false;
                }
                m_state = state;
            }

            m_position = offset;
            return true;
        }
    }
}
@@ -1,148 +1,148 | |||
|
1 | 1 | using System.Linq; |
|
2 | 2 | using Implab.Automaton.RegularExpressions; |
|
3 | 3 | using System; |
|
4 | 4 | using Implab.Automaton; |
|
5 | 5 | using Implab.Components; |
|
6 | 6 | |
|
namespace Implab.Formats.Json {
    /// <summary>
    /// Lexical grammar of JSON: builds one scanner for tokens outside of strings and
    /// one for the contents of string literals.
    /// </summary>
    public class JsonGrammar : Grammar<char> {
        /// <summary>
        /// Tags attached to the final states of the scanners.
        /// </summary>
        public enum TokenType {
            None,
            BeginObject,
            EndObject,
            BeginArray,
            EndArray,
            String,
            Number,
            Literal,
            NameSeparator,
            ValueSeparator,
            Whitespace,

            StringBound,
            EscapedChar,
            UnescapedChar,
            EscapedUnicode
        }

        // the holder is assigned once and never replaced
        static readonly LazyAndWeak<JsonGrammar> _instance = new LazyAndWeak<JsonGrammar>(() => new JsonGrammar());

        /// <summary>
        /// The shared grammar instance.
        /// </summary>
        public static JsonGrammar Instance {
            get { return _instance.Value; }
        }

        readonly FastInputScanner<TokenType> m_jsonExpression;
        readonly FastInputScanner<TokenType> m_stringExpression;
        readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();

        /// <summary>
        /// The alphabet the grammar symbols are defined in.
        /// </summary>
        public CharAlphabet DefaultAlphabet { get { return m_defaultAlphabet; } }

        /// <summary>
        /// Defines the token expressions and builds both scanners.
        /// </summary>
        public JsonGrammar() {
            // NOTE: symbols are added to the alphabet in the order the tokens below are
            // created, so the order of these statements must be preserved.
            DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
            var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
            var digit9 = SymbolRangeToken('1', '9');
            var zero = SymbolToken('0');
            var digit = zero.Or(digit9);
            var dot = SymbolToken('.');
            var minus = SymbolToken('-');
            var sign = SymbolSetToken('-', '+');
            var expSign = SymbolSetToken('e', 'E');
            var letters = SymbolRangeToken('a', 'z');
            var integer = zero.Or(digit9.Cat(digit.EClosure()));
            var frac = dot.Cat(digit.Closure());
            var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
            var quote = SymbolToken('"');
            var backSlash = SymbolToken('\\');
            var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
            var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
            var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
            var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
            var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
            var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
            var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
            var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
            var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);

            var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
            var literal = letters.Closure();
            // everything defined so far except control characters, the backslash and the quote
            var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));

            // tokens recognized outside of string literals
            var jsonExpression =
                number.Tag(TokenType.Number)
                .Or(literal.Tag(TokenType.Literal))
                .Or(quote.Tag(TokenType.StringBound))
                .Or(beginObject.Tag(TokenType.BeginObject))
                .Or(endObject.Tag(TokenType.EndObject))
                .Or(beginArray.Tag(TokenType.BeginArray))
                .Or(endArray.Tag(TokenType.EndArray))
                .Or(nameSep.Tag(TokenType.NameSeparator))
                .Or(valueSep.Tag(TokenType.ValueSeparator))
                .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace));


            // tokens recognized inside of string literals
            var jsonStringExpression =
                quote.Tag(TokenType.StringBound)
                .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
                .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
                .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));


            m_jsonExpression = BuildFastScanner(jsonExpression);
            m_stringExpression = BuildFastScanner(jsonStringExpression);
        }

        /// <summary>
        /// Returns a clone of the scanner for tokens outside of string literals.
        /// </summary>
        public static FastInputScanner<TokenType> CreateJsonExpressionScanner() {
            return Instance.m_jsonExpression.Clone();
        }

        /// <summary>
        /// Returns a clone of the scanner for the contents of string literals.
        /// </summary>
        public static FastInputScanner<TokenType> CreateStringExpressionScanner() {
            return Instance.m_stringExpression.Clone();
        }

        protected override IAlphabetBuilder<char> AlphabetBuilder {
            get {
                return m_defaultAlphabet;
            }
        }

        // Creates a token from all characters between start and stop inclusive.
        Token SymbolRangeToken(char start, char stop) {
            return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
        }

        /// <summary>
        /// Builds a scanner for the specified expression from an optimized DFA.
        /// </summary>
        /// <param name="regexp">The tagged expression to recognize.</param>
        /// <returns>The scanner over the optimized automaton and its own alphabet.</returns>
        /// <exception cref="ApplicationException">The initial state of the automaton is final.</exception>
        public FastInputScanner<TokenType> BuildFastScanner(Token regexp) {
            var dfa = new RegularDFA<char, TokenType>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TokenType>(dfa);
            regexp.Accept(visitor);
            visitor.BuildDFA();

            if (dfa.IsFinalState(dfa.InitialState))
                throw new ApplicationException("The specified language contains empty token");

            var ab = new CharAlphabet();
            var optimal = dfa.Optimize(ab);

            return new FastInputScanner<TokenType>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                NormalizeTags(optimal.CreateTagTable()),
                optimal.InitialState,
                ab.GetTranslationMap()
            );
        }

        // Reduces the tag list of every state to a single tag: the default value for
        // states without tags; states with several tags are rejected.
        static TokenType[] NormalizeTags(TokenType[][] tags) {
            var result = new TokenType[tags.Length];
            for(var i = 0; i< tags.Length; i++) {
                if (tags[i] == null || tags[i].Length == 0)
                    result[i] = default(TokenType);
                else if (tags[i].Length == 1)
                    result[i] = tags[i][0];
                else
                    // a specific exception type instead of the bare System.Exception
                    throw new InvalidOperationException($"Ambigous state tags {string.Join(", ", tags[i])}");
            }
            return result;
        }

    }
}
@@ -1,318 +1,318 | |||
|
1 | 1 | using System; |
|
2 | 2 | using System.Diagnostics; |
|
3 | 3 | using System.IO; |
|
4 | 4 | using Implab.Automaton; |
|
5 | 5 | using Implab.Automaton.RegularExpressions; |
|
6 | 6 | using System.Linq; |
|
7 | 7 | using Implab.Components; |
|
8 | 8 | using System.Collections.Generic; |
|
9 | 9 | using System.Text; |
|
10 | 10 | using System.Globalization; |
|
11 | 11 | |
|
12 | 12 | namespace Implab.Formats.Json { |
|
13 | 13 | /// <summary> |
|
14 | 14 | /// Pull парсер JSON данных. |
|
15 | 15 | /// </summary> |
|
16 | 16 | /// <remarks> |
|
17 | 17 | /// Следует отметить отдельную интерпретацию свойства <see cref="Level"/>, |
|
18 | 18 | /// оно означает текущий уровень вложенности объектов, однако закрывающий |
|
19 | 19 | /// элемент объекта и массива имеет уровень меньше, чем сам объект. |
|
20 | 20 | /// <code> |
|
21 | 21 | /// { // Level = 1 |
|
22 | 22 | /// "name" : "Peter", // Level = 1 |
|
23 | 23 | /// "address" : { // Level = 2 |
|
24 | 24 | /// city : "Stern" // Level = 2 |
|
25 | 25 | /// } // Level = 1 |
|
26 | 26 | /// } // Level = 0 |
|
27 | 27 | /// </code> |
|
28 | 28 | /// </remarks> |
|
29 | 29 | public class JsonReader : Disposable { |
|
30 | 30 | |
|
// Whether the reader is in a member-name or a member-value context.
enum MemberContext {
    MemberName = 0,
    MemberValue = 1
}
|
35 | 35 | |
|
36 | 36 | #region Parser rules |
|
// State of a syntax automaton together with the kind of element it describes.
struct ParserContext {
    readonly int[,] m_dfa;
    int m_state;

    readonly JsonElementContext m_elementContext;

    public ParserContext(int[,] dfa, int state, JsonElementContext context) {
        m_elementContext = context;
        m_state = state;
        m_dfa = dfa;
    }

    // Follows the transition for the token; the state is left unchanged and false
    // is returned when there is no transition.
    public bool Move(JsonTokenType token) {
        var target = m_dfa[m_state, (int)token];
        var reachable = target != AutomatonConst.UnreachableState;
        if (reachable)
            m_state = target;
        return reachable;
    }

    public JsonElementContext ElementContext {
        get { return m_elementContext; }
    }
}
|
61 | 61 | |
|
62 | 62 | static readonly ParserContext _jsonContext; |
|
63 | 63 | static readonly ParserContext _objectContext; |
|
64 | 64 | static readonly ParserContext _arrayContext; |
|
65 | 65 | |
|
// Builds the syntax automatons for the top level, for objects and for arrays.
static JsonReader() {

    // any token which starts or constitutes a value
    var value = MakeToken(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String);
    // name ':' value
    var member = MakeToken(JsonTokenType.String).Cat(MakeToken(JsonTokenType.NameSeparator)).Cat(value);

    // optional list of members separated with ',' followed by the end of the object
    var moreMembers = MakeToken(JsonTokenType.ValueSeparator).Cat(member).EClosure();
    var objectBody = member.Cat(moreMembers).Optional();
    var objectExpression = objectBody.Cat(MakeToken(JsonTokenType.EndObject)).End();

    // optional list of values separated with ',' followed by the end of the array
    var moreValues = MakeToken(JsonTokenType.ValueSeparator).Cat(value).EClosure();
    var arrayBody = value.Cat(moreValues).Optional();
    var arrayExpression = arrayBody.Cat(MakeToken(JsonTokenType.EndArray)).End();

    // the document itself is a single value
    var jsonExpression = value.End();

    _jsonContext = CreateParserContext(jsonExpression, JsonElementContext.None);
    _objectContext = CreateParserContext(objectExpression, JsonElementContext.Object);
    _arrayContext = CreateParserContext(arrayExpression, JsonElementContext.Array);
}
|
97 | 97 | |
|
// Creates a token from the numeric values of the specified token types.
static Token MakeToken(params JsonTokenType[] input) {
    var codes = input.Select(type => (int)type).ToArray();
    return Token.New(codes);
}
|
101 | 101 | |
|
// Builds a DFA for the expression and wraps its transition table and initial
// state into a parser context for the specified kind of element.
static ParserContext CreateParserContext(Token expr, JsonElementContext context) {
    var table = new DFATable();

    var visitor = new RegularExpressionVisitor(table);
    expr.Accept(visitor);
    visitor.BuildDFA();

    var transitions = table.CreateTransitionTable();
    return new ParserContext(transitions, table.InitialState, context);
}
|
111 | 111 | |
|
112 | 112 | #endregion |
|
113 | 113 | |
|
114 | 114 | readonly JsonScanner m_scanner; |
|
115 | 115 | // json starts from the value context and may content even a single literal |
|
116 | 116 | MemberContext m_memberContext = MemberContext.MemberValue; |
|
117 | 117 | |
|
118 | 118 | JsonElementType m_elementType; |
|
119 |
|
|
|
119 | string m_elementValue; | |
|
120 | 120 | string m_memberName = String.Empty; |
|
121 | 121 | |
|
122 | 122 | Stack<ParserContext> m_stack = new Stack<ParserContext>(); |
|
123 | 123 | ParserContext m_context = _jsonContext; |
|
124 | 124 | |
|
/// <summary>
/// Creates a new parser which reads tokens from the specified scanner.
/// </summary>
/// <param name="scanner">The source of JSON tokens.</param>
JsonReader(JsonScanner scanner) {
    m_scanner = scanner;
}
|
132 | 132 | |
|
/// <summary>
/// The number of parser contexts currently on the stack.
/// </summary>
public int Level => m_stack.Count;
|
136 | 136 | |
|
/// <summary>
/// The type of the element the parser is currently positioned at.
/// </summary>
public JsonElementType ElementType => m_elementType;
|
143 | 143 | |
|
/// <summary>
/// The name of the element, i.e. the name of the property of the parent container.
/// Always an empty string for array items and for the root element.
/// </summary>
public string ElementName => m_memberName;
|
151 | 151 | |
|
/// <summary>
/// The value of the element. Only for elements of type <see cref="JsonElementType.Value"/>,
/// <c>null</c> for the others.
/// </summary>
public string ElementValue => m_elementValue;
|
158 | 158 | |
|
/// <summary>
/// Reads the next element from the input.
/// </summary>
/// <returns><c>true</c> when an element was read, <c>false</c> when the end of the data is reached.</returns>
public bool Read() {
    // the name belongs to a single element only, so forget the previous one
    m_memberName = String.Empty;

    string text;
    JsonTokenType kind;

    while (true) {
        if (!m_scanner.ReadToken(out text, out kind))
            break;

        // the grammar automaton decides whether the token is legal at this point
        if (!m_context.Move(kind))
            UnexpectedToken(text, kind);

        switch (kind) {
            case JsonTokenType.BeginObject:
                m_stack.Push(m_context);
                m_context = _objectContext;

                m_elementType = JsonElementType.BeginObject;
                m_memberContext = MemberContext.MemberName;
                m_elementValue = null;
                return true;

            case JsonTokenType.EndObject:
                if (m_stack.Count == 0)
                    UnexpectedToken(text, kind);
                m_context = m_stack.Pop();

                m_elementType = JsonElementType.EndObject;
                m_elementValue = null;
                return true;

            case JsonTokenType.BeginArray:
                m_stack.Push(m_context);
                m_context = _arrayContext;

                m_elementType = JsonElementType.BeginArray;
                m_memberContext = MemberContext.MemberValue;
                m_elementValue = null;
                return true;

            case JsonTokenType.EndArray:
                if (m_stack.Count == 0)
                    UnexpectedToken(text, kind);
                m_context = m_stack.Pop();

                m_elementType = JsonElementType.EndArray;
                m_elementValue = null;
                return true;

            case JsonTokenType.String:
                if (m_memberContext != MemberContext.MemberName) {
                    // a string in value position is an ordinary value
                    m_elementValue = text;
                    m_elementType = JsonElementType.Value;
                    return true;
                }
                // a string in name position only names the upcoming element
                m_memberName = text;
                break;

            case JsonTokenType.Number:
                m_elementValue = text;
                m_elementType = JsonElementType.Value;
                return true;

            case JsonTokenType.Literal:
                m_elementType = JsonElementType.Value;
                // the JSON literal null is surfaced as a null reference
                if (text == "null")
                    m_elementValue = null;
                else
                    m_elementValue = text;
                return true;

            case JsonTokenType.NameSeparator:
                m_memberContext = MemberContext.MemberValue;
                break;

            case JsonTokenType.ValueSeparator:
                // after a comma an object expects a member name, an array expects a value
                if (m_context.ElementContext == JsonElementContext.Object)
                    m_memberContext = MemberContext.MemberName;
                else
                    m_memberContext = MemberContext.MemberValue;
                break;

            default:
                UnexpectedToken(text, kind);
                break;
        }
    }

    // the input ended; that is an error while still inside an object or an array
    if (m_context.ElementContext != JsonElementContext.None)
        throw new ParserException("Unexpedted end of data");

    Eof = true;

    return false;
}
|
240 | 240 | |
|
// Maps a JSON literal to its CLR value: "true"/"false" become boxed booleans and
// "null" becomes a null reference. Any other text is reported through UnexpectedToken.
object ParseLiteral(string literal) {
    if (literal == "true")
        return true;

    if (literal == "false")
        return false;

    if (literal != "null")
        UnexpectedToken(literal, JsonTokenType.Literal);

    // reached for "null"; also satisfies the compiler, since UnexpectedToken always throws
    return null;
}
|
254 | 254 | |
|
// Always throws a ParserException naming the offending token type and its value.
void UnexpectedToken(object value, JsonTokenType tokenType) {
    var message = String.Format("Unexpected token {0}: '{1}'", tokenType, value);
    throw new ParserException(message);
}
|
258 | 258 | |
|
259 | 259 | |
|
/// <summary>
/// Indicates that the end of the input has been reached.
/// </summary>
public bool Eof { get; private set; }
|
267 | 267 | |
|
// Releases the underlying scanner when the reader is disposed explicitly.
protected override void Dispose(bool disposing) {
    if (!disposing)
        return;

    m_scanner.Dispose();
}
|
272 | 272 | |
|
/// <summary>
/// Skips forward to the end of the current object or array, i.e. reads elements
/// until the nesting level drops by one.
/// </summary>
/// <remarks>
/// Intended to be called while inside a container (<see cref="Level"/> greater than zero).
/// If the input ends before the target level is reached the method returns instead of
/// looping forever.
/// </remarks>
public void SeekElementEnd() {
    var level = Level - 1;

    Debug.Assert(level >= 0);

    while (Level != level) {
        // Debug.Assert is compiled out of release builds; when called at the root level
        // the target is -1 and can never be reached, so stop once Read reports end of data.
        if (!Read())
            break;
    }
}
|
284 | 284 | |
|
/// <summary>
/// Creates a reader over a text scanner built from <paramref name="file"/> and <paramref name="encoding"/>.
/// </summary>
public static JsonReader Create(string file, Encoding encoding) {
    var scanner = JsonTextScanner.Create(file, encoding);
    return new JsonReader(scanner);
}
|
288 | 288 | |
|
/// <summary>
/// Creates a reader over a text scanner built from <paramref name="file"/>.
/// </summary>
public static JsonReader Create(string file) {
    var scanner = JsonTextScanner.Create(file);
    return new JsonReader(scanner);
}
|
292 | 292 | |
|
/// <summary>
/// Creates a reader over a text scanner built from <paramref name="stream"/> and <paramref name="encoding"/>.
/// </summary>
public static JsonReader Create(Stream stream, Encoding encoding) {
    var scanner = JsonTextScanner.Create(stream, encoding);
    return new JsonReader(scanner);
}
|
296 | 296 | |
|
/// <summary>
/// Creates a reader over a text scanner built from <paramref name="stream"/>.
/// </summary>
public static JsonReader Create(Stream stream) {
    var scanner = JsonTextScanner.Create(stream);
    return new JsonReader(scanner);
}
|
300 | 300 | |
|
/// <summary>
/// Creates a reader over a text scanner built from <paramref name="reader"/>.
/// </summary>
public static JsonReader Create(TextReader reader) {
    var scanner = JsonTextScanner.Create(reader);
    return new JsonReader(scanner);
}
|
304 | 304 | |
|
/// <summary>
/// Creates a reader over JSON text held in the string <paramref name="data"/>.
/// </summary>
public static JsonReader ParseString(string data) {
    var scanner = JsonStringScanner.Create(data);
    return new JsonReader(scanner);
}
|
308 | 308 | |
|
/// <summary>
/// Creates a reader over a string scanner built from <paramref name="data"/>,
/// <paramref name="offset"/> and <paramref name="length"/>.
/// </summary>
public static JsonReader ParseString(string data, int offset, int length) {
    var scanner = JsonStringScanner.Create(data, offset, length);
    return new JsonReader(scanner);
}
|
312 | 312 | |
|
/// <summary>
/// Creates a reader over a string scanner built from the character array <paramref name="data"/>,
/// <paramref name="offset"/> and <paramref name="lenght"/>.
/// </summary>
public static JsonReader ParseString(char[] data, int offset, int lenght) {
    var scanner = JsonStringScanner.Create(data, offset, lenght);
    return new JsonReader(scanner);
}
|
316 | 316 | } |
|
317 | 317 | |
|
318 | 318 | } |
@@ -1,190 +1,190 | |||
|
1 | 1 | using System; |
|
2 | 2 | using System.Globalization; |
|
3 | 3 | using Implab.Automaton; |
|
4 | 4 | using System.Text; |
|
5 | 5 | using Implab.Components; |
|
6 | 6 | using System.IO; |
|
7 | 7 | |
|
8 | 8 | namespace Implab.Formats.Json { |
|
/// <summary>
/// Scanner (lexer) that splits a stream of characters into JSON tokens.
/// </summary>
/// <remarks>
/// Works over a single character buffer; derived classes supply more input through
/// <see cref="Read"/> whenever the buffer is exhausted.
/// </remarks>
public abstract class JsonScanner : Disposable {
    // automaton recognizing top-level JSON tokens
    readonly FastInputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
    // automaton recognizing the pieces of a string literal body
    readonly FastInputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

    // receives the four hex digits of a \uXXXX escape
    readonly char[] m_unescapeBuf = new char[4];
    readonly char[] m_buffer;
    // exclusive end index of the valid data in m_buffer
    int m_length;
    // index of the next unread character in m_buffer
    int m_pos;
    // accumulates the text of the token being read, possibly across buffer refills
    readonly StringBuilder m_tokenBuilder = new StringBuilder();

    /// <summary>
    /// Initializes the scanner over the specified buffer.
    /// </summary>
    /// <param name="buffer">Working buffer; it is refilled in place when more input is needed.</param>
    /// <param name="pos">Index of the first unread character in <paramref name="buffer"/>.</param>
    /// <param name="length">Stored as the exclusive end index of the valid data in <paramref name="buffer"/>.</param>
    protected JsonScanner(char[] buffer, int pos, int length) {
        m_buffer = buffer;
        m_pos = pos;
        m_length = length;
    }

    // Runs the token automaton over the input and reports the tag of the recognized token.
    // Returns false when the input is exhausted before any character is consumed.
    // Only the text of Number and Literal tokens is collected in m_tokenBuilder.
    bool ReadChunk(FastInputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
        scanner.ResetState();

        while (scanner.Scan(m_buffer, m_pos, m_length)) {
            // scanner requests new data

            if (m_pos != m_length) // capture results for the future
                m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);

            // read next data
            m_length = Read(m_buffer, 0, m_buffer.Length);

            if (m_length == 0) {
                // no data is read
                if (scanner.Position == m_pos) {
                    // scanner hasn't moved, that's the end
                    m_pos = 0;
                    tokenType = JsonGrammar.TokenType.None;
                    return false;
                }

                if (scanner.IsFinal) {
                    m_pos = 0;
                    tokenType = scanner.Tag;
                    return true;
                } else {
                    throw new ParserException("Unexpected EOF");
                }
            }

            m_pos = 0;
        }
        var scannerPos = scanner.Position;

        // scanner stops at scannerPos; scanning resumes from that index, so this is
        // the character that could not be consumed (same reporting as ReadStringChunk)
        if (!scanner.IsFinal)
            throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");

        tokenType = scanner.Tag;
        // only numbers and literals carry text; an empty range has nothing to append
        if (scannerPos != m_pos && (tokenType == JsonGrammar.TokenType.Number || tokenType == JsonGrammar.TokenType.Literal))
            m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);

        m_pos = scannerPos;
        return true;
    }

    // Same refill loop as ReadChunk, but for the inside of a string literal:
    // every consumed character is appended to m_tokenBuilder.
    bool ReadStringChunk(FastInputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
        scanner.ResetState();

        while (scanner.Scan(m_buffer, m_pos, m_length)) {
            // scanner requests new data

            if (m_pos != m_length) // capture results for the future
                m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);

            // read next data
            m_length = Read(m_buffer, 0, m_buffer.Length);

            if (m_length == 0) {
                // no data is read
                if (scanner.Position == m_pos) {
                    // scanner hasn't moved, that's the end
                    m_pos = 0;
                    tokenType = JsonGrammar.TokenType.None;
                    return false;
                }

                if (scanner.IsFinal) {
                    m_pos = 0;
                    tokenType = scanner.Tag;
                    return true;
                } else {
                    throw new ParserException("Unexpected EOF");
                }
            }

            m_pos = 0;
        }
        var scannerPos = scanner.Position;

        // scanner stops at scannerPos
        if (!scanner.IsFinal)
            throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");

        if (scannerPos != m_pos) {
            m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
            m_pos = scannerPos;
        }
        tokenType = scanner.Tag;
        return true;
    }

    /// <summary>
    /// Supplies more input characters.
    /// </summary>
    /// <param name="buffer">Buffer to fill.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="size">Maximum number of characters to write.</param>
    /// <returns>Number of characters written; <c>0</c> is treated as the end of the input.</returns>
    protected abstract int Read(char[] buffer, int offset, int size);


    /// <summary>
    /// Reads the next lexical element from the input data.
    /// </summary>
    /// <param name="tokenValue">Receives the text of the token: the unescaped content for strings,
    /// the raw text for numbers and literals, <c>null</c> for all other tokens.</param>
    /// <param name="tokenType">Receives the type of the token that was read.</param>
    /// <returns><c>true</c> - a token was read, <c>false</c> - the end of the input data is reached.</returns>
    /// <remarks>An exception is thrown when a token cannot be recognized. Escape sequences inside
    /// strings are translated; numbers are returned as text and are not converted.</remarks>
    public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();
        while (ReadChunk(m_jsonContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // opening quote: the rest of the string is read with the string automaton
                    tokenValue = ReadString();
                    tokenType = JsonTokenType.String;
                    break;
                case JsonGrammar.TokenType.Number:
                    tokenValue = m_tokenBuilder.ToString();
                    tokenType = JsonTokenType.Number;
                    break;
                case JsonGrammar.TokenType.Literal:
                    tokenType = JsonTokenType.Literal;
                    tokenValue = m_tokenBuilder.ToString();
                    break;
                case JsonGrammar.TokenType.Whitespace:
                    // whitespace is skipped; drop anything captured for it
                    m_tokenBuilder.Clear();
                    continue;
                default:
                    // remaining tags are cast directly to the public token type
                    tokenType = (JsonTokenType)tag;
                    tokenValue = null;
                    break;
            }
            return true;
        }
        tokenValue = null;
        tokenType = JsonTokenType.None;
        return false;
    }

    // Reads the body of a string literal up to the closing quote and returns it
    // with all escape sequences translated.
    string ReadString() {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();

        while (ReadStringChunk(m_stringContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // drop the closing quote that was just appended
                    m_tokenBuilder.Length--;
                    return m_tokenBuilder.ToString();
                case JsonGrammar.TokenType.UnescapedChar:
                    break;
                case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                    // take the four hex digits, remove all six characters of the escape, append the result
                    m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                    m_tokenBuilder.Length -= 6;
                    m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                    break;
                case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                    // replace the backslash and the escape letter with the translated character
                    var ch = m_tokenBuilder[m_tokenBuilder.Length - 1];
                    m_tokenBuilder.Length -= 2;
                    m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                    break;
            }
        }

        throw new ParserException("Unexpected end of data");
    }
}
|
190 | 190 | } |
@@ -1,188 +1,189 | |||
|
1 | 1 | <?xml version="1.0" encoding="utf-8"?> |
|
2 | 2 | <Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
|
3 | 3 | <PropertyGroup> |
|
4 | 4 | <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> |
|
5 | 5 | <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> |
|
6 | 6 | <ProjectGuid>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</ProjectGuid> |
|
7 | 7 | <OutputType>Library</OutputType> |
|
8 | 8 | <RootNamespace>Implab</RootNamespace> |
|
9 | 9 | <AssemblyName>Implab</AssemblyName> |
|
10 | 10 | <TargetFrameworkVersion>v4.5</TargetFrameworkVersion> |
|
11 | 11 | <TargetFrameworkProfile /> |
|
12 | 12 | </PropertyGroup> |
|
13 | 13 | <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> |
|
14 | 14 | <DebugSymbols>true</DebugSymbols> |
|
15 | 15 | <DebugType>full</DebugType> |
|
16 | 16 | <Optimize>false</Optimize> |
|
17 | 17 | <OutputPath>bin\Debug</OutputPath> |
|
18 | 18 | <DefineConstants>TRACE;DEBUG;NET_4_5</DefineConstants> |
|
19 | 19 | <ErrorReport>prompt</ErrorReport> |
|
20 | 20 | <WarningLevel>4</WarningLevel> |
|
21 | 21 | <ConsolePause>false</ConsolePause> |
|
22 | 22 | <RunCodeAnalysis>true</RunCodeAnalysis> |
|
23 | 23 | </PropertyGroup> |
|
24 | 24 | <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> |
|
25 | 25 | <DebugType>full</DebugType> |
|
26 | 26 | <Optimize>true</Optimize> |
|
27 | 27 | <OutputPath>bin\Release</OutputPath> |
|
28 | 28 | <DefineConstants>NET_4_5</DefineConstants> |
|
29 | 29 | <ErrorReport>prompt</ErrorReport> |
|
30 | 30 | <WarningLevel>4</WarningLevel> |
|
31 | 31 | <ConsolePause>false</ConsolePause> |
|
32 | 32 | </PropertyGroup> |
|
33 | 33 | <PropertyGroup> |
|
34 | 34 | <SignAssembly>true</SignAssembly> |
|
35 | 35 | </PropertyGroup> |
|
36 | 36 | <PropertyGroup> |
|
37 | 37 | <AssemblyOriginatorKeyFile>implab.snk</AssemblyOriginatorKeyFile> |
|
38 | 38 | </PropertyGroup> |
|
39 | 39 | <ItemGroup> |
|
40 | 40 | <Reference Include="System" /> |
|
41 | 41 | <Reference Include="System.Xml" /> |
|
42 | 42 | <Reference Include="mscorlib" /> |
|
43 | 43 | <Reference Include="System.Xml.Linq" /> |
|
44 | 44 | </ItemGroup> |
|
45 | 45 | <ItemGroup> |
|
46 | 46 | <Compile Include="Components\StateChangeEventArgs.cs" /> |
|
47 | 47 | <Compile Include="CustomEqualityComparer.cs" /> |
|
48 | 48 | <Compile Include="Diagnostics\ConsoleTraceListener.cs" /> |
|
49 | 49 | <Compile Include="Diagnostics\LogChannel.cs" /> |
|
50 | 50 | <Compile Include="Diagnostics\LogicalOperation.cs" /> |
|
51 | 51 | <Compile Include="Diagnostics\TextFileListener.cs" /> |
|
52 | 52 | <Compile Include="Diagnostics\Trace.cs" /> |
|
53 | 53 | <Compile Include="Diagnostics\TraceLog.cs" /> |
|
54 | 54 | <Compile Include="Diagnostics\TraceEvent.cs" /> |
|
55 | 55 | <Compile Include="Diagnostics\TraceEventType.cs" /> |
|
56 | 56 | <Compile Include="Diagnostics\TraceSourceAttribute.cs" /> |
|
57 | 57 | <Compile Include="Formats\CharMap.cs" /> |
|
58 | <Compile Include="Formats\FastInpurScanner.cs" /> | |
|
58 | 59 | <Compile Include="Formats\InputScanner.cs" /> |
|
59 | 60 | <Compile Include="Formats\Json\JsonStringScanner.cs" /> |
|
60 | 61 | <Compile Include="Formats\Json\JsonTextScanner.cs" /> |
|
61 | 62 | <Compile Include="ICancellable.cs" /> |
|
62 | 63 | <Compile Include="IProgressHandler.cs" /> |
|
63 | 64 | <Compile Include="IProgressNotifier.cs" /> |
|
64 | 65 | <Compile Include="IPromiseT.cs" /> |
|
65 | 66 | <Compile Include="IPromise.cs" /> |
|
66 | 67 | <Compile Include="IServiceLocator.cs" /> |
|
67 | 68 | <Compile Include="ITaskController.cs" /> |
|
68 | 69 | <Compile Include="Parallels\DispatchPool.cs" /> |
|
69 | 70 | <Compile Include="Parallels\ArrayTraits.cs" /> |
|
70 | 71 | <Compile Include="Parallels\SimpleAsyncQueue.cs" /> |
|
71 | 72 | <Compile Include="Parallels\WorkerPool.cs" /> |
|
72 | 73 | <Compile Include="ProgressInitEventArgs.cs" /> |
|
73 | 74 | <Compile Include="Properties\AssemblyInfo.cs" /> |
|
74 | 75 | <Compile Include="Parallels\AsyncPool.cs" /> |
|
75 | 76 | <Compile Include="Safe.cs" /> |
|
76 | 77 | <Compile Include="SyncContextPromise.cs" /> |
|
77 | 78 | <Compile Include="ValueEventArgs.cs" /> |
|
78 | 79 | <Compile Include="PromiseExtensions.cs" /> |
|
79 | 80 | <Compile Include="SyncContextPromiseT.cs" /> |
|
80 | 81 | <Compile Include="Diagnostics\OperationContext.cs" /> |
|
81 | 82 | <Compile Include="Diagnostics\TraceContext.cs" /> |
|
82 | 83 | <Compile Include="Diagnostics\LogEventArgs.cs" /> |
|
83 | 84 | <Compile Include="Diagnostics\LogEventArgsT.cs" /> |
|
84 | 85 | <Compile Include="Diagnostics\Extensions.cs" /> |
|
85 | 86 | <Compile Include="PromiseEventType.cs" /> |
|
86 | 87 | <Compile Include="Parallels\AsyncQueue.cs" /> |
|
87 | 88 | <Compile Include="PromiseT.cs" /> |
|
88 | 89 | <Compile Include="IDeferred.cs" /> |
|
89 | 90 | <Compile Include="IDeferredT.cs" /> |
|
90 | 91 | <Compile Include="Promise.cs" /> |
|
91 | 92 | <Compile Include="PromiseTransientException.cs" /> |
|
92 | 93 | <Compile Include="Parallels\Signal.cs" /> |
|
93 | 94 | <Compile Include="Parallels\SharedLock.cs" /> |
|
94 | 95 | <Compile Include="Diagnostics\ILogWriter.cs" /> |
|
95 | 96 | <Compile Include="Diagnostics\ListenerBase.cs" /> |
|
96 | 97 | <Compile Include="Parallels\BlockingQueue.cs" /> |
|
97 | 98 | <Compile Include="AbstractEvent.cs" /> |
|
98 | 99 | <Compile Include="AbstractPromise.cs" /> |
|
99 | 100 | <Compile Include="AbstractPromiseT.cs" /> |
|
100 | 101 | <Compile Include="FuncTask.cs" /> |
|
101 | 102 | <Compile Include="FuncTaskBase.cs" /> |
|
102 | 103 | <Compile Include="FuncTaskT.cs" /> |
|
103 | 104 | <Compile Include="ActionChainTaskBase.cs" /> |
|
104 | 105 | <Compile Include="ActionChainTask.cs" /> |
|
105 | 106 | <Compile Include="ActionChainTaskT.cs" /> |
|
106 | 107 | <Compile Include="FuncChainTaskBase.cs" /> |
|
107 | 108 | <Compile Include="FuncChainTask.cs" /> |
|
108 | 109 | <Compile Include="FuncChainTaskT.cs" /> |
|
109 | 110 | <Compile Include="ActionTaskBase.cs" /> |
|
110 | 111 | <Compile Include="ActionTask.cs" /> |
|
111 | 112 | <Compile Include="ActionTaskT.cs" /> |
|
112 | 113 | <Compile Include="ICancellationToken.cs" /> |
|
113 | 114 | <Compile Include="SuccessPromise.cs" /> |
|
114 | 115 | <Compile Include="SuccessPromiseT.cs" /> |
|
115 | 116 | <Compile Include="PromiseAwaiterT.cs" /> |
|
116 | 117 | <Compile Include="PromiseAwaiter.cs" /> |
|
117 | 118 | <Compile Include="Components\ComponentContainer.cs" /> |
|
118 | 119 | <Compile Include="Components\Disposable.cs" /> |
|
119 | 120 | <Compile Include="Components\DisposablePool.cs" /> |
|
120 | 121 | <Compile Include="Components\ObjectPool.cs" /> |
|
121 | 122 | <Compile Include="Components\ServiceLocator.cs" /> |
|
122 | 123 | <Compile Include="Components\IInitializable.cs" /> |
|
123 | 124 | <Compile Include="TaskController.cs" /> |
|
124 | 125 | <Compile Include="Components\App.cs" /> |
|
125 | 126 | <Compile Include="Components\IRunnable.cs" /> |
|
126 | 127 | <Compile Include="Components\ExecutionState.cs" /> |
|
127 | 128 | <Compile Include="Components\RunnableComponent.cs" /> |
|
128 | 129 | <Compile Include="Components\IFactory.cs" /> |
|
129 | 130 | <Compile Include="Automaton\IAlphabet.cs" /> |
|
130 | 131 | <Compile Include="Automaton\ParserException.cs" /> |
|
131 | 132 | <Compile Include="Automaton\IndexedAlphabetBase.cs" /> |
|
132 | 133 | <Compile Include="Automaton\IAlphabetBuilder.cs" /> |
|
133 | 134 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> |
|
134 | 135 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> |
|
135 | 136 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> |
|
136 | 137 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> |
|
137 | 138 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> |
|
138 | 139 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> |
|
139 | 140 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> |
|
140 | 141 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> |
|
141 | 142 | <Compile Include="Automaton\AutomatonTransition.cs" /> |
|
142 | 143 | <Compile Include="Formats\Json\JsonElementContext.cs" /> |
|
143 | 144 | <Compile Include="Formats\Json\JsonElementType.cs" /> |
|
144 | 145 | <Compile Include="Formats\Json\JsonGrammar.cs" /> |
|
145 | 146 | <Compile Include="Formats\Json\JsonReader.cs" /> |
|
146 | 147 | <Compile Include="Formats\Json\JsonScanner.cs" /> |
|
147 | 148 | <Compile Include="Formats\Json\JsonTokenType.cs" /> |
|
148 | 149 | <Compile Include="Formats\Json\JsonWriter.cs" /> |
|
149 | 150 | <Compile Include="Formats\Json\StringTranslator.cs" /> |
|
150 | 151 | <Compile Include="Automaton\MapAlphabet.cs" /> |
|
151 | 152 | <Compile Include="Formats\CharAlphabet.cs" /> |
|
152 | 153 | <Compile Include="Formats\ByteAlphabet.cs" /> |
|
153 | 154 | <Compile Include="Automaton\IDFATable.cs" /> |
|
154 | 155 | <Compile Include="Automaton\IDFATableBuilder.cs" /> |
|
155 | 156 | <Compile Include="Automaton\DFATable.cs" /> |
|
156 | 157 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> |
|
157 | 158 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> |
|
158 | 159 | <Compile Include="Formats\Grammar.cs" /> |
|
159 | 160 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> |
|
160 | 161 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> |
|
161 | 162 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitorT.cs" /> |
|
162 | 163 | <Compile Include="Automaton\AutomatonConst.cs" /> |
|
163 | 164 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> |
|
164 | 165 | <Compile Include="Components\LazyAndWeak.cs" /> |
|
165 | 166 | <Compile Include="AbstractTask.cs" /> |
|
166 | 167 | <Compile Include="AbstractTaskT.cs" /> |
|
167 | 168 | <Compile Include="FailedPromise.cs" /> |
|
168 | 169 | <Compile Include="FailedPromiseT.cs" /> |
|
169 | 170 | <Compile Include="Components\PollingComponent.cs" /> |
|
170 | 171 | <Compile Include="Xml\JsonXmlReader.cs" /> |
|
171 | 172 | <Compile Include="Xml\JsonXmlReaderOptions.cs" /> |
|
172 | 173 | <Compile Include="Xml\JsonXmlReaderPosition.cs" /> |
|
173 | 174 | <Compile Include="Xml\SerializationHelpers.cs" /> |
|
174 | 175 | <Compile Include="Xml\SerializersPool.cs" /> |
|
175 | 176 | <Compile Include="Xml\XmlSimpleAttribute.cs" /> |
|
176 | 177 | <Compile Include="Xml\XmlNameContext.cs" /> |
|
177 | 178 | </ItemGroup> |
|
178 | 179 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
179 | 180 | <ItemGroup> |
|
180 | 181 | <None Include="Implab.nuspec"> |
|
181 | 182 | <SubType>Designer</SubType> |
|
182 | 183 | </None> |
|
183 | 184 | <None Include="implab.snk" /> |
|
184 | 185 | </ItemGroup> |
|
185 | 186 | <ItemGroup> |
|
186 | 187 | <Content Include="license.txt" /> |
|
187 | 188 | </ItemGroup> |
|
188 | 189 | </Project> No newline at end of file |
@@ -1,626 +1,610 | |||
|
1 | 1 | using Implab.Formats.Json; |
|
2 | 2 | using System; |
|
3 | 3 | using System.Collections.Generic; |
|
4 | 4 | using System.Globalization; |
|
5 | 5 | using System.Linq; |
|
6 | 6 | using System.Xml; |
|
7 | 7 | |
|
8 | 8 | namespace Implab.Xml { |
|
9 | 9 | public class JsonXmlReader : XmlReader { |
|
// Entry type of m_jsonNameStack.
// NOTE(review): presumably a snapshot of m_jsonLocalName / m_jsonSkip for one nesting
// level - confirm against the push/pop sites further down the file.
struct JsonContext {
    public string localName;
    public bool skip;
}
|
14 | 14 | |
|
15 | 15 | JsonReader m_parser; |
|
16 | 16 | JsonXmlReaderOptions m_options; |
|
17 | 17 | JsonXmlReaderPosition m_position = JsonXmlReaderPosition.Initial; |
|
18 | 18 | XmlNameTable m_nameTable; |
|
19 | 19 | |
|
20 | 20 | readonly string m_jsonRootName; |
|
21 | 21 | readonly string m_jsonNamespace; |
|
22 | 22 | readonly string m_jsonPrefix; |
|
23 | 23 | readonly bool m_jsonFlattenArrays; |
|
24 | 24 | readonly string m_jsonArrayItemName; |
|
25 | 25 | |
|
26 | 26 | string m_jsonLocalName; |
|
27 | 27 | string m_jsonValueName; |
|
28 | 28 | bool m_jsonSkip; // indicates whether to generate closing tag for objects or arrays |
|
29 | 29 | |
|
30 | 30 | readonly Stack<JsonContext> m_jsonNameStack = new Stack<JsonContext>(); |
|
31 | 31 | |
|
32 | 32 | XmlQualifiedName m_elementQName; |
|
33 | 33 | string m_elementPrefix; |
|
34 | 34 | int m_elementDepth; |
|
35 | 35 | bool m_elementIsEmpty; |
|
36 | 36 | |
|
37 | 37 | XmlQualifiedName m_qName; |
|
38 | 38 | string m_prefix; |
|
39 | 39 | int m_xmlDepth; |
|
40 | 40 | |
|
41 | 41 | XmlSimpleAttribute[] m_attributes; |
|
42 |
|
|
|
42 | string m_value; | |
|
43 | 43 | bool m_isEmpty; |
|
44 | 44 | |
|
45 | 45 | XmlNodeType m_nodeType = XmlNodeType.None; |
|
46 | 46 | |
|
47 | 47 | bool m_isAttribute; // indicates that we are reading attribute nodes |
|
48 | 48 | int m_currentAttribute; |
|
49 | 49 | bool m_currentAttributeRead; |
|
50 | 50 | |
|
51 | 51 | |
|
52 | 52 | XmlNameContext m_context; |
|
53 | 53 | |
|
54 | 54 | readonly string m_xmlnsPrefix; |
|
55 | 55 | readonly string m_xmlnsNamespace; |
|
56 | 56 | readonly string m_xsiPrefix; |
|
57 | 57 | readonly string m_xsiNamespace; |
|
58 | 58 | |
|
59 | 59 | |
|
/// <summary>
/// Creates an XML reader on top of the specified JSON parser.
/// </summary>
/// <param name="parser">Source JSON reader; must not be <c>null</c>.</param>
/// <param name="options">Reader settings; default options are used when <c>null</c>.</param>
public JsonXmlReader(JsonReader parser, JsonXmlReaderOptions options) {
    Safe.ArgumentNotNull(parser, nameof(parser));
    m_parser = parser;

    if (options == null)
        options = new JsonXmlReaderOptions();
    m_options = options;

    m_jsonFlattenArrays = options.FlattenArrays;

    var names = options.NameTable;
    if (names == null)
        names = new NameTable();
    m_nameTable = names;

    // fall back to the built-in element names when none are configured
    var rootName = options.RootName;
    if (string.IsNullOrEmpty(rootName))
        rootName = "data";
    var itemName = options.ArrayItemName;
    if (string.IsNullOrEmpty(itemName))
        itemName = "item";

    // every name goes through the name table so the reader hands out atomized strings
    m_jsonRootName = names.Add(rootName);
    m_jsonArrayItemName = names.Add(itemName);
    m_jsonNamespace = names.Add(options.NamespaceUri ?? string.Empty);
    m_jsonPrefix = names.Add(options.NodesPrefix ?? string.Empty);
    m_xmlnsPrefix = names.Add(XmlNameContext.XmlnsPrefix);
    m_xmlnsNamespace = names.Add(XmlNameContext.XmlnsNamespace);
    m_xsiPrefix = names.Add(XmlNameContext.XsiPrefix);
    m_xsiNamespace = names.Add(XmlNameContext.XsiNamespace);

    // TODO validate m_jsonRootName, m_jsonArrayItemName

    m_context = new XmlNameContext(null, 0);
}
|
82 | 82 | |
|
// Number of attributes on the current node; zero when no attribute array is set.
public override int AttributeCount => m_attributes?.Length ?? 0;
|
88 | 88 | |
|
// This reader has no base URI; always an empty string.
public override string BaseURI => string.Empty;
|
94 | 94 | |
|
// Depth of the current node in the generated XML.
public override int Depth => m_xmlDepth;
|
100 | 100 | |
|
// True once the reader position is Eof.
public override bool EOF => m_position == JsonXmlReaderPosition.Eof;
|
106 | 106 | |
|
// Whether the current element is reported as empty.
public override bool IsEmptyElement => m_isEmpty;
|
110 | 110 | |
|
111 | 111 | |
|
// Local part of the current node's qualified name.
public override string LocalName => m_qName.Name;
|
117 | 117 | |
|
// Namespace part of the current node's qualified name.
public override string NamespaceURI => m_qName.Namespace;
|
123 | 123 | |
|
// Name table used to atomize the names produced by this reader.
public override XmlNameTable NameTable => m_nameTable;
|
129 | 129 | |
|
// Type of the current node.
public override XmlNodeType NodeType => m_nodeType;
|
135 | 135 | |
|
// Namespace prefix of the current node.
public override string Prefix => m_prefix;
|
141 | 141 | |
|
// Translates the internal reader position into the standard ReadState value;
// every position without a dedicated mapping counts as Interactive.
public override ReadState ReadState {
    get {
        if (m_position == JsonXmlReaderPosition.Initial)
            return ReadState.Initial;
        if (m_position == JsonXmlReaderPosition.Eof)
            return ReadState.EndOfFile;
        if (m_position == JsonXmlReaderPosition.Closed)
            return ReadState.Closed;
        if (m_position == JsonXmlReaderPosition.Error)
            return ReadState.Error;

        return ReadState.Interactive;
    }
}
|
158 | 158 | |
|
// Text value of the current node.
public override string Value => m_value;
|
164 | ||
|
165 | static string ConvertValueToString(object value) { | |
|
166 | if (value == null) | |
|
167 | return string.Empty; | |
|
168 | ||
|
169 | switch (Convert.GetTypeCode(value)) { | |
|
170 | case TypeCode.Double: | |
|
171 | return ((double)value).ToString(CultureInfo.InvariantCulture); | |
|
172 | case TypeCode.String: | |
|
173 | return (string)value; | |
|
174 | case TypeCode.Boolean: | |
|
175 | return (bool)value ? "true" : "false"; | |
|
176 | default: | |
|
177 | return value.ToString(); | |
|
178 | } | |
|
179 | } | |
|
180 | ||
|
164 | ||
|
// Returns the value of the attribute at index i; the index is validated against
// the range [0, AttributeCount - 1] first.
public override string GetAttribute(int i) {
    var last = AttributeCount - 1;
    Safe.ArgumentInRange(i, 0, last, nameof(i));

    var attribute = m_attributes[i];
    return attribute.Value;
}
|
185 | 169 | |
|
// Looks an attribute up by its (possibly prefixed) name, resolved through the current
// name context. Returns null when there is no such attribute or its value is empty.
public override string GetAttribute(string name) {
    if (m_attributes == null)
        return null;

    var wanted = m_context.Resolve(name);
    var match = Array.Find(m_attributes, a => a.QName == wanted);
    var text = match?.Value;

    return string.IsNullOrEmpty(text) ? null : text;
}
|
194 | 178 | |
|
195 | 179 | public override string GetAttribute(string name, string namespaceURI) { |
|
196 | 180 | if (m_attributes == null) |
|
197 | 181 | return null; |
|
198 | 182 | var qName = new XmlQualifiedName(name, namespaceURI); |
|
199 | 183 | var attr = Array.Find(m_attributes, x => x.QName == qName); |
|
200 |
var value = |
|
|
184 | var value = attr?.Value; | |
|
201 | 185 | return value == string.Empty ? null : value; |
|
202 | 186 | } |
|
203 | 187 | |
|
204 | 188 | public override string LookupNamespace(string prefix) { |
|
205 | 189 | return m_context.ResolvePrefix(prefix); |
|
206 | 190 | } |
|
207 | 191 | |
|
208 | 192 | public override bool MoveToAttribute(string name) { |
|
209 | 193 | if (m_attributes == null || m_attributes.Length == 0) |
|
210 | 194 | return false; |
|
211 | 195 | |
|
212 | 196 | var qName = m_context.Resolve(name); |
|
213 | 197 | var index = Array.FindIndex(m_attributes, x => x.QName == qName); |
|
214 | 198 | if (index >= 0) { |
|
215 | 199 | MoveToAttributeImpl(index); |
|
216 | 200 | return true; |
|
217 | 201 | } |
|
218 | 202 | return false; |
|
219 | 203 | } |
|
220 | 204 | |
|
221 | 205 | public override bool MoveToAttribute(string name, string ns) { |
|
222 | 206 | if (m_attributes == null || m_attributes.Length == 0) |
|
223 | 207 | return false; |
|
224 | 208 | |
|
225 | 209 | var qName = m_context.Resolve(name); |
|
226 | 210 | var index = Array.FindIndex(m_attributes, x => x.QName == qName); |
|
227 | 211 | if (index >= 0) { |
|
228 | 212 | MoveToAttributeImpl(index); |
|
229 | 213 | return true; |
|
230 | 214 | } |
|
231 | 215 | return false; |
|
232 | 216 | } |
|
233 | 217 | |
|
234 | 218 | void MoveToAttributeImpl(int i) { |
|
235 | 219 | if (!m_isAttribute) { |
|
236 | 220 | m_elementQName = m_qName; |
|
237 | 221 | m_elementDepth = m_xmlDepth; |
|
238 | 222 | m_elementPrefix = m_prefix; |
|
239 | 223 | m_elementIsEmpty = m_isEmpty; |
|
240 | 224 | m_isAttribute = true; |
|
241 | 225 | } |
|
242 | 226 | |
|
243 | 227 | var attr = m_attributes[i]; |
|
244 | 228 | |
|
245 | 229 | |
|
246 | 230 | m_currentAttribute = i; |
|
247 | 231 | m_currentAttributeRead = false; |
|
248 | 232 | m_nodeType = XmlNodeType.Attribute; |
|
249 | 233 | |
|
250 | 234 | m_xmlDepth = m_elementDepth + 1; |
|
251 | 235 | m_qName = attr.QName; |
|
252 | 236 | m_value = attr.Value; |
|
253 | 237 | m_prefix = attr.Prefix; |
|
254 | 238 | } |
|
255 | 239 | |
|
256 | 240 | public override bool MoveToElement() { |
|
257 | 241 | if (m_isAttribute) { |
|
258 | 242 | m_value = null; |
|
259 | 243 | m_nodeType = XmlNodeType.Element; |
|
260 | 244 | m_xmlDepth = m_elementDepth; |
|
261 | 245 | m_prefix = m_elementPrefix; |
|
262 | 246 | m_qName = m_elementQName; |
|
263 | 247 | m_isEmpty = m_elementIsEmpty; |
|
264 | 248 | m_isAttribute = false; |
|
265 | 249 | return true; |
|
266 | 250 | } |
|
267 | 251 | return false; |
|
268 | 252 | } |
|
269 | 253 | |
|
270 | 254 | public override bool MoveToFirstAttribute() { |
|
271 | 255 | if (m_attributes != null && m_attributes.Length > 0) { |
|
272 | 256 | MoveToAttributeImpl(0); |
|
273 | 257 | return true; |
|
274 | 258 | } |
|
275 | 259 | return false; |
|
276 | 260 | } |
|
277 | 261 | |
|
278 | 262 | public override bool MoveToNextAttribute() { |
|
279 | 263 | if (m_isAttribute) { |
|
280 | 264 | var next = m_currentAttribute + 1; |
|
281 | 265 | if (next < AttributeCount) { |
|
282 | 266 | MoveToAttributeImpl(next); |
|
283 | 267 | return true; |
|
284 | 268 | } |
|
285 | 269 | return false; |
|
286 | 270 | } else { |
|
287 | 271 | return MoveToFirstAttribute(); |
|
288 | 272 | } |
|
289 | 273 | |
|
290 | 274 | } |
|
291 | 275 | |
|
292 | 276 | public override bool ReadAttributeValue() { |
|
293 | 277 | if (!m_isAttribute || m_currentAttributeRead) |
|
294 | 278 | return false; |
|
295 | 279 | |
|
296 | 280 | ValueNode(m_attributes[m_currentAttribute].Value); |
|
297 | 281 | m_currentAttributeRead = true; |
|
298 | 282 | return true; |
|
299 | 283 | } |
|
300 | 284 | |
|
301 | 285 | public override void ResolveEntity() { |
|
302 | 286 | /* do nothing */ |
|
303 | 287 | } |
|
304 | 288 | |
|
305 | 289 | /// <summary> |
|
306 | 290 | /// Determines whether we need to increase the depth after the current node |
|
307 | 291 | /// </summary> |
|
308 | 292 | /// <returns></returns> |
|
309 | 293 | public bool IsSibling() { |
|
310 | 294 | switch (m_nodeType) { |
|
311 | 295 | case XmlNodeType.None: // start document |
|
312 | 296 | case XmlNodeType.Attribute: // after an attribute, only its content can be iterated with the ReadAttributeValue method |
|
313 | 297 | return false; |
|
314 | 298 | case XmlNodeType.Element: |
|
315 | 299 | // if the element is empty, the next element will be its sibling |
|
316 | 300 | return m_isEmpty; |
|
317 | 301 | default: |
|
318 | 302 | return true; |
|
319 | 303 | } |
|
320 | 304 | } |
|
321 | 305 | |
|
322 |
void ValueNode( |
|
|
306 | void ValueNode(string value) { | |
|
323 | 307 | if (!IsSibling()) // the node is nested |
|
324 | 308 | m_xmlDepth++; |
|
325 | 309 | |
|
326 | 310 | m_qName = XmlQualifiedName.Empty; |
|
327 | 311 | m_nodeType = XmlNodeType.Text; |
|
328 | 312 | m_prefix = string.Empty; |
|
329 | 313 | m_value = value; |
|
330 | 314 | m_isEmpty = false; |
|
331 | 315 | m_attributes = null; |
|
332 | 316 | } |
|
333 | 317 | |
|
334 | 318 | void ElementNode(string name, string ns, XmlSimpleAttribute[] attrs, bool empty) { |
|
335 | 319 | if (!IsSibling()) // the node is nested |
|
336 | 320 | m_xmlDepth++; |
|
337 | 321 | |
|
338 | 322 | var context = m_context; |
|
339 | 323 | List<XmlSimpleAttribute> definedAttrs = null; |
|
340 | 324 | |
|
341 | 325 | // define new namespaces |
|
342 | 326 | if (attrs != null) { |
|
343 | 327 | foreach (var attr in attrs) { |
|
344 | 328 | if (attr.QName.Name == "xmlns") { |
|
345 | 329 | if (context == m_context) |
|
346 | 330 | context = new XmlNameContext(m_context, m_xmlDepth); |
|
347 |
context.DefinePrefix( |
|
|
331 | context.DefinePrefix(attr.Value, string.Empty); | |
|
348 | 332 | } else if (attr.Prefix == m_xmlnsPrefix) { |
|
349 | 333 | if (context == m_context) |
|
350 | 334 | context = new XmlNameContext(m_context, m_xmlDepth); |
|
351 |
context.DefinePrefix( |
|
|
335 | context.DefinePrefix(attr.Value, attr.QName.Name); | |
|
352 | 336 | } else { |
|
353 | 337 | string attrPrefix; |
|
354 | 338 | if (string.IsNullOrEmpty(attr.QName.Namespace)) |
|
355 | 339 | continue; |
|
356 | 340 | |
|
357 | 341 | // auto-define prefixes |
|
358 | 342 | if (!context.LookupNamespacePrefix(attr.QName.Namespace, out attrPrefix) || string.IsNullOrEmpty(attrPrefix)) { |
|
359 | 343 | // new namespace prefix added |
|
360 | 344 | attrPrefix = context.CreateNamespacePrefix(attr.QName.Namespace); |
|
361 | 345 | attr.Prefix = attrPrefix; |
|
362 | 346 | |
|
363 | 347 | if (definedAttrs == null) |
|
364 | 348 | definedAttrs = new List<XmlSimpleAttribute>(); |
|
365 | 349 | |
|
366 | 350 | definedAttrs.Add(new XmlSimpleAttribute(attrPrefix, m_xmlnsNamespace, m_xmlnsPrefix, attr.QName.Namespace)); |
|
367 | 351 | } |
|
368 | 352 | } |
|
369 | 353 | } |
|
370 | 354 | } |
|
371 | 355 | |
|
372 | 356 | string p; |
|
373 | 357 | // auto-define prefixes |
|
374 | 358 | if (!context.LookupNamespacePrefix(ns, out p)) { |
|
375 | 359 | if (context == m_context) |
|
376 | 360 | context = new XmlNameContext(m_context, m_xmlDepth); |
|
377 | 361 | p = context.CreateNamespacePrefix(ns); |
|
378 | 362 | if (definedAttrs == null) |
|
379 | 363 | definedAttrs = new List<XmlSimpleAttribute>(); |
|
380 | 364 | |
|
381 | 365 | definedAttrs.Add(new XmlSimpleAttribute(p, m_xmlnsNamespace, m_xmlnsPrefix, ns)); |
|
382 | 366 | } |
|
383 | 367 | |
|
384 | 368 | if (definedAttrs != null) { |
|
385 | 369 | if (attrs != null) |
|
386 | 370 | definedAttrs.AddRange(attrs); |
|
387 | 371 | attrs = definedAttrs.ToArray(); |
|
388 | 372 | } |
|
389 | 373 | |
|
390 | 374 | if (!empty) |
|
391 | 375 | m_context = context; |
|
392 | 376 | |
|
393 | 377 | m_nodeType = XmlNodeType.Element; |
|
394 | 378 | m_qName = new XmlQualifiedName(name, ns); |
|
395 | 379 | m_prefix = p; |
|
396 | 380 | m_value = null; |
|
397 | 381 | m_isEmpty = empty; |
|
398 | 382 | m_attributes = attrs; |
|
399 | 383 | } |
|
400 | 384 | |
|
401 | 385 | void EndElementNode(string name, string ns) { |
|
402 | 386 | if (IsSibling()) { |
|
403 | 387 | // closing the element which has children |
|
404 | 388 | m_xmlDepth--; |
|
405 | 389 | } |
|
406 | 390 | |
|
407 | 391 | string p; |
|
408 | 392 | if (!m_context.LookupNamespacePrefix(ns, out p)) |
|
409 | 393 | throw new Exception($"Failed to lookup namespace '{ns}'"); |
|
410 | 394 | |
|
411 | 395 | if (m_context.Depth == m_xmlDepth) |
|
412 | 396 | m_context = m_context.ParentContext; |
|
413 | 397 | |
|
414 | 398 | m_nodeType = XmlNodeType.EndElement; |
|
415 | 399 | m_prefix = p; |
|
416 | 400 | m_qName = new XmlQualifiedName(name, ns); |
|
417 | 401 | m_value = null; |
|
418 | 402 | m_attributes = null; |
|
419 | 403 | m_isEmpty = false; |
|
420 | 404 | } |
|
421 | 405 | |
|
422 | 406 | void XmlDeclaration() { |
|
423 | 407 | if (!IsSibling()) // the node is nested |
|
424 | 408 | m_xmlDepth++; |
|
425 | 409 | m_nodeType = XmlNodeType.XmlDeclaration; |
|
426 | 410 | m_qName = new XmlQualifiedName("xml"); |
|
427 | 411 | m_value = "version='1.0'"; |
|
428 | 412 | m_prefix = string.Empty; |
|
429 | 413 | m_attributes = null; |
|
430 | 414 | m_isEmpty = false; |
|
431 | 415 | } |
|
432 | 416 | |
|
433 | 417 | public override bool Read() { |
|
434 | 418 | try { |
|
435 | 419 | string elementName; |
|
436 | 420 | XmlSimpleAttribute[] elementAttrs = null; |
|
437 | 421 | MoveToElement(); |
|
438 | 422 | |
|
439 | 423 | switch (m_position) { |
|
440 | 424 | case JsonXmlReaderPosition.Initial: |
|
441 | 425 | m_jsonLocalName = m_jsonRootName; |
|
442 | 426 | m_jsonSkip = false; |
|
443 | 427 | XmlDeclaration(); |
|
444 | 428 | m_position = JsonXmlReaderPosition.Declaration; |
|
445 | 429 | return true; |
|
446 | 430 | case JsonXmlReaderPosition.Declaration: |
|
447 | 431 | elementAttrs = new[] { |
|
448 | 432 | new XmlSimpleAttribute(m_xsiPrefix, m_xmlnsNamespace, m_xmlnsPrefix, m_xsiNamespace), |
|
449 | 433 | string.IsNullOrEmpty(m_jsonPrefix) ? |
|
450 | 434 | new XmlSimpleAttribute(m_xmlnsPrefix, string.Empty, string.Empty, m_jsonNamespace) : |
|
451 | 435 | new XmlSimpleAttribute(m_jsonPrefix, m_xmlnsNamespace, m_xmlnsPrefix, m_jsonNamespace) |
|
452 | 436 | }; |
|
453 | 437 | break; |
|
454 | 438 | case JsonXmlReaderPosition.ValueElement: |
|
455 | 439 | if (!m_isEmpty) { |
|
456 | 440 | if (m_parser.ElementValue != null && !m_parser.ElementValue.Equals(string.Empty)) |
|
457 | 441 | ValueNode(m_parser.ElementValue); |
|
458 | 442 | else |
|
459 | 443 | goto case JsonXmlReaderPosition.ValueContent; |
|
460 | 444 | m_position = JsonXmlReaderPosition.ValueContent; |
|
461 | 445 | return true; |
|
462 | 446 | } else { |
|
463 | 447 | m_position = JsonXmlReaderPosition.ValueEndElement; |
|
464 | 448 | break; |
|
465 | 449 | } |
|
466 | 450 | case JsonXmlReaderPosition.ValueContent: |
|
467 | 451 | EndElementNode(m_jsonValueName, m_jsonNamespace); |
|
468 | 452 | m_position = JsonXmlReaderPosition.ValueEndElement; |
|
469 | 453 | return true; |
|
470 | 454 | case JsonXmlReaderPosition.Eof: |
|
471 | 455 | case JsonXmlReaderPosition.Closed: |
|
472 | 456 | case JsonXmlReaderPosition.Error: |
|
473 | 457 | return false; |
|
474 | 458 | } |
|
475 | 459 | |
|
476 | 460 | while (m_parser.Read()) { |
|
477 | 461 | var jsonName = m_nameTable.Add(m_parser.ElementName); |
|
478 | 462 | |
|
479 | 463 | switch (m_parser.ElementType) { |
|
480 | 464 | case JsonElementType.BeginObject: |
|
481 | 465 | if (!EnterJsonObject(jsonName, out elementName)) |
|
482 | 466 | continue; |
|
483 | 467 | |
|
484 | 468 | m_position = JsonXmlReaderPosition.BeginObject; |
|
485 | 469 | ElementNode(elementName, m_jsonNamespace, elementAttrs, false); |
|
486 | 470 | break; |
|
487 | 471 | case JsonElementType.EndObject: |
|
488 | 472 | if (!LeaveJsonScope(out elementName)) |
|
489 | 473 | continue; |
|
490 | 474 | |
|
491 | 475 | m_position = JsonXmlReaderPosition.EndObject; |
|
492 | 476 | EndElementNode(elementName, m_jsonNamespace); |
|
493 | 477 | break; |
|
494 | 478 | case JsonElementType.BeginArray: |
|
495 | 479 | if (!EnterJsonArray(jsonName, out elementName)) |
|
496 | 480 | continue; |
|
497 | 481 | |
|
498 | 482 | m_position = JsonXmlReaderPosition.BeginArray; |
|
499 | 483 | ElementNode(elementName, m_jsonNamespace, elementAttrs, false); |
|
500 | 484 | break; |
|
501 | 485 | case JsonElementType.EndArray: |
|
502 | 486 | if (!LeaveJsonScope(out elementName)) |
|
503 | 487 | continue; |
|
504 | 488 | |
|
505 | 489 | m_position = JsonXmlReaderPosition.EndArray; |
|
506 | 490 | EndElementNode(elementName, m_jsonNamespace); |
|
507 | 491 | break; |
|
508 | 492 | case JsonElementType.Value: |
|
509 | 493 | if (!VisitJsonValue(jsonName, out m_jsonValueName)) |
|
510 | 494 | continue; |
|
511 | 495 | |
|
512 | 496 | m_position = JsonXmlReaderPosition.ValueElement; |
|
513 | 497 | if (m_parser.ElementValue == null) |
|
514 | 498 | // generate empty element with xsi:nil="true" attribute |
|
515 | 499 | ElementNode( |
|
516 | 500 | m_jsonValueName, |
|
517 | 501 | m_jsonNamespace, |
|
518 | 502 | new[] { |
|
519 | new XmlSimpleAttribute("nil", m_xsiNamespace, m_xsiPrefix, true) | |
|
503 | new XmlSimpleAttribute("nil", m_xsiNamespace, m_xsiPrefix, "true") | |
|
520 | 504 | }, |
|
521 | 505 | true |
|
522 | 506 | ); |
|
523 | 507 | else |
|
524 | 508 | ElementNode(m_jsonValueName, m_jsonNamespace, elementAttrs, m_parser.ElementValue.Equals(string.Empty)); |
|
525 | 509 | break; |
|
526 | 510 | default: |
|
527 | 511 | throw new Exception($"Unexpected JSON element {m_parser.ElementType}: {m_parser.ElementName}"); |
|
528 | 512 | } |
|
529 | 513 | return true; |
|
530 | 514 | } |
|
531 | 515 | |
|
532 | 516 | m_position = JsonXmlReaderPosition.Eof; |
|
533 | 517 | return false; |
|
534 | 518 | } catch { |
|
535 | 519 | m_position = JsonXmlReaderPosition.Error; |
|
536 | 520 | throw; |
|
537 | 521 | } |
|
538 | 522 | } |
|
539 | 523 | |
|
540 | 524 | void SaveJsonName() { |
|
541 | 525 | m_jsonNameStack.Push(new JsonContext { |
|
542 | 526 | skip = m_jsonSkip, |
|
543 | 527 | localName = m_jsonLocalName |
|
544 | 528 | }); |
|
545 | 529 | |
|
546 | 530 | } |
|
547 | 531 | |
|
548 | 532 | bool EnterJsonObject(string name, out string elementName) { |
|
549 | 533 | SaveJsonName(); |
|
550 | 534 | m_jsonSkip = false; |
|
551 | 535 | |
|
552 | 536 | if (string.IsNullOrEmpty(name)) { |
|
553 | 537 | if (m_jsonNameStack.Count != 1 && !m_jsonFlattenArrays) |
|
554 | 538 | m_jsonLocalName = m_jsonArrayItemName; |
|
555 | 539 | } else { |
|
556 | 540 | m_jsonLocalName = name; |
|
557 | 541 | } |
|
558 | 542 | |
|
559 | 543 | elementName = m_jsonLocalName; |
|
560 | 544 | return true; |
|
561 | 545 | } |
|
562 | 546 | |
|
563 | 547 | /// <summary> |
|
564 | 548 | /// Called when JSON parser visits BeginArray ('[') element. |
|
565 | 549 | /// </summary> |
|
566 | 550 | /// <param name="name">Optional property name if the array is the member of an object</param> |
|
567 | 551 | /// <returns>true if element should be emitted, false otherwise</returns> |
|
568 | 552 | bool EnterJsonArray(string name, out string elementName) { |
|
569 | 553 | SaveJsonName(); |
|
570 | 554 | |
|
571 | 555 | if (string.IsNullOrEmpty(name)) { |
|
572 | 556 | // m_jsonNameStack.Count == 1 means the root node |
|
573 | 557 | if (m_jsonNameStack.Count != 1 && !m_jsonFlattenArrays) |
|
574 | 558 | m_jsonLocalName = m_jsonArrayItemName; |
|
575 | 559 | |
|
576 | 560 | m_jsonSkip = false; // we should not flatten arrays inside arrays or in the document root |
|
577 | 561 | } else { |
|
578 | 562 | m_jsonLocalName = name; |
|
579 | 563 | m_jsonSkip = m_jsonFlattenArrays; |
|
580 | 564 | } |
|
581 | 565 | elementName = m_jsonLocalName; |
|
582 | 566 | |
|
583 | 567 | return !m_jsonSkip; |
|
584 | 568 | } |
|
585 | 569 | |
|
586 | 570 | bool VisitJsonValue(string name, out string elementName) { |
|
587 | 571 | if (string.IsNullOrEmpty(name)) { |
|
588 | 572 | // m_jsonNameStack.Count == 0 means that the JSON document consists of a simple value |
|
589 | 573 | elementName = (m_jsonNameStack.Count == 0 || m_jsonFlattenArrays) ? m_jsonLocalName : m_jsonArrayItemName; |
|
590 | 574 | } else { |
|
591 | 575 | elementName = name; |
|
592 | 576 | } |
|
593 | 577 | return true; |
|
594 | 578 | } |
|
595 | 579 | |
|
596 | 580 | bool LeaveJsonScope(out string elementName) { |
|
597 | 581 | elementName = m_jsonLocalName; |
|
598 | 582 | var skip = m_jsonSkip; |
|
599 | 583 | |
|
600 | 584 | var prev = m_jsonNameStack.Pop(); |
|
601 | 585 | m_jsonLocalName = prev.localName; |
|
602 | 586 | m_jsonSkip = prev.skip; |
|
603 | 587 | |
|
604 | 588 | return !skip; |
|
605 | 589 | } |
|
606 | 590 | |
|
607 | 591 | public override string ToString() { |
|
608 | 592 | switch (NodeType) { |
|
609 | 593 | case XmlNodeType.Element: |
|
610 |
return $"<{Name} {string.Join(" ", (m_attributes ?? new XmlSimpleAttribute[0]).Select(x => $"{x.Prefix}{(string.IsNullOrEmpty(x.Prefix) ? "" : ":")}{x.QName.Name}='{ |
|
|
594 | return $"<{Name} {string.Join(" ", (m_attributes ?? new XmlSimpleAttribute[0]).Select(x => $"{x.Prefix}{(string.IsNullOrEmpty(x.Prefix) ? "" : ":")}{x.QName.Name}='{x.Value}'"))} {(IsEmptyElement ? "/" : "")}>"; | |
|
611 | 595 | case XmlNodeType.Attribute: |
|
612 | 596 | return $"@{Name}"; |
|
613 | 597 | case XmlNodeType.Text: |
|
614 | 598 | return $"{Value}"; |
|
615 | 599 | case XmlNodeType.CDATA: |
|
616 | 600 | return $"<![CDATA[{Value}]]>"; |
|
617 | 601 | case XmlNodeType.EntityReference: |
|
618 | 602 | return $"&{Name};"; |
|
619 | 603 | case XmlNodeType.EndElement: |
|
620 | 604 | return $"</{Name}>"; |
|
621 | 605 | default: |
|
622 | 606 | return $".{NodeType} {Name} {Value}"; |
|
623 | 607 | } |
|
624 | 608 | } |
|
625 | 609 | } |
|
626 | 610 | } |
@@ -1,22 +1,22 | |||
|
1 | 1 | using System; |
|
2 | 2 | using System.Collections.Generic; |
|
3 | 3 | using System.Linq; |
|
4 | 4 | using System.Text; |
|
5 | 5 | using System.Threading.Tasks; |
|
6 | 6 | using System.Xml; |
|
7 | 7 | |
|
8 | 8 | namespace Implab.Xml { |
|
9 | 9 | public class XmlSimpleAttribute { |
|
10 |
public XmlSimpleAttribute(string name, string ns, string prefix, |
|
|
10 | public XmlSimpleAttribute(string name, string ns, string prefix, string value) { | |
|
11 | 11 | QName = new XmlQualifiedName(name, ns); |
|
12 | 12 | Prefix = prefix; |
|
13 | 13 | Value = value; |
|
14 | 14 | } |
|
15 | 15 | |
|
16 | 16 | public XmlQualifiedName QName { get; set; } |
|
17 | 17 | |
|
18 | 18 | public string Prefix { get; set; } |
|
19 | 19 | |
|
20 |
public |
|
|
20 | public string Value { get; set; } | |
|
21 | 21 | } |
|
22 | 22 | } |
General Comments 3
ok, latest stable version should be in default
You need to be logged in to leave comments.
Login now