##// END OF EJS Templates
sync
cin -
r79:05e6468f066f v2
parent child
Show More
@@ -1,324 +1,340
1 1 using Implab;
2 2 using Implab.Parsing;
3 3 using System;
4 4 using System.Collections.Generic;
5 5 using System.Globalization;
6 6 using System.IO;
7 7 using System.Linq;
8 8 using System.Text;
9 9 using System.Threading.Tasks;
10 10 using System.Xml;
11 11
12 12 namespace Implab.JSON {
13 13 public class JSONXmlReader : XmlReader {
14 14
// Phase of the synthetic element produced for a JSON value. NodeType maps
// ElementStart and ElementEmpty to Element, ElementValue to Text and ElementEnd
// to EndElement; Undefined falls into the branch that throws.
15 15 enum ValueContext {
16 16 Undefined,
17 17 ElementStart,
18 18 ElementValue,
19 19 ElementEnd,
20 20 ElementEmpty
21 21 }
22 22
// Node name entry saved on m_localNameStack; isArray is set when the name was
// pushed through SetLocalName(name, true), which Read does for JSON arrays.
23 23 struct LocalNameContext {
24 24 public string localName;
25 25 public bool isArray;
26 26 }
27 27
// Source of JSON elements; disposed together with this reader (see Dispose).
28 28 JSONParser m_parser;
// Phase of the element currently emitted for a JSON value.
29 29 ValueContext m_valueContext;
30 30 ReadState m_state = ReadState.Initial;
// Names of the enclosing nodes; m_localName is the name of the current node.
31 31 Stack<LocalNameContext> m_localNameStack = new Stack<LocalNameContext>();
32 32 LocalNameContext m_localName;
// Added to the stack size in Depth; Read decrements it when a flattened array
// begins and increments it again when that array ends.
33 33 int m_depthCorrection = 0;
34 34
// Values taken from JSONXmlReaderOptions (or defaults) in the constructor.
35 35 readonly string m_rootName;
36 36 readonly string m_prefix;
37 37 readonly string m_namespaceUri;
38 38 readonly bool m_flattenArrays;
39 39 readonly string m_arrayItemName;
40 40 readonly XmlNameTable m_nameTable;
41 41
/// <summary>
/// Creates a reader over <paramref name="parser"/>. When <paramref name="options"/> is null the
/// defaults are used: empty prefix and namespace URI, root name "json", array item name "item",
/// no array flattening and a new NameTable. Null members of a non-null options object fall back
/// to the same defaults.
/// </summary>
/// <param name="parser">Parser that supplies the JSON elements.</param>
/// <param name="options">Reader options, may be null.</param>
42 42 JSONXmlReader(JSONParser parser, JSONXmlReaderOptions options) {
43 43 m_parser = parser;
44 44
45 45 if (options != null) {
46 46 m_prefix = options.NodesPrefix ?? String.Empty;
47 47 m_namespaceUri = options.NamespaceURI ?? String.Empty;
48 48 m_rootName = options.RootName ?? "json";
49 49 m_flattenArrays = options.FlattenArrays;
50 50 m_arrayItemName = options.ArrayItemName ?? "item";
51 51 m_nameTable = options.NameTable ?? new NameTable();
52 52 } else {
53 53 m_prefix = String.Empty;
54 54 m_namespaceUri = String.Empty;
55 55 m_rootName = "json";
56 56 m_flattenArrays = false;
57 57 m_arrayItemName = "item";
58 58 m_nameTable = new NameTable();
59 59 }
60 60 }
61 61
62 62 /// <summary>
63 63 /// Always 0: JSON has no attributes, so this reader never exposes any.
64 64 /// </summary>
65 65 public override int AttributeCount {
66 66 get { return 0; }
67 67 }
68 68
/// <summary>
/// Always an empty string.
/// </summary>
69 69 public override string BaseURI {
70 70 get { return String.Empty; }
71 71 }
72 72
/// <summary>
/// Number of names on the local name stack plus the depth correction that
/// <see cref="Read"/> maintains while arrays are flattened.
/// </summary>
73 73 public override int Depth {
74 74 get {
75 75 return m_localNameStack.Count + m_depthCorrection;
76 76 }
77 77 }
78 78
/// <summary>
/// Returns the EOF flag of the underlying parser.
/// </summary>
79 79 public override bool EOF {
80 80 get { return m_parser.EOF; }
81 81 }
82 82
83 83 /// <summary>
84 84 /// Always throws <see cref="ArgumentOutOfRangeException"/>: there are no attributes to index.
85 85 /// </summary>
86 86 /// <param name="i">Attribute index; every value is out of range.</param>
87 87 /// <returns>Never returns.</returns>
88 88 public override string GetAttribute(int i) {
89 89 throw new ArgumentOutOfRangeException();
90 90 }
91 91
92 92 /// <summary>
93 93 /// Always returns an empty string; both arguments are ignored.
94 94 /// </summary>
95 95 /// <param name="name">Ignored.</param>
96 96 /// <param name="namespaceURI">Ignored.</param>
97 97 /// <returns>An empty string. NOTE(review): XmlReader.GetAttribute is documented to return null when the attribute is not found - confirm the empty string is intended.</returns>
98 98 public override string GetAttribute(string name, string namespaceURI) {
99 99 return String.Empty;
100 100 }
101 101
102 102 /// <summary>
103 103 /// Always returns an empty string; the argument is ignored.
104 104 /// </summary>
105 105 /// <param name="name">Ignored.</param>
106 106 /// <returns>An empty string. NOTE(review): XmlReader.GetAttribute is documented to return null when the attribute is not found - confirm the empty string is intended.</returns>
107 107 public override string GetAttribute(string name) {
108 108 return String.Empty;
109 109 }
110 110
/// <summary>
/// True only while the parser is on a JSON value whose context is ElementEmpty;
/// <see cref="Read"/> selects that context when the parser reports a null ElementValue.
/// </summary>
111 111 public override bool IsEmptyElement {
112 112 get { return m_parser.ElementType == JSONElementType.Value && m_valueContext == ValueContext.ElementEmpty; }
113 113 }
114 114
/// <summary>
/// Name of the current node as tracked by <see cref="Read"/>; it is an empty string
/// while the reader is on the text of a value.
/// </summary>
115 115 public override string LocalName {
116 116 get { return m_localName.localName; }
117 117 }
118 118
/// <summary>
/// Returns the configured namespace URI for a null or empty prefix and for the
/// configured nodes prefix; any other prefix yields an empty string.
/// </summary>
/// <param name="prefix">Prefix to resolve.</param>
/// <returns>The namespace URI or an empty string.</returns>
119 119 public override string LookupNamespace(string prefix) {
120 120 if (String.IsNullOrEmpty(prefix) || prefix == m_prefix)
121 121 return m_namespaceUri;
122 122 else
123 123 return String.Empty;
124 124 }
125 125
// Attribute navigation: the reader reports no attributes (AttributeCount is 0),
// so every method below leaves the position unchanged and returns false.
126 126 public override bool MoveToAttribute(string name, string ns) {
127 127 return false;
128 128 }
129 129
130 130 public override bool MoveToAttribute(string name) {
131 131 return false;
132 132 }
133 133
// Returns false because the reader is never positioned on an attribute.
134 134 public override bool MoveToElement() {
135 135 return false;
136 136 }
137 137
138 138 public override bool MoveToFirstAttribute() {
139 139 return false;
140 140 }
141 141
142 142 public override bool MoveToNextAttribute() {
143 143 return false;
144 144 }
145 145
/// <summary>
/// Name table taken from the options, or the one created in the constructor.
/// </summary>
146 146 public override XmlNameTable NameTable {
147 147 get { return m_nameTable; }
148 148 }
149 149
/// <summary>
/// Namespace URI configured for the reader; the same value is returned for every node.
/// </summary>
150 150 public override string NamespaceURI {
151 151 get { return m_namespaceUri; }
152 152 }
153 153
/// <summary>
/// Maps the current parser element to an XML node type: the beginning of an object or array is an
/// Element, the end of an object or array is an EndElement, and a value is an Element, Text or
/// EndElement depending on the value context.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The parser element type is not one of the handled cases, or the value context is not one of the
/// four handled values.
/// </exception>
154 154 public override XmlNodeType NodeType {
155 155 get {
156 156 switch (m_parser.ElementType) {
157 157 case JSONElementType.BeginObject:
158 158 case JSONElementType.BeginArray:
159 159 return XmlNodeType.Element;
160 160 case JSONElementType.EndObject:
161 161 case JSONElementType.EndArray:
162 162 return XmlNodeType.EndElement;
163 163 case JSONElementType.Value:
// a single JSON value is reported as up to three nodes, selected by m_valueContext
164 164 switch (m_valueContext) {
165 165 case ValueContext.ElementStart:
166 166 case ValueContext.ElementEmpty:
167 167 return XmlNodeType.Element;
168 168 case ValueContext.ElementValue:
169 169 return XmlNodeType.Text;
170 170 case ValueContext.ElementEnd:
171 171 return XmlNodeType.EndElement;
172 172 default:
173 173 throw new InvalidOperationException();
174 174 }
175 175 default:
176 176 throw new InvalidOperationException();
177 177 }
178 178 }
179 179 }
180 180
/// <summary>
/// Prefix configured for the reader; the same value is returned for every node.
/// </summary>
181 181 public override string Prefix {
182 182 get { return m_prefix; }
183 183 }
184 184
/// <summary>
/// Moves to the next node. A JSON value that is not null is reported in three steps (element,
/// text, end element) without advancing the parser; otherwise the parser is advanced and the
/// local name stack is updated for the element it returns.
/// </summary>
/// <returns>
/// <c>true</c> when a node is available; <c>false</c> when the reader is not in the Initial or
/// Interactive state or the parser has no more elements.
/// </returns>
/// <remarks>
/// Any exception switches the reader to the Error state and is rethrown.
/// </remarks>
185 185 public override bool Read() {
186 186 if (m_state != System.Xml.ReadState.Interactive && m_state != System.Xml.ReadState.Initial)
187 187 return false;
188 188
189 189 if (m_state == ReadState.Initial)
190 190 m_state = System.Xml.ReadState.Interactive;
191 191
192 192 try {
// first finish the node reported by the previous call
193 193 switch (m_parser.ElementType) {
194 194 case JSONElementType.Value:
195 195 switch (m_valueContext) {
196 196 case ValueContext.ElementStart:
// element start -> text node; the text node gets an empty local name
197 197 SetLocalName(String.Empty);
198 198 m_valueContext = ValueContext.ElementValue;
199 199 return true;
200 200 case ValueContext.ElementValue:
// text node -> end element; bring back the element name
201 201 RestoreLocalName();
202 202 m_valueContext = ValueContext.ElementEnd;
203 203 return true;
204 204 case ValueContext.ElementEmpty:
205 205 case ValueContext.ElementEnd:
206 206 RestoreLocalName();
207 207 break;
208 208 }
209 209 break;
210 210 case JSONElementType.EndArray:
211 211 case JSONElementType.EndObject:
212 212 RestoreLocalName();
213 213 break;
214 214 }
// default name for the next node: the root name while the parser element type is still None,
// otherwise the current name (flattened arrays) or the configured array item name
215 215 string itemName = m_parser.ElementType == JSONElementType.None ? m_rootName : m_flattenArrays ? m_localName.localName : m_arrayItemName;
216 216 while (m_parser.Read()) {
217 217 if (!String.IsNullOrEmpty(m_parser.ElementName))
218 218 itemName = m_parser.ElementName;
219 219
220 220 switch (m_parser.ElementType) {
221 221 case JSONElementType.BeginArray:
222 222 if (m_flattenArrays && !m_localName.isArray) {
// flattened array: no node is reported for the array itself, so compensate Depth and keep reading
223 223 m_depthCorrection--;
224 224 SetLocalName(itemName, true);
225 225 continue;
226 226 } else {
227 227 SetLocalName(itemName, true);
228 228 }
229 229 break;
230 230 case JSONElementType.BeginObject:
231 231 SetLocalName(itemName);
232 232 break;
233 233 case JSONElementType.EndArray:
234 234 if (m_flattenArrays && !m_localNameStack.Peek().isArray) {
// counterpart of the flattened BeginArray branch: drop the name silently and undo the depth correction
235 235 RestoreLocalName();
236 236 m_depthCorrection++;
237 237 continue;
238 238 }
239 239 break;
240 240 case JSONElementType.EndObject:
241 241 break;
242 242 case JSONElementType.Value:
243 243 SetLocalName(itemName);
// a null value becomes an empty element, anything else starts the three step sequence
244 244 m_valueContext = m_parser.ElementValue == null ? ValueContext.ElementEmpty : ValueContext.ElementStart;
245 245 break;
246 246 default:
247 247 break;
248 248 }
249 249 return true;
250 250 }
251 251
252 252 m_state = System.Xml.ReadState.EndOfFile;
253 253 return false;
254 254 } catch {
255 255 m_state = System.Xml.ReadState.Error;
256 256 throw;
257 257 }
258 258 }
259 259
/// <summary>
/// Always returns false, there are no attribute values to read.
/// </summary>
260 260 public override bool ReadAttributeValue() {
261 261 return false;
262 262 }
263 263
/// <summary>
/// Current state of the reader as maintained by <see cref="Read"/>.
/// </summary>
264 264 public override ReadState ReadState {
265 265 get { return m_state; }
266 266 }
267 267
/// <summary>
/// Does nothing.
/// </summary>
268 268 public override void ResolveEntity() {
269 269 // intentionally empty
270 270 }
271 271
/// <summary>
/// Text of the current parser value: an empty string for null, the invariant culture
/// representation for a double and the result of ToString() for anything else.
/// </summary>
272 272 public override string Value {
273 273 get {
274 274 if (m_parser.ElementValue == null)
275 275 return String.Empty;
// doubles are formatted explicitly so the result does not depend on the current culture
276 276 if (Convert.GetTypeCode(m_parser.ElementValue) == TypeCode.Double)
277 277 return ((double)m_parser.ElementValue).ToString(CultureInfo.InvariantCulture);
278 278 else
279 279 return m_parser.ElementValue.ToString();
280 280 }
281 281 }
282 282
// Saves the current name on the stack and makes `name` the current name,
// marked as not being an array.
283 283 void SetLocalName(string name) {
284 284 m_localNameStack.Push(m_localName);
285 285 m_localName.localName = name;
286 286 m_localName.isArray = false;
287 287 }
288 288
// Saves the current name on the stack and makes `name` the current name
// with the given array flag.
289 289 void SetLocalName(string name, bool isArray) {
290 290 m_localNameStack.Push(m_localName);
291 291 m_localName.localName = name;
292 292 m_localName.isArray = isArray;
293 293 }
294 294
// Pops the most recently saved name from the stack and makes it current.
295 295 void RestoreLocalName() {
296 296 m_localName = m_localNameStack.Pop();
297 297 }
298 298
/// <summary>
/// Does nothing; the parser is released in <see cref="Dispose(bool)"/>.
/// </summary>
// NOTE(review): m_state is left unchanged here, so ReadState never reports Closed - confirm this is intended.
299 299 public override void Close() {
300 300
301 301 }
302 302
/// <summary>
/// Disposes the underlying parser when called with <c>disposing</c> set, then runs the base implementation.
/// </summary>
/// <param name="disposing"><c>true</c> to dispose the parser as well.</param>
303 303 protected override void Dispose(bool disposing) {
304 304 if (disposing) {
305 305 m_parser.Dispose();
306 306 }
307 307 base.Dispose(disposing);
308 308 }
309 309
/// <summary>
/// Creates the XmlReader for the specified file: the file is opened with File.OpenText and the
/// resulting reader is passed to the TextReader overload.
/// </summary>
/// <param name="file">Path of the file with JSON data.</param>
/// <param name="options">Options.</param>
310 310 public static JSONXmlReader Create(string file, JSONXmlReaderOptions options) {
311 311 return Create(File.OpenText(file), options);
312 312 }
313 313
314 /// <summary>
315 /// Creates the XmlReader for the specified text stream with JSON data.
316 /// </summary>
317 /// <param name="reader">Text reader that supplies the JSON text.</param>
318 /// <param name="options">Reader options, may be null.</param>
319 /// <remarks>
320 /// The reader will be disposed when the XmlReader is disposed.
321 /// </remarks>
314 322 public static JSONXmlReader Create(TextReader reader, JSONXmlReaderOptions options) {
315 323 return new JSONXmlReader(new JSONParser(reader, true), options);
316 324 }
317 325
326 /// <summary>
327 /// Creates the XmlReader for the specified stream with JSON data.
328 /// </summary>
329 /// <param name="stream">Stream with JSON data, must not be null.</param>
330 /// <param name="options">Reader options, may be null.</param>
331 /// <remarks>
332 /// The stream will be disposed when the XmlReader is disposed.
333 /// </remarks>
318 334 public static JSONXmlReader Create(Stream stream, JSONXmlReaderOptions options) {
319 335 Safe.ArgumentNotNull(stream, "stream");
320 336 // NOTE(review): this line used to say "HACK don't dispose StreamReader to keep stream opened", yet the reader goes to Create(TextReader, ...), which passes true to JSONParser (presumably the dispose flag) - confirm which behavior is intended
321 337 return Create(new StreamReader(stream), options);
322 338 }
323 339 }
324 340 }
@@ -1,260 +1,260
1 1 using Implab;
2 2 using System;
3 3 using System.Collections.Generic;
4 4 using System.IO;
5 5 using System.Linq;
6 6 using System.Text;
7 7 using System.Threading.Tasks;
8 8
9 9 namespace Implab.Parsing {
10 10 /// <summary>
11 11 /// Base class for splitting a stream of input characters into tokens.
12 12 /// </summary>
13 13 /// <remarks>
14 14 /// The scanner keeps an internal buffer with the characters of the input text. Two pointers,
15 15 /// the start and the end of the token, move over that buffer; while they move, a DFA is used
16 16 /// to determine the end of the token and whether the current character is acceptable.
17 17 /// </remarks>
18 18 public abstract class Scanner : Disposable {
// DFA state table and alphabet map saved by Switch and brought back by Restore.
19 19 struct ScannerConfig {
20 20 public DFAStateDescriptior[] states;
21 21 public int[] alphabetMap;
22 22 }
23 23
// DFA configurations saved by Switch, most recent on top.
24 24 Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();
25 25
// Active DFA: state table and the map from a character to its transition index.
26 26 DFAStateDescriptior[] m_states;
27 27 int[] m_alphabetMap;
28 28
29 29 protected DFAStateDescriptior m_currentState;
// Alphabet code of the character at m_pointer, refreshed by Shift, Switch and Restore.
30 30 int m_previewCode;
31 31
// Length and start offset (inside m_buffer) of the token being read.
32 32 protected int m_tokenLen = 0;
33 33 protected int m_tokenOffset;
34 34
// Input buffer, number of valid characters in it and the index of the current character.
35 35 protected char[] m_buffer;
36 36 protected int m_bufferSize;
37 37 protected int m_pointer;
38 38
// Optional source of further input and whether the scanner disposes it.
39 39 TextReader m_reader;
40 40 bool m_disposeReader;
41 41 int m_chunkSize = 1024; // 1k characters requested from m_reader per read
42 42 int m_limit = 10 * 1024 * 1024; // 10M, upper bound for the size of a reallocated buffer (counted in chars)
43 43
/// <summary>
/// Initializes the scanner with the states and the alphabet translation map of
/// <paramref name="definition"/> and feeds it an empty buffer.
/// </summary>
/// <param name="definition">DFA definition, must not be null.</param>
// NOTE(review): the next two lines look like the old and the new side of the same changed
// line in this diff (public -> protected) - only one of them belongs to the real file.
44 public Scanner(CDFADefinition definition) {
44 protected Scanner(CDFADefinition definition) {
45 45 Safe.ArgumentNotNull(definition, "definition");
46 46
47 47 m_states = definition.States;
48 48 m_alphabetMap = definition.Alphabet.GetTranslationMap();
49 49
50 50 Feed(new char[0]);
51 51 }
52 52
53 53 /// <summary>
54 54 /// Fills the buffer with input data.
55 55 /// </summary>
56 56 /// <param name="data">Data to process.</param>
57 57 /// <remarks>The data is not copied, the passed array is used as the
58 58 /// input buffer.</remarks>
59 59 public void Feed(char[] data) {
60 60 Safe.ArgumentNotNull(data, "data");
61 61
62 62 Feed(data, data.Length);
63 63 }
64 64
65 65 /// <summary>
66 66 /// Fills the read buffer with input data.
67 67 /// </summary>
68 68 /// <param name="data">Data to process.</param>
69 69 /// <param name="length">Length of the data to process.</param>
70 70 /// <remarks>The data is not copied, the passed array is used as the
71 71 /// input buffer.</remarks>
72 72 public void Feed(char[] data, int length) {
73 73 Safe.ArgumentNotNull(data, "data");
74 74 Safe.ArgumentInRange(length, 0, data.Length, "length");
75 75 AssertNotDisposed();
76 76
// NOTE(review): m_reader is not cleared here, so after an earlier Feed(TextReader, bool) the
// scanner would read from that reader once this buffer is exhausted - confirm this is intended.
// start one position before the data, Shift() below moves onto the first character
77 77 m_pointer = -1;
78 78 m_buffer = data;
79 79 m_bufferSize = length;
80 80 Shift();
81 81 }
82 82
/// <summary>
/// Makes the scanner read its input from <paramref name="reader"/>. A previously assigned
/// reader is disposed first when it was registered with the dispose flag set.
/// </summary>
/// <param name="reader">Source of the input text, must not be null.</param>
/// <param name="dispose"><c>true</c> to dispose the reader together with the scanner or when it is replaced.</param>
83 83 public void Feed(TextReader reader, bool dispose) {
84 84 Safe.ArgumentNotNull(reader, "reader");
85 85 AssertNotDisposed();
86 86
87 87 if (m_reader != null && m_disposeReader)
88 88 m_reader.Dispose();
89 89
90 90 m_reader = reader;
91 91 m_disposeReader = dispose;
// NOTE(review): m_tokenLen and m_tokenOffset are not reset, while ReadNextChunk reads at
// offset m_tokenLen - this looks unsafe when a reader is fed after tokens were read; confirm.
92 92 m_pointer = -1;
93 93 m_buffer = new char[m_chunkSize];
94 94 m_bufferSize = 0;
// loads the first chunk and positions the scanner on the first character
95 95 Shift();
96 96 }
97 97
98 98 /// <summary>
99 99 /// Gets the current token as a string.
100 100 /// </summary>
101 101 /// <returns>A new string with m_tokenLen characters of the buffer starting at m_tokenOffset.</returns>
102 102 protected string GetTokenValue() {
103 103 return new String(m_buffer, m_tokenOffset, m_tokenLen);
104 104 }
105 105
106 106 /// <summary>
107 107 /// Tags of the current token that were assigned in the regular expression.
108 108 /// </summary>
109 109 protected int[] TokenTags {
110 110 get {
111 111 return m_currentState.tag;
112 112 }
113 113 }
114 114
115 115 /// <summary>
116 116 /// Indicates the end of the data: the pointer has reached the end of the buffered characters.
117 117 /// </summary>
118 118 public bool EOF {
119 119 get {
120 120 return m_pointer >= m_bufferSize;
121 121 }
122 122 }
123 123
124 124 /// <summary>
125 125 /// Reads the next token; afterwards <see cref="m_tokenOffset"/> points to the start of the token,
126 126 /// <see cref="m_tokenLen"/> holds the token length and <see cref="m_buffer"/> is the character array
127 127 /// that contains the token.
128 128 /// </summary>
129 129 /// <returns><c>false</c> - the end of the data has been reached, no token was read.</returns>
/// <exception cref="ParserException">
/// The current character has no transition from a state that is not final, or the data ended in a
/// state that is not final.
/// </exception>
130 130 protected bool ReadTokenInternal() {
131 131 if (m_pointer >= m_bufferSize)
132 132 return false;
133 133
134 134 m_currentState = m_states[CDFADefinition.INITIAL_STATE];
135 135 m_tokenLen = 0;
136 136 m_tokenOffset = m_pointer;
137 137 int nextState = CDFADefinition.UNREACHEBLE_STATE;
138 138 do {
139 139 nextState = m_currentState.transitions[m_previewCode];
140 140 if (nextState == CDFADefinition.UNREACHEBLE_STATE) {
// no transition for the current character: the token ends here if the state is final
141 141 if (m_currentState.final)
142 142 return true;
143 143 else
144 144 throw new ParserException(
145 145 String.Format(
146 146 "Unexpected symbol '{0}', at pos {1}",
147 147 m_buffer[m_pointer],
148 148 Position
149 149 )
150 150 );
151 151 } else {
// the character belongs to the token
152 152 m_currentState = m_states[nextState];
153 153 m_tokenLen++;
154 154 }
155 155
156 156 } while (Shift());
157 157
158 158 // END OF DATA
159 159 if (!m_currentState.final)
160 160 throw new ParserException("Unexpected end of data");
161 161
162 162 return true;
163 163 }
164 164
165 165
// Advances to the next character, loading another chunk when the buffered data is
// exhausted, and caches the alphabet code of the new current character.
// Returns false when no more data is available; m_previewCode is left unchanged then.
166 166 bool Shift() {
167 167 m_pointer++;
168 168
169 169 if (m_pointer >= m_bufferSize) {
170 170 if (!ReadNextChunk())
171 171 return false;
172 172 }
173 173
174 174 m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
175 175
176 176 return true;
177 177 }
178 178
// Reads up to m_chunkSize more characters from m_reader into the buffer.
// Returns false when there is no reader or the reader returned no characters.
// Throws ParserException when the reallocated buffer would reach m_limit.
179 179 bool ReadNextChunk() {
180 180 if (m_reader == null)
181 181 return false;
182 182
183 183 // extend buffer if necessary
184 184 if (m_pointer + m_chunkSize > m_buffer.Length) {
185 185 // trim unused buffer head: only the characters of the current token are kept
186 186 var size = m_tokenLen + m_chunkSize;
187 187 if (size >= m_limit)
188 188 throw new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit));
189 189 var temp = new char[size];
190 190 Array.Copy(m_buffer, m_tokenOffset, temp, 0, m_tokenLen);
// rebase the indices so that the token starts at the beginning of the new buffer
191 191 m_pointer -= m_tokenOffset;
192 192 m_bufferSize -= m_tokenOffset;
193 193 m_tokenOffset = 0;
194 194 m_buffer = temp;
195 195 }
196 196
// NOTE(review): the new data is written at offset m_tokenLen, which presumably equals
// m_bufferSize at this point - confirm, see also Feed(TextReader, bool).
197 197 var read = m_reader.Read(m_buffer, m_tokenLen, m_chunkSize);
198 198 if (read == 0)
199 199 return false;
200 200
201 201 m_bufferSize += read;
202 202
203 203 return true;
204 204 }
205 205
206 206 /// <summary>
207 207 /// Position of the scanner in the input buffer: the index of the current character plus one.
208 208 /// </summary>
209 209 public int Position {
210 210 get {
211 211 return m_pointer + 1;
212 212 }
213 213 }
214 214
215 215 /// <summary>
216 216 /// Switches the internal DFA to the specified one; this makes it possible to implement
217 217 /// something similar to capturing groups. The previous DFA is saved and can be brought back with Restore.
218 218 /// </summary>
219 219 /// <param name="states">State table of the new DFA</param>
220 220 /// <param name="alphabet">Input symbol table of the new DFA</param>
221 221 protected void Switch(DFAStateDescriptior[] states, int[] alphabet) {
// NOTE(review): the argument name passed below is "dfa" while the parameter is called
// states, and alphabet is not checked for null - confirm.
222 222 Safe.ArgumentNotNull(states, "dfa");
223 223
224 224 m_defs.Push(new ScannerConfig {
225 225 states = m_states,
226 226 alphabetMap = m_alphabetMap
227 227 });
228 228
229 229 m_states = states;
230 230 m_alphabetMap = alphabet;
231 231
// re-read the code of the current character using the new alphabet
// NOTE(review): m_pointer is not checked against m_bufferSize here - verify the behavior at the end of the data.
232 232 m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
233 233 }
234 234
235 235 /// <summary>
236 236 /// Restores the previous DFA of the scanner.
237 237 /// </summary>
/// <exception cref="InvalidOperationException">There is no saved DFA to restore.</exception>
238 238 protected void Restore() {
239 239 if (m_defs.Count == 0)
240 240 throw new InvalidOperationException();
241 241 var prev = m_defs.Pop();
242 242 m_states = prev.states;
243 243 m_alphabetMap = prev.alphabetMap;
// re-read the code of the current character using the restored alphabet
// NOTE(review): m_pointer is not checked against m_bufferSize here - verify the behavior at the end of the data.
244 244 m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
245 245 }
246 246
/// <summary>
/// When disposing, disposes the reader if the scanner was told to do so, drops the buffer and
/// resets the buffer and token indices; then runs the base implementation.
/// </summary>
/// <param name="disposing"><c>true</c> to release the reader and the buffer.</param>
247 247 protected override void Dispose(bool disposing) {
248 248 if (disposing) {
249 249 if (m_reader != null && m_disposeReader)
250 250 m_reader.Dispose();
251 251 m_buffer = null;
252 252 m_bufferSize = 0;
253 253 m_pointer = 0;
254 254 m_tokenLen = 0;
255 255 m_tokenOffset = 0;
256 256 }
257 257 base.Dispose(disposing);
258 258 }
259 259 }
260 260 }
General Comments 0
You need to be logged in to leave comments. Login now