@@ -1,197 +1,203 | |||
|
1 | 1 | using Implab; |
|
2 | 2 | using Implab.Parsing; |
|
3 | 3 | using System; |
|
4 | 4 | using System.Collections.Generic; |
|
5 | 5 | using System.Diagnostics; |
|
6 | 6 | using System.Linq; |
|
7 | 7 | using System.Text; |
|
8 | 8 | using System.Threading.Tasks; |
|
9 | 9 | |
|
10 | 10 | namespace Implab.JSON { |
|
/// <summary>
/// Per-level state carried by <see cref="JSONParser"/>: one value describes the object, array or
/// document root whose contents are currently being read. Intended for internal use by the parser.
/// </summary>
public struct JSONParserContext {
    /// <summary>
    /// Name of the member being read; the parser resets it to an empty string before each element.
    /// </summary>
    public string memberName;

    /// <summary>
    /// Kind of container this level represents.
    /// </summary>
    public JSONElementContext elementContext;
}
|
18 | 18 | |
|
/// <summary>
/// Pull parser for JSON data.
/// </summary>
/// <remarks>
/// Tokens are taken from a <see cref="JSONScanner"/> and validated by three DFAs built in the static
/// constructor: one for the document root, one for the contents of an object and one for the
/// contents of an array. Each successful call to <see cref="Read"/> advances to the next element and
/// exposes it through <see cref="ElementType"/>, <see cref="ElementName"/> and
/// <see cref="ElementValue"/>.
/// </remarks>
public class JSONParser : DFAutomaton<JSONParserContext> {

    /// <summary>
    /// Tells how the next string token has to be interpreted.
    /// </summary>
    enum MemberContext {
        MemberName,
        MemberValue
    }

    static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet;
    static readonly DFAStateDescriptior[] _jsonDFA;
    static readonly DFAStateDescriptior[] _objectDFA;
    static readonly DFAStateDescriptior[] _arrayDFA;

    /// <summary>
    /// Builds the state tables of the three grammars once per process.
    /// </summary>
    static JSONParser() {
        // Document root: only an object or an array opener is accepted.
        var jsonExpression = Token.New(JsonTokenType.BeginObject, JsonTokenType.BeginArray).Tag(0);

        // A value is represented by its first token only; the contents of a nested object or array
        // are checked by a separate automaton that Read() switches to.
        var valueExpression = Token.New(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String);
        var memberExpression = Token.New(JsonTokenType.String).Cat(Token.New(JsonTokenType.NameSeparator)).Cat(valueExpression);

        // Object contents: an optional separator-delimited member list followed by EndObject.
        // NOTE(review): assumes EClosure() means "zero or more repetitions" - confirm in Implab.Parsing.
        var objectExpression = memberExpression
            .Cat(
                Token.New(JsonTokenType.ValueSeparator)
                .Cat(memberExpression)
                .EClosure()
            )
            .Optional()
            .Cat(Token.New(JsonTokenType.EndObject))
            .Tag(0);

        // Array contents: the same shape built from bare values and terminated by EndArray.
        var arrayExpression = valueExpression
            .Cat(
                Token.New(JsonTokenType.ValueSeparator)
                .Cat(valueExpression)
                .EClosure()
            )
            .Optional()
            .Cat(Token.New(JsonTokenType.EndArray))
            .Tag(0);

        _jsonDFA = BuildDFA(jsonExpression).States;
        _objectDFA = BuildDFA(objectExpression).States;
        _arrayDFA = BuildDFA(arrayExpression).States;
    }

    /// <summary>
    /// Compiles a token expression into a DFA definition over the JSON token alphabet.
    /// </summary>
    /// <param name="expr">Expression to compile.</param>
    /// <returns>The definition filled in by the builder.</returns>
    static EDFADefinition<JsonTokenType> BuildDFA(Token expr) {
        var builder = new DFABuilder();
        var dfa = new EDFADefinition<JsonTokenType>(_alphabet);
        expr.Accept(builder);

        builder.BuildDFA(dfa);
        return dfa;
    }

    JSONScanner m_scanner;
    MemberContext m_memberContext;

    JSONElementType m_elementType;
    object m_elementValue;

    /// <summary>
    /// Creates a parser over the specified JSON text.
    /// </summary>
    /// <param name="text">Text to parse; validated with <c>Safe.ArgumentNotEmpty</c>.</param>
    public JSONParser(string text)
        : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty } ) {
        Safe.ArgumentNotEmpty(text, "text");
        m_scanner = new JSONScanner();
        m_scanner.Feed(text.ToCharArray());
    }

    /// <summary>
    /// Type of the element produced by the last successful <see cref="Read"/>.
    /// </summary>
    public JSONElementType ElementType {
        get { return m_elementType; }
    }

    /// <summary>
    /// Member name stored in the current parser context; an empty string when no name was read.
    /// </summary>
    public string ElementName {
        get { return m_context.info.memberName; }
    }

    /// <summary>
    /// Value of the current element; <c>null</c> for begin/end markers and for the <c>null</c> literal.
    /// </summary>
    public object ElementValue {
        get { return m_elementValue; }
    }

    /// <summary>
    /// Advances the parser to the next element.
    /// </summary>
    /// <returns>
    /// <c>true</c> when an element was read; <c>false</c> when the scanner has no more tokens and the
    /// parser is at the document level.
    /// </returns>
    /// <exception cref="InvalidOperationException">The automaton is already in the unreachable state.</exception>
    /// <exception cref="ParserException">
    /// A token is not allowed at the current position, or the data ended inside an object or array.
    /// </exception>
    public bool Read() {
        if (m_context.current == UNREACHEBLE_STATE)
            throw new InvalidOperationException("The parser is in invalid state");
        object tokenValue;
        JsonTokenType tokenType;
        // A name belongs to a single element: drop the one left from the previous call.
        m_context.info.memberName = String.Empty;
        while (m_scanner.ReadToken(out tokenValue, out tokenType)) {
            Move((int)tokenType);
            if (m_context.current == UNREACHEBLE_STATE)
                UnexpectedToken(tokenValue, tokenType);
            switch (tokenType) {
                case JsonTokenType.BeginObject:
                    // Validate the object contents with its own automaton; the new context keeps
                    // the name of the member that holds this object.
                    Switch(
                        _objectDFA,
                        INITIAL_STATE,
                        new JSONParserContext {
                            memberName = m_context.info.memberName,
                            elementContext = JSONElementContext.Object
                        }
                    );
                    m_elementValue = null;
                    m_memberContext = MemberContext.MemberName;
                    m_elementType = JSONElementType.BeginObject;
                    return true;
                case JsonTokenType.EndObject:
                    Restore();
                    m_elementValue = null;
                    m_elementType = JSONElementType.EndObject;
                    return true;
                case JsonTokenType.BeginArray:
                    Switch(
                        _arrayDFA,
                        INITIAL_STATE,
                        new JSONParserContext {
                            memberName = m_context.info.memberName,
                            elementContext = JSONElementContext.Array
                        }
                    );
                    m_elementValue = null;
                    // Array items have no names, so every string inside an array is a value.
                    m_memberContext = MemberContext.MemberValue;
                    m_elementType = JSONElementType.BeginArray;
                    return true;
                case JsonTokenType.EndArray:
                    Restore();
                    m_elementValue = null;
                    m_elementType = JSONElementType.EndArray;
                    return true;
                case JsonTokenType.String:
                    if (m_memberContext == MemberContext.MemberName) {
                        // Remember the name and keep reading: the element is reported with its value.
                        m_context.info.memberName = (string)tokenValue;
                        break;
                    } else {
                        m_elementType = JSONElementType.Value;
                        m_elementValue = tokenValue;
                        return true;
                    }
                case JsonTokenType.Number:
                    m_elementType = JSONElementType.Value;
                    m_elementValue = tokenValue;
                    return true;
                case JsonTokenType.Literal:
                    m_elementType = JSONElementType.Value;
                    m_elementValue = ParseLiteral((string)tokenValue);
                    return true;
                case JsonTokenType.NameSeparator:
                    m_memberContext = MemberContext.MemberValue;
                    break;
                case JsonTokenType.ValueSeparator:
                    // In an object the next string is a member name, anywhere else it is a value.
                    m_memberContext = m_context.info.elementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue;
                    break;
                default:
                    UnexpectedToken(tokenValue, tokenType);
                    break;
            }
        }
        // Out of tokens: that is only legal when no object or array is still open.
        if (m_context.info.elementContext != JSONElementContext.None)
            throw new ParserException("Unexpected end of data");
        return false;
    }

    /// <summary>
    /// Converts the text of a literal token to its value.
    /// </summary>
    /// <param name="literal">Literal text: <c>null</c>, <c>false</c> or <c>true</c>.</param>
    /// <returns><c>null</c> or a boxed <see cref="bool"/>.</returns>
    /// <exception cref="ParserException">The literal is not one of the three known words.</exception>
    object ParseLiteral(string literal) {
        switch (literal) {
            case "null":
                return null;
            case "false":
                return false;
            case "true":
                return true;
            default:
                UnexpectedToken(literal, JsonTokenType.Literal);
                return null; // never reached, avoids a compiler error
        }
    }

    /// <summary>
    /// Reports a token that is not allowed at the current position; always throws.
    /// </summary>
    /// <param name="value">Value of the offending token.</param>
    /// <param name="tokenType">Type of the offending token.</param>
    /// <exception cref="ParserException">Always.</exception>
    void UnexpectedToken(object value, JsonTokenType tokenType) {
        throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value));
    }

    /// <summary>
    /// End-of-data flag, taken from the underlying scanner.
    /// </summary>
    public bool EOF {
        get {
            return m_scanner.EOF;
        }
    }
}
|
196 | 202 | |
|
197 | 203 | } |
@@ -1,207 +1,216 | |||
|
1 | 1 | using Implab; |
|
2 | 2 | using System; |
|
3 | 3 | using System.Collections.Generic; |
|
4 | 4 | using System.Linq; |
|
5 | 5 | using System.Text; |
|
6 | 6 | using System.Threading.Tasks; |
|
7 | 7 | |
|
8 | 8 | namespace Implab.Parsing { |
|
/// <summary>
/// Base class for splitting a stream of input characters into tokens.
/// </summary>
/// <remarks>
/// The scanner holds a buffer with the characters of the input text. Two pointers, the start and
/// the end of the token, move over that buffer, and a DFA is used to detect the end of a token and
/// to decide whether the current character is allowed.
/// </remarks>
public class Scanner {
    /// <summary>
    /// Saved automaton (state table plus alphabet map), kept on a stack by Switch/Restore.
    /// </summary>
    struct ScannerConfig {
        public DFAStateDescriptior[] states;
        public int[] alphabetMap;
    }

    Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();

    DFAStateDescriptior[] m_states;
    int[] m_alphabetMap;

    protected DFAStateDescriptior m_currentState;
    // Alphabet code of the character at m_pointer; consumed by the next transition lookup.
    int m_previewCode;

    protected int m_tokenLen = 0;
    protected int m_tokenOffset;

    protected char[] m_buffer;
    protected int m_bufferSize;
    protected int m_pointer;

    /// <summary>
    /// Creates a scanner for the given automaton and feeds it with the specified text.
    /// </summary>
    /// <param name="definition">Automaton that recognizes the tokens.</param>
    /// <param name="text">Text to scan; validated with <c>Safe.ArgumentNotEmpty</c>.</param>
    public Scanner(CDFADefinition definition, string text) {
        Safe.ArgumentNotNull(definition, "definition");
        Safe.ArgumentNotEmpty(text, "text");

        m_states = definition.States;
        m_alphabetMap = definition.Alphabet.GetTranslationMap();

        Feed(text.ToCharArray());
    }

    /// <summary>
    /// Creates a scanner for the given automaton with an empty input buffer.
    /// </summary>
    /// <param name="definition">Automaton that recognizes the tokens.</param>
    public Scanner(CDFADefinition definition) {
        Safe.ArgumentNotNull(definition, "definition");

        m_states = definition.States;
        m_alphabetMap = definition.Alphabet.GetTranslationMap();

        Feed(new char[0]);
    }

    /// <summary>
    /// Fills the buffer with input data.
    /// </summary>
    /// <param name="data">Data to process.</param>
    /// <remarks>The data is not copied, the passed array itself is used as the input buffer.</remarks>
    public void Feed(char[] data) {
        Safe.ArgumentNotNull(data, "data");

        Feed(data, data.Length);
    }

    /// <summary>
    /// Fills the read buffer with input data.
    /// </summary>
    /// <param name="data">Data to process.</param>
    /// <param name="length">Length of the data to process.</param>
    /// <remarks>The data is not copied, the passed array itself is used as the input buffer.</remarks>
    public void Feed(char[] data, int length) {
        Safe.ArgumentNotNull(data, "data");
        Safe.ArgumentInRange(length, 0, data.Length, "length");

        // Start one position before the data so that Shift() lands on the first character.
        m_pointer = -1;
        m_buffer = data;
        m_bufferSize = length;
        Shift();
    }

    /// <summary>
    /// Gets the current token as a string.
    /// </summary>
    /// <returns>The characters of the buffer covered by the current token.</returns>
    public string GetTokenValue() {
        return new String(m_buffer, m_tokenOffset, m_tokenLen);
    }

    /// <summary>
    /// Tags of the current token that were assigned in the regular expression.
    /// </summary>
    public int[] TokenTags {
        get {
            return m_currentState.tag;
        }
    }

    /// <summary>
    /// End-of-data flag: the read pointer has reached the end of the buffered data.
    /// </summary>
    public bool EOF {
        get {
            return m_pointer >= m_bufferSize;
        }
    }

    /// <summary>
    /// Reads the next token; afterwards <see cref="m_tokenOffset"/> points to the start of the token,
    /// <see cref="m_tokenLen"/> holds its length and <see cref="m_buffer"/> is the character array
    /// that contains the token.
    /// </summary>
    /// <returns><c>false</c> - the end of data is reached, no token was read.</returns>
    /// <exception cref="ParserException">
    /// The current character is not allowed, or the data ended in a non-final state.
    /// </exception>
    protected bool ReadTokenInternal() {
        if (m_pointer >= m_bufferSize)
            return false;

        m_currentState = m_states[CDFADefinition.INITIAL_STATE];
        m_tokenLen = 0;
        m_tokenOffset = m_pointer;
        do {
            int nextState = m_currentState.transitions[m_previewCode];
            if (nextState == CDFADefinition.UNREACHEBLE_STATE) {
                // No way forward: the token ends here if the state accepts, otherwise it is an error.
                if (m_currentState.final)
                    return true;
                else
                    throw new ParserException(
                        String.Format(
                            "Unexpected symbol '{0}', at pos {1}",
                            m_buffer[m_pointer],
                            Position
                        )
                    );
            } else {
                m_currentState = m_states[nextState];
                m_tokenLen++;
            }

        } while (Shift());

        // END OF DATA
        if (!m_currentState.final)
            throw new ParserException("Unexpected end of data");

        return true;
    }

    /// <summary>
    /// Moves the read pointer to the next character and caches its alphabet code.
    /// </summary>
    /// <returns>
    /// <c>true</c> when processing can continue; at the end of the buffer the result of
    /// <see cref="ReadNextChunk"/>.
    /// </returns>
    bool Shift() {
        m_pointer++;

        if (m_pointer >= m_bufferSize) {
            return ReadNextChunk();
        }

        m_previewCode = m_alphabetMap[m_buffer[m_pointer]];

        return true;
    }

    /// <summary>
    /// Recomputes the cached code of the current character for the active alphabet map.
    /// Does nothing when the pointer is outside of the buffered data, where there is no character
    /// to translate.
    /// </summary>
    void UpdatePreviewCode() {
        if (m_pointer >= 0 && m_pointer < m_bufferSize)
            m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
    }

    /// <summary>
    /// Called when the end of the input buffer is reached to obtain new data.
    /// </summary>
    /// <returns><c>true</c> - new data has been received, processing can continue.</returns>
    protected virtual bool ReadNextChunk() {
        return false;
    }

    /// <summary>
    /// Position of the scanner in the input buffer (the read pointer plus one).
    /// </summary>
    public int Position {
        get {
            return m_pointer + 1;
        }
    }

    /// <summary>
    /// Switches the internal DFA to the specified one, which allows implementing something similar
    /// to a capturing group. The previous automaton is saved and can be brought back with
    /// <see cref="Restore"/>.
    /// </summary>
    /// <param name="states">State table of the new DFA.</param>
    /// <param name="alphabet">Input symbol table of the new DFA.</param>
    protected void Switch(DFAStateDescriptior[] states, int[] alphabet) {
        Safe.ArgumentNotNull(states, "states");
        Safe.ArgumentNotNull(alphabet, "alphabet");

        m_defs.Push(new ScannerConfig {
            states = m_states,
            alphabetMap = m_alphabetMap
        });

        m_states = states;
        m_alphabetMap = alphabet;

        // The same character maps to a different code in the new alphabet. At the end of data there
        // is no current character, so the cached code is left as it is.
        UpdatePreviewCode();
    }

    /// <summary>
    /// Restores the previous DFA of the scanner.
    /// </summary>
    /// <exception cref="InvalidOperationException">There is no saved DFA to return to.</exception>
    protected void Restore() {
        if (m_defs.Count == 0)
            throw new InvalidOperationException();
        var prev = m_defs.Pop();
        m_states = prev.states;
        m_alphabetMap = prev.alphabetMap;
        // Re-translate the current character with the restored alphabet, unless the data has ended.
        UpdatePreviewCode();
    }
}
|
207 | 216 | } |
General Comments 0
You need to be logged in to leave comments.
Login now