@@ -1,197 +1,203 | |||||
1 | using Implab; |
|
1 | using Implab; | |
2 | using Implab.Parsing; |
|
2 | using Implab.Parsing; | |
3 | using System; |
|
3 | using System; | |
4 | using System.Collections.Generic; |
|
4 | using System.Collections.Generic; | |
5 | using System.Diagnostics; |
|
5 | using System.Diagnostics; | |
6 | using System.Linq; |
|
6 | using System.Linq; | |
7 | using System.Text; |
|
7 | using System.Text; | |
8 | using System.Threading.Tasks; |
|
8 | using System.Threading.Tasks; | |
9 |
|
9 | |||
10 | namespace Implab.JSON { |
|
10 | namespace Implab.JSON { | |
/// <summary>
/// Per-level state carried by <see cref="JSONParser"/>. Intended for internal use
/// by the parser, although the type itself is public.
/// </summary>
public struct JSONParserContext {
    /// <summary>
    /// Name of the object member being read; the parser resets it to an empty
    /// string at the start of every read.
    /// </summary>
    public string memberName;

    /// <summary>
    /// Kind of container the parser is currently inside of.
    /// </summary>
    public JSONElementContext elementContext;
}
18 |
|
18 | |||
/// <summary>
/// Pull parser for JSON data.
/// </summary>
/// <remarks>
/// Each call to <see cref="Read"/> consumes tokens from the underlying scanner until
/// one complete element (start/end of an object or array, or a value) has been
/// recognised. Three automata are used: one for the document root, one for the
/// contents of an object and one for the contents of an array; entering a container
/// switches to its automaton and leaving it restores the previous one.
/// </remarks>
public class JSONParser : DFAutomaton<JSONParserContext> {

    /// <summary>
    /// Tells how the next string token must be interpreted.
    /// </summary>
    enum MemberContext {
        MemberName,
        MemberValue
    }

    static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet;
    static readonly DFAStateDescriptior[] _jsonDFA;
    static readonly DFAStateDescriptior[] _objectDFA;
    static readonly DFAStateDescriptior[] _arrayDFA;

    /// <summary>
    /// Builds the three automata shared by all parser instances.
    /// </summary>
    static JSONParser() {
        // The document root may only start an object or an array.
        var jsonExpression = Token.New(JsonTokenType.BeginObject, JsonTokenType.BeginArray).Tag(0);

        var valueExpression = Token.New(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String);
        var memberExpression = Token.New(JsonTokenType.String).Cat(Token.New(JsonTokenType.NameSeparator)).Cat(valueExpression);

        // member (',' member)* is optional, followed by the closing brace.
        var objectExpression = memberExpression
            .Cat(
                Token.New(JsonTokenType.ValueSeparator)
                .Cat(memberExpression)
                .EClosure()
            )
            .Optional()
            .Cat(Token.New(JsonTokenType.EndObject))
            .Tag(0);

        // value (',' value)* is optional, followed by the closing bracket.
        var arrayExpression = valueExpression
            .Cat(
                Token.New(JsonTokenType.ValueSeparator)
                .Cat(valueExpression)
                .EClosure()
            )
            .Optional()
            .Cat(Token.New(JsonTokenType.EndArray))
            .Tag(0);

        _jsonDFA = BuildDFA(jsonExpression).States;
        _objectDFA = BuildDFA(objectExpression).States;
        _arrayDFA = BuildDFA(arrayExpression).States;
    }

    /// <summary>
    /// Compiles a token expression into a DFA definition over the JSON token alphabet.
    /// </summary>
    /// <param name="expr">Expression to compile.</param>
    /// <returns>The definition populated by the builder.</returns>
    static EDFADefinition<JsonTokenType> BuildDFA(Token expr) {
        var builder = new DFABuilder();
        var dfa = new EDFADefinition<JsonTokenType>(_alphabet);
        expr.Accept(builder);

        builder.BuildDFA(dfa);
        return dfa;
    }

    JSONScanner m_scanner;
    MemberContext m_memberContext;

    JSONElementType m_elementType;
    object m_elementValue;

    /// <summary>
    /// Creates a parser over the specified JSON text.
    /// </summary>
    /// <param name="text">JSON text to parse; validated with <c>Safe.ArgumentNotEmpty</c>.</param>
    public JSONParser(string text)
        : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty } ) {
        Safe.ArgumentNotEmpty(text, "text");
        m_scanner = new JSONScanner();
        m_scanner.Feed(text.ToCharArray());
    }

    /// <summary>
    /// Type of the element produced by the last successful <see cref="Read"/>.
    /// </summary>
    public JSONElementType ElementType {
        get { return m_elementType; }
    }

    /// <summary>
    /// Member name stored in the current parser context; an empty string when
    /// no member name has been read for the current element.
    /// </summary>
    public string ElementName {
        get { return m_context.info.memberName; }
    }

    /// <summary>
    /// Value of the current element; <c>null</c> for the start and the end of
    /// objects and arrays (and for the <c>null</c> literal).
    /// </summary>
    public object ElementValue {
        get { return m_elementValue; }
    }

    /// <summary>
    /// Reads the next element from the input.
    /// </summary>
    /// <returns>
    /// <c>true</c> when an element has been read; <c>false</c> when the scanner has
    /// no more tokens and no object or array is left open.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The automaton is already in the unreachable state.
    /// </exception>
    /// <exception cref="ParserException">
    /// An unexpected token was met, or the data ended inside an object or an array.
    /// </exception>
    public bool Read() {
        if (m_context.current == UNREACHEBLE_STATE)
            throw new InvalidOperationException("The parser is in invalid state");

        object tokenValue;
        JsonTokenType tokenType;

        // The name belongs to a single element only, forget the previous one.
        m_context.info.memberName = String.Empty;

        while (m_scanner.ReadToken(out tokenValue, out tokenType)) {
            Move((int)tokenType);
            if (m_context.current == UNREACHEBLE_STATE)
                UnexpectedToken(tokenValue, tokenType);

            switch (tokenType) {
                case JsonTokenType.BeginObject:
                    // The nested context inherits the member name, so the element
                    // reported for the object start carries the name of the member.
                    Switch(
                        _objectDFA,
                        INITIAL_STATE,
                        new JSONParserContext {
                            memberName = m_context.info.memberName,
                            elementContext = JSONElementContext.Object
                        }
                    );
                    m_elementValue = null;
                    m_memberContext = MemberContext.MemberName;
                    m_elementType = JSONElementType.BeginObject;
                    return true;
                case JsonTokenType.EndObject:
                    Restore();
                    m_elementValue = null;
                    m_elementType = JSONElementType.EndObject;
                    return true;
                case JsonTokenType.BeginArray:
                    Switch(
                        _arrayDFA,
                        INITIAL_STATE,
                        new JSONParserContext {
                            memberName = m_context.info.memberName,
                            elementContext = JSONElementContext.Array
                        }
                    );
                    m_elementValue = null;
                    m_memberContext = MemberContext.MemberValue;
                    m_elementType = JSONElementType.BeginArray;
                    return true;
                case JsonTokenType.EndArray:
                    Restore();
                    m_elementValue = null;
                    m_elementType = JSONElementType.EndArray;
                    return true;
                case JsonTokenType.String:
                    if (m_memberContext == MemberContext.MemberName) {
                        // A member name is not an element by itself, keep reading.
                        m_context.info.memberName = (string)tokenValue;
                        break;
                    } else {
                        m_elementType = JSONElementType.Value;
                        m_elementValue = tokenValue;
                        return true;
                    }
                case JsonTokenType.Number:
                    m_elementType = JSONElementType.Value;
                    m_elementValue = tokenValue;
                    return true;
                case JsonTokenType.Literal:
                    m_elementType = JSONElementType.Value;
                    m_elementValue = ParseLiteral((string)tokenValue);
                    return true;
                case JsonTokenType.NameSeparator:
                    m_memberContext = MemberContext.MemberValue;
                    break;
                case JsonTokenType.ValueSeparator:
                    // Inside an object the next string is a name, inside an array it is a value.
                    m_memberContext = m_context.info.elementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue;
                    break;
                default:
                    UnexpectedToken(tokenValue, tokenType);
                    break;
            }
        }

        // No more tokens: this is an error while an object or an array is still open.
        if (m_context.info.elementContext != JSONElementContext.None)
            throw new ParserException("Unexpected end of data");
        return false;
    }

    /// <summary>
    /// Converts the text of a literal token to its value.
    /// </summary>
    /// <param name="literal">Text of the literal.</param>
    /// <returns>
    /// <c>null</c> for "null", boxed <c>false</c> for "false", boxed <c>true</c> for "true".
    /// </returns>
    /// <exception cref="ParserException">The literal is none of the above.</exception>
    object ParseLiteral(string literal) {
        switch (literal) {
            case "null":
                return null;
            case "false":
                return false;
            case "true":
                return true;
            default:
                UnexpectedToken(literal, JsonTokenType.Literal);
                return null; // never reached, avoids a compiler error
        }
    }

    /// <summary>
    /// Reports an unexpected token; always throws.
    /// </summary>
    /// <param name="value">Value of the token.</param>
    /// <param name="tokenType">Type of the token.</param>
    /// <exception cref="ParserException">Always.</exception>
    void UnexpectedToken(object value, JsonTokenType tokenType) {
        throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value));
    }

    /// <summary>
    /// End-of-data indicator, taken from the underlying scanner.
    /// </summary>
    public bool EOF {
        get {
            return m_scanner.EOF;
        }
    }
}
196 |
|
202 | |||
197 | } |
|
203 | } |
@@ -1,207 +1,216 | |||||
1 | using Implab; |
|
1 | using Implab; | |
2 | using System; |
|
2 | using System; | |
3 | using System.Collections.Generic; |
|
3 | using System.Collections.Generic; | |
4 | using System.Linq; |
|
4 | using System.Linq; | |
5 | using System.Text; |
|
5 | using System.Text; | |
6 | using System.Threading.Tasks; |
|
6 | using System.Threading.Tasks; | |
7 |
|
7 | |||
8 | namespace Implab.Parsing { |
|
8 | namespace Implab.Parsing { | |
/// <summary>
/// Base class for splitting a stream of input characters into tokens.
/// </summary>
/// <remarks>
/// The scanner holds a buffer with the characters of the input text. Two pointers, the
/// start and the end of the token, move over that buffer, and a DFA is used while
/// moving to detect the end of the token and whether the current character is allowed.
/// </remarks>
public class Scanner {
    /// <summary>
    /// A saved DFA configuration: the state table and the alphabet translation map.
    /// </summary>
    struct ScannerConfig {
        public DFAStateDescriptior[] states;
        public int[] alphabetMap;
    }

    // Configurations saved by Switch and popped by Restore.
    readonly Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();

    DFAStateDescriptior[] m_states;
    int[] m_alphabetMap;

    protected DFAStateDescriptior m_currentState;
    // Translated (alphabet) code of the character under m_pointer.
    int m_previewCode;

    protected int m_tokenLen = 0;
    protected int m_tokenOffset;

    protected char[] m_buffer;
    protected int m_bufferSize;
    protected int m_pointer;

    /// <summary>
    /// Creates a scanner for the given DFA and feeds it with the specified text.
    /// </summary>
    /// <param name="definition">DFA definition used to recognise tokens.</param>
    /// <param name="text">Text to scan; validated with <c>Safe.ArgumentNotEmpty</c>.</param>
    public Scanner(CDFADefinition definition, string text) {
        Safe.ArgumentNotNull(definition, "definition");
        Safe.ArgumentNotEmpty(text, "text");

        m_states = definition.States;
        m_alphabetMap = definition.Alphabet.GetTranslationMap();

        Feed(text.ToCharArray());
    }

    /// <summary>
    /// Creates a scanner for the given DFA with an empty input buffer.
    /// </summary>
    /// <param name="definition">DFA definition used to recognise tokens.</param>
    /// <remarks>
    /// Feeding the empty buffer immediately reaches its end, therefore the virtual
    /// <see cref="ReadNextChunk"/> is invoked from this constructor.
    /// </remarks>
    public Scanner(CDFADefinition definition) {
        Safe.ArgumentNotNull(definition, "definition");

        m_states = definition.States;
        m_alphabetMap = definition.Alphabet.GetTranslationMap();

        Feed(new char[0]);
    }

    /// <summary>
    /// Fills the buffer with input data.
    /// </summary>
    /// <param name="data">Data to process.</param>
    /// <remarks>The data is not copied, the passed array itself is used as the
    /// input buffer.</remarks>
    public void Feed(char[] data) {
        Safe.ArgumentNotNull(data, "data");

        Feed(data, data.Length);
    }

    /// <summary>
    /// Fills the read buffer with input data.
    /// </summary>
    /// <param name="data">Data to process.</param>
    /// <param name="length">Length of the data to process.</param>
    /// <remarks>The data is not copied, the passed array itself is used as the
    /// input buffer.</remarks>
    public void Feed(char[] data, int length) {
        Safe.ArgumentNotNull(data, "data");
        Safe.ArgumentInRange(length, 0, data.Length, "length");

        // Start one position before the data, Shift() moves to the first character.
        m_pointer = -1;
        m_buffer = data;
        m_bufferSize = length;
        Shift();
    }

    /// <summary>
    /// Gets the current token as a string.
    /// </summary>
    /// <returns>
    /// A new string made of <see cref="m_tokenLen"/> characters of the buffer starting
    /// at <see cref="m_tokenOffset"/>.
    /// </returns>
    public string GetTokenValue() {
        return new String(m_buffer, m_tokenOffset, m_tokenLen);
    }

    /// <summary>
    /// Tags of the current token which were assigned in the regular expression.
    /// </summary>
    public int[] TokenTags {
        get {
            return m_currentState.tag;
        }
    }

    /// <summary>
    /// End-of-data indicator: <c>true</c> when the read pointer is at or beyond the
    /// end of the buffered data.
    /// </summary>
    public bool EOF {
        get {
            return m_pointer >= m_bufferSize;
        }
    }

    /// <summary>
    /// Reads the next token; afterwards <see cref="m_tokenOffset"/> points to the start of
    /// the token, <see cref="m_tokenLen"/> holds its length and <see cref="m_buffer"/> is
    /// the character array which contains the token.
    /// </summary>
    /// <returns><c>false</c> - the end of data is reached, no token has been read.</returns>
    /// <exception cref="ParserException">
    /// A character cannot continue the token and the token is not complete, or the data
    /// ended in the middle of a token.
    /// </exception>
    protected bool ReadTokenInternal() {
        if (m_pointer >= m_bufferSize)
            return false;

        m_currentState = m_states[CDFADefinition.INITIAL_STATE];
        m_tokenLen = 0;
        m_tokenOffset = m_pointer;

        do {
            int nextState = m_currentState.transitions[m_previewCode];
            if (nextState == CDFADefinition.UNREACHEBLE_STATE) {
                // The current character does not belong to the token: the token is
                // complete if the automaton stopped in a final state.
                if (m_currentState.final)
                    return true;

                throw new ParserException(
                    String.Format(
                        "Unexpected symbol '{0}', at pos {1}",
                        m_buffer[m_pointer],
                        Position
                    )
                );
            }

            m_currentState = m_states[nextState];
            m_tokenLen++;
        } while (Shift());

        // END OF DATA
        if (!m_currentState.final)
            throw new ParserException("Unexpected end of data");

        return true;
    }

    /// <summary>
    /// Moves the read pointer to the next character and refreshes the look-ahead code.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a character is available; at the end of the buffer the result
    /// of <see cref="ReadNextChunk"/>.
    /// </returns>
    bool Shift() {
        m_pointer++;

        if (m_pointer >= m_bufferSize) {
            return ReadNextChunk();
        }

        m_previewCode = m_alphabetMap[m_buffer[m_pointer]];

        return true;
    }

    /// <summary>
    /// Recomputes the look-ahead code with the current alphabet map.
    /// </summary>
    /// <remarks>
    /// Nothing is done at the end of the data: there is no character under the pointer
    /// and reading the buffer would either fail or pick up a stale character.
    /// </remarks>
    void RefreshPreviewCode() {
        if (m_pointer < m_bufferSize)
            m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
    }

    /// <summary>
    /// Called when the end of the input buffer is reached to obtain new data.
    /// </summary>
    /// <returns><c>true</c> - new data has been received, the processing can continue.</returns>
    protected virtual bool ReadNextChunk() {
        return false;
    }

    /// <summary>
    /// Position of the scanner in the input buffer (the read pointer plus one).
    /// </summary>
    public int Position {
        get {
            return m_pointer + 1;
        }
    }

    /// <summary>
    /// Switches the internal DFA to the specified one, which allows implementing
    /// something similar to a capturing group.
    /// </summary>
    /// <param name="states">State table of the new DFA.</param>
    /// <param name="alphabet">Input symbols table for the new DFA.</param>
    protected void Switch(DFAStateDescriptior[] states, int[] alphabet) {
        // Validate both arguments before touching the stack, so that a failed call
        // leaves the scanner unchanged.
        Safe.ArgumentNotNull(states, "states");
        Safe.ArgumentNotNull(alphabet, "alphabet");

        m_defs.Push(new ScannerConfig {
            states = m_states,
            alphabetMap = m_alphabetMap
        });

        m_states = states;
        m_alphabetMap = alphabet;

        // The same character has a different code in the new alphabet.
        RefreshPreviewCode();
    }

    /// <summary>
    /// Restores the previous DFA of the scanner.
    /// </summary>
    /// <exception cref="InvalidOperationException">There is no saved DFA.</exception>
    protected void Restore() {
        if (m_defs.Count == 0)
            throw new InvalidOperationException("There is no saved DFA to restore");

        var prev = m_defs.Pop();
        m_states = prev.states;
        m_alphabetMap = prev.alphabetMap;

        RefreshPreviewCode();
    }
}
207 | } |
|
216 | } |
General Comments 0
You need to be logged in to leave comments.
Login now