@@ -0,0 +1,49 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
8 | namespace Implab { | |
|
/// <summary>
/// Adapter that builds an <c>IEqualityComparer</c> out of delegates or lambda expressions.
/// </summary>
/// <typeparam name="T">Type of the values being compared.</typeparam>
public class CustomEqualityComparer<T> : IEqualityComparer<T> {
    readonly Func<T, T, bool> m_equals;
    readonly Func<T, int> m_hash;

    /// <summary>
    /// Creates a comparer backed by the supplied equality and hash functions.
    /// </summary>
    /// <param name="equality">Function that decides whether two values are equal.</param>
    /// <param name="hash">Function that produces the hash code of a value.</param>
    public CustomEqualityComparer(Func<T, T, bool> equality, Func<T, int> hash) {
        // Both delegates are mandatory; they are validated in declaration order.
        Safe.ArgumentNotNull(equality, "equality");
        Safe.ArgumentNotNull(hash, "hash");

        m_equals = equality;
        m_hash = hash;
    }

    /// <summary>
    /// Tests two values for equality by delegating to the equality function.
    /// </summary>
    /// <param name="x">First value.</param>
    /// <param name="y">Second value.</param>
    /// <returns>The result of the equality function for the pair.</returns>
    public bool Equals(T x, T y) {
        var areEqual = m_equals(x, y);
        return areEqual;
    }

    /// <summary>
    /// Computes the hash code of a value by delegating to the hash function.
    /// </summary>
    /// <param name="obj">Value to hash.</param>
    /// <remarks>Values that compare equal *must* produce the same hash code.</remarks>
    /// <returns>The hash code.</returns>
    public int GetHashCode(T obj) {
        var hashCode = m_hash(obj);
        return hashCode;
    }
}
|
49 | } |
@@ -0,0 +1,16 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | using System.Text; | |
|
5 | using System.Threading.Tasks; | |
|
6 | ||
|
7 | namespace Implab.JSON { | |
|
/// <summary>
/// Kind of container a JSON reader or writer is currently inside.
/// Intended for internal use.
/// </summary>
public enum JSONElementContext {
    /// <summary>
    /// Not inside any container.
    /// </summary>
    None = 0,
    /// <summary>
    /// Inside an object.
    /// </summary>
    Object = 1,
    /// <summary>
    /// Inside an array.
    /// </summary>
    Array = 2
}
|
16 | } |
@@ -0,0 +1,34 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | using System.Text; | |
|
5 | using System.Threading.Tasks; | |
|
6 | ||
|
7 | namespace Implab.JSON { | |
|
/// <summary>
/// Type of the element the parser is currently positioned on.
/// </summary>
public enum JSONElementType {
    /// <summary>
    /// No element.
    /// </summary>
    None = 0,
    /// <summary>
    /// Start of an object.
    /// </summary>
    BeginObject = 1,
    /// <summary>
    /// End of an object.
    /// </summary>
    EndObject = 2,
    /// <summary>
    /// Start of an array.
    /// </summary>
    BeginArray = 3,
    /// <summary>
    /// End of an array.
    /// </summary>
    EndArray = 4,
    /// <summary>
    /// A simple value.
    /// </summary>
    Value = 5
}
|
34 | } |
@@ -0,0 +1,113 | |||
|
1 | using Implab.Parsing; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
8 | namespace Implab.JSON { | |
|
/// <summary>
/// Lexical grammar of JSON. Builds two DFA definitions in its constructor: one for
/// top-level JSON tokens and one for the contents of a string literal.
/// </summary>
internal class JSONGrammar : Grammar<JSONGrammar> {
    /// <summary>
    /// Tags attached to the token expressions of this grammar.
    /// </summary>
    /// <remarks>
    /// The members from <c>None</c> through <c>ValueSeparator</c> are declared in the same
    /// order as <see cref="JsonTokenType"/>; <see cref="JSONScanner"/> casts one to the other,
    /// so the two enums must stay aligned.
    /// </remarks>
    public enum TokenType : int{
        None,
        BeginObject,
        EndObject,
        BeginArray,
        EndArray,
        String,
        Number,
        Literal,
        NameSeparator,
        ValueSeparator,

        // Tokens that occur inside a string literal.
        StringBound,
        EscapedChar,
        UnescapedChar,
        EscapedUnicode,

        // Parts of a number; only referenced by the number expression below.
        Minus,
        Plus,
        Sign,
        Integer,
        Dot,
        Exp
    }

    readonly CDFADefinition m_jsonDFA;
    readonly CDFADefinition m_stringDFA;

    /// <summary>
    /// Defines the token expressions and builds the JSON and string DFAs from them.
    /// </summary>
    public JSONGrammar() {
        // The control characters 0x00..0x1F are passed to DefineAlphabet as one group.
        DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
        var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
        var digit9 = SymbolRangeToken('1', '9');
        var zero = SymbolToken('0');
        var digit = zero.Or(digit9);
        var dot = SymbolToken('.');
        var minus = SymbolToken('-');
        var sign = SymbolSetToken('-', '+');
        var expSign = SymbolSetToken('e', 'E');
        var letters = SymbolRangeToken('a', 'z');
        // NOTE(review): Closure/EClosure are defined by the Grammar base; presumably
        // one-or-more and zero-or-more repetitions respectively - confirm there.
        var integer = zero.Or(digit9.Cat(digit.EClosure()));
        var frac = dot.Cat(digit.Closure());
        var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
        var quote = SymbolToken('"');
        var backSlash = SymbolToken('\\');
        var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
        // 'u' followed by four hex digits (the backslash is prepended where this is used).
        var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
        var escape = backSlash.Cat(specialEscapeChars.Or(unicodeEspace));
        var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
        // Structural tokens include the whitespace on both sides of the punctuation character.
        var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
        var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
        var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
        var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
        var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
        var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);

        // Number: optional minus, integer part, optional fraction, optional exponent.
        var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
        var literal = letters.Closure();
        // Any character except the control characters 0x00..0x1F, backslash and double quote.
        var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
        // 'character' and 'str' are not passed to BuildDFA in this constructor.
        var character = unescaped.Or(escape);
        var str = quote.Cat(character.EClosure()).Cat(quote);


        // Top-level tokens. A string is recognized only by its opening quote here;
        // its contents are tokenized with the string DFA.
        var jsonExpression =
            number.Tag(TokenType.Number)
            .Or(literal.Tag(TokenType.Literal))
            .Or(quote.Tag(TokenType.StringBound))
            .Or(beginObject.Tag(TokenType.BeginObject))
            .Or(endObject.Tag(TokenType.EndObject))
            .Or(beginArray.Tag(TokenType.BeginArray))
            .Or(endArray.Tag(TokenType.EndArray))
            .Or(nameSep.Tag(TokenType.NameSeparator))
            .Or(valueSep.Tag(TokenType.ValueSeparator));


        // Tokens inside a string: closing quote, simple escape, \uXXXX escape, run of plain characters.
        var jsonStringExpression =
            quote.Tag(TokenType.StringBound)
            .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
            .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
            .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));

        // No DFA is built from this expression in this constructor.
        var jsonNumberExpression =
            minus.Tag(TokenType.Minus)
            .Or(SymbolToken('+').Tag(TokenType.Plus))
            .Or(digit.Closure().Tag(TokenType.Integer))
            .Or(dot.Tag(TokenType.Dot))
            .Or(expSign.Tag(TokenType.Exp));

        m_jsonDFA = BuildDFA(jsonExpression);
        m_stringDFA = BuildDFA(jsonStringExpression);
    }

    /// <summary>
    /// DFA definition built from the top-level JSON token expression.
    /// </summary>
    public CDFADefinition JsonDFA {
        get {
            return m_jsonDFA;
        }
    }

    /// <summary>
    /// DFA definition built from the string-contents token expression.
    /// </summary>
    public CDFADefinition JsonStringDFA {
        get {
            return m_stringDFA;
        }
    }
}
|
113 | } |
@@ -0,0 +1,197 | |||
|
1 | using Implab; | |
|
2 | using Implab.Parsing; | |
|
3 | using System; | |
|
4 | using System.Collections.Generic; | |
|
5 | using System.Diagnostics; | |
|
6 | using System.Linq; | |
|
7 | using System.Text; | |
|
8 | using System.Threading.Tasks; | |
|
9 | ||
|
10 | namespace Implab.JSON { | |
|
/// <summary>
/// Per-container state kept by <see cref="JSONParser"/>. Intended for internal use.
/// </summary>
public struct JSONParserContext {
    // Name of the member most recently read in this context; JSONParser.Read resets it to String.Empty.
    public string memberName;
    // Kind of container (none, object or array) this context describes.
    public JSONElementContext elementContext;
}
|
18 | ||
|
/// <summary>
/// Pull parser for JSON data.
/// </summary>
/// <remarks>
/// Every successful call to <see cref="Read"/> positions the parser on one element
/// (start or end of an object or array, or a simple value) which is then described by
/// <see cref="ElementType"/>, <see cref="ElementName"/> and <see cref="ElementValue"/>.
/// </remarks>
public class JSONParser : DFAutomaton<JSONParserContext> {

    // Tells whether the next string token is a member name or a value.
    enum MemberContext {
        MemberName,
        MemberValue
    }

    static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet;
    static readonly DFAStateDescriptior[] _jsonDFA;
    static readonly DFAStateDescriptior[] _objectDFA;
    static readonly DFAStateDescriptior[] _arrayDFA;

    // Builds the three token-level automata: document root, object body and array body.
    static JSONParser() {
        // The document root accepts only the start of an object or of an array.
        var jsonExpression = Token.New(JsonTokenType.BeginObject, JsonTokenType.BeginArray).Tag(0);

        var valueExpression = Token.New(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String);
        var memberExpression = Token.New(JsonTokenType.String).Cat(Token.New(JsonTokenType.NameSeparator)).Cat(valueExpression);
        // Object body: optional separator-delimited list of members, then the end of the object.
        var objectExpression = memberExpression
            .Cat(
                Token.New(JsonTokenType.ValueSeparator)
                .Cat(memberExpression)
                .EClosure()
            )
            .Optional()
            .Cat(Token.New(JsonTokenType.EndObject))
            .Tag(0);
        // Array body: optional separator-delimited list of values, then the end of the array.
        var arrayExpression = valueExpression
            .Cat(
                Token.New(JsonTokenType.ValueSeparator)
                .Cat(valueExpression)
                .EClosure()
            )
            .Optional()
            .Cat(Token.New(JsonTokenType.EndArray))
            .Tag(0);

        _jsonDFA = BuildDFA(jsonExpression).States;
        _objectDFA = BuildDFA(objectExpression).States;
        _arrayDFA = BuildDFA(arrayExpression).States;
    }

    // Builds a DFA definition over the JSON token alphabet from a token expression.
    static EDFADefinition<JsonTokenType> BuildDFA(Token expr) {
        var builder = new DFABuilder();
        var dfa = new EDFADefinition<JsonTokenType>(_alphabet);
        expr.Accept(builder);

        builder.BuildDFA(dfa);
        return dfa;
    }

    JSONScanner m_scanner;
    MemberContext m_memberContext;

    JSONElementType m_elementType;
    object m_elementValue;

    /// <summary>
    /// Creates a parser over the specified JSON text.
    /// </summary>
    /// <param name="text">JSON text to parse; validated with <c>Safe.ArgumentNotEmpty</c>.</param>
    public JSONParser(string text)
        : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty } ) {
        Safe.ArgumentNotEmpty(text, "text");
        m_scanner = new JSONScanner();
        m_scanner.Feed(text.ToCharArray());
    }

    /// <summary>
    /// Type of the element the parser is positioned on after the last <see cref="Read"/>.
    /// </summary>
    public JSONElementType ElementType {
        get { return m_elementType; }
    }

    /// <summary>
    /// Member name stored in the current parser context.
    /// </summary>
    public string ElementName {
        get { return m_context.info.memberName; }
    }

    /// <summary>
    /// Value of the current element; <c>null</c> for the start and end of objects and arrays.
    /// </summary>
    public object ElementValue {
        get { return m_elementValue; }
    }

    /// <summary>
    /// Advances the parser to the next element.
    /// </summary>
    /// <returns>
    /// <c>true</c> if the parser is positioned on an element; <c>false</c> if the scanner has
    /// no more tokens and the parser is not inside an object or array.
    /// </returns>
    /// <exception cref="InvalidOperationException">The automaton is already in the unreachable state.</exception>
    /// <exception cref="ParserException">
    /// A token is not allowed at the current position, or the data ends inside an object or array.
    /// </exception>
    public bool Read() {
        if (m_context.current == UNREACHEBLE_STATE)
            throw new InvalidOperationException("The parser is in invalid state");
        object tokenValue;
        JsonTokenType tokenType;
        m_context.info.memberName = String.Empty;
        while (m_scanner.ReadToken(out tokenValue, out tokenType)) {
            Move((int)tokenType);
            if (m_context.current == UNREACHEBLE_STATE)
                UnexpectedToken(tokenValue, tokenType);
            switch (tokenType) {
                case JsonTokenType.BeginObject:
                    // Enter the object automaton; the new context carries over the member name read so far.
                    Switch(
                        _objectDFA,
                        INITIAL_STATE,
                        new JSONParserContext {
                            memberName = m_context.info.memberName,
                            elementContext = JSONElementContext.Object
                        }
                    );
                    m_elementValue = null;
                    m_memberContext = MemberContext.MemberName;
                    m_elementType = JSONElementType.BeginObject;
                    return true;
                case JsonTokenType.EndObject:
                    Restore();
                    m_elementValue = null;
                    m_elementType = JSONElementType.EndObject;
                    return true;
                case JsonTokenType.BeginArray:
                    // Enter the array automaton; the new context carries over the member name read so far.
                    Switch(
                        _arrayDFA,
                        INITIAL_STATE,
                        new JSONParserContext {
                            memberName = m_context.info.memberName,
                            elementContext = JSONElementContext.Array
                        }
                    );
                    m_elementValue = null;
                    m_memberContext = MemberContext.MemberValue;
                    m_elementType = JSONElementType.BeginArray;
                    return true;
                case JsonTokenType.EndArray:
                    Restore();
                    m_elementValue = null;
                    m_elementType = JSONElementType.EndArray;
                    return true;
                case JsonTokenType.String:
                    if (m_memberContext == MemberContext.MemberName) {
                        // A member name is not an element on its own: remember it and keep reading.
                        m_context.info.memberName = (string)tokenValue;
                        break;
                    } else {
                        m_elementType = JSONElementType.Value;
                        m_elementValue = tokenValue;
                        return true;
                    }
                case JsonTokenType.Number:
                    m_elementType = JSONElementType.Value;
                    m_elementValue = tokenValue;
                    return true;
                case JsonTokenType.Literal:
                    m_elementType = JSONElementType.Value;
                    m_elementValue = ParseLiteral((string)tokenValue);
                    return true;
                case JsonTokenType.NameSeparator:
                    m_memberContext = MemberContext.MemberValue;
                    break;
                case JsonTokenType.ValueSeparator:
                    // After a separator an object expects a member name, an array expects a value.
                    m_memberContext = m_context.info.elementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue;
                    break;
                default:
                    UnexpectedToken(tokenValue, tokenType);
                    break;
            }
        }
        // The scanner is exhausted: this is only valid outside of any object or array.
        if (m_context.info.elementContext != JSONElementContext.None)
            throw new ParserException("Unexpected end of data");
        return false;
    }

    // Maps the literals "null", "false" and "true" to their values; any other literal is an error.
    object ParseLiteral(string literal) {
        switch (literal) {
            case "null":
                return null;
            case "false" :
                return false;
            case "true":
                return true;
            default:
                UnexpectedToken(literal, JsonTokenType.Literal);
                return null; // unreachable, keeps the compiler satisfied
        }
    }

    // Always throws a ParserException describing the offending token.
    void UnexpectedToken(object value, JsonTokenType tokenType) {
        throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value));
    }

}
|
196 | ||
|
197 | } |
@@ -0,0 +1,89 | |||
|
1 | using Implab.Parsing; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Globalization; | |
|
5 | using System.Linq; | |
|
6 | using System.Text; | |
|
7 | using System.Threading.Tasks; | |
|
8 | ||
|
9 | namespace Implab.JSON { | |
|
/// <summary>
/// Scanner that splits a stream of characters into JSON tokens.
/// </summary>
public class JSONScanner : Scanner {
    // Scratch buffer in which the decoded contents of a string literal are assembled.
    char[] m_stringBuffer;
    DFAStateDescriptior[] m_stringDFA;
    int[] m_stringAlphabet;

    /// <summary>
    /// Creates a scanner that uses the JSON DFA for tokens and the string DFA for string contents.
    /// </summary>
    public JSONScanner()
        : base(JSONGrammar.Instance.JsonDFA) {
        m_stringBuffer = new char[1024];
        var dfa = JSONGrammar.Instance.JsonStringDFA;
        m_stringAlphabet = dfa.Alphabet.GetTranslationMap();
        m_stringDFA = dfa.States;
    }

    /// <summary>
    /// Reads the next token.
    /// </summary>
    /// <param name="tokenValue">
    /// Receives the token value: the decoded text for a string, a <see cref="Double"/> for a
    /// number, the raw token text otherwise; <c>null</c> when no token was read.
    /// </param>
    /// <param name="tokenType">Receives the token type; <c>None</c> when no token was read.</param>
    /// <returns><c>true</c> if a token was read, otherwise <c>false</c>.</returns>
    public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
        if (ReadTokenInternal()) {
            switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
                case JSONGrammar.TokenType.StringBound:
                    // An opening quote: read the whole literal with the string DFA.
                    tokenValue = ReadString();
                    tokenType = JsonTokenType.String;
                    break;
                case JSONGrammar.TokenType.Number:
                    tokenValue = Double.Parse(new String(m_buffer, m_tokenOffset, m_tokenLen), CultureInfo.InvariantCulture);
                    tokenType = JsonTokenType.Number;
                    break;
                default:
                    // The remaining grammar tags are cast directly to the public token type.
                    tokenType = (JsonTokenType)m_currentState.tag[0];
                    tokenValue = new String(m_buffer, m_tokenOffset, m_tokenLen);
                    break;
            }
            return true;
        }
        tokenValue = null;
        tokenType = JsonTokenType.None;
        return false;
    }

    // Reads string contents up to the closing quote and returns the decoded text.
    // Throws ParserException if the input ends before the closing quote.
    string ReadString() {
        int pos = 0;
        Switch(m_stringDFA, m_stringAlphabet);
        while (ReadTokenInternal()) {
            switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
                case JSONGrammar.TokenType.StringBound:
                    Restore();
                    return new String(m_stringBuffer, 0, pos);
                case JSONGrammar.TokenType.UnescapedChar:
                    EnsureStringBufferSize(pos + m_tokenLen);
                    Array.Copy(m_buffer, m_tokenOffset, m_stringBuffer, pos, m_tokenLen);
                    pos += m_tokenLen;
                    break;
                case JSONGrammar.TokenType.EscapedUnicode:
                    EnsureStringBufferSize(pos + 1);
                    // Skip the two characters of the "\u" prefix.
                    m_stringBuffer[pos] = StringTranslator.TranslateHexUnicode(m_buffer, m_tokenOffset + 2);
                    pos++;
                    break;
                case JSONGrammar.TokenType.EscapedChar:
                    EnsureStringBufferSize(pos + 1);
                    // Skip the backslash.
                    m_stringBuffer[pos] = StringTranslator.TranslateEscapedChar(m_buffer[m_tokenOffset + 1]);
                    pos++;
                    break;
                default:
                    break;
            }

        }

        throw new ParserException("Unexpected end of data");
    }

    // Makes sure the string buffer can hold at least 'size' characters, preserving its contents.
    // The buffer at least doubles on each growth so that a long string is not re-copied
    // on every appended chunk.
    void EnsureStringBufferSize(int size) {
        if (size > m_stringBuffer.Length) {
            var newBuffer = new char[Math.Max(size, m_stringBuffer.Length * 2)];
            m_stringBuffer.CopyTo(newBuffer, 0);
            m_stringBuffer = newBuffer;
        }
    }
}
|
89 | } |
@@ -0,0 +1,227 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.IO; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
8 | namespace Implab.JSON { | |
|
/// <summary>
/// Forward-only writer that emits JSON text to a <see cref="TextWriter"/>.
/// </summary>
/// <remarks>
/// The writer tracks the current container (none, object or array) and throws
/// <see cref="InvalidOperationException"/> when an operation does not fit that container.
/// </remarks>
public class JSONWriter {
    // State of one open container.
    struct Context {
        // True once something has been written in this container, so the next item needs a comma.
        public bool needComma;
        public JSONElementContext element;
    }
    Stack<Context> m_contextStack = new Stack<Context>();
    Context m_context;

    TextWriter m_writer;
    bool m_indent;

    static readonly char [] _escapeBKS,
        _escapeFWD,
        _escapeCR,
        _escapeNL,
        _escapeTAB,
        _escapeSLASH,
        _escapeBSLASH,
        _escapeQ;

    static JSONWriter() {
        _escapeBKS = "\\b".ToCharArray();
        _escapeFWD = "\\f".ToCharArray();
        _escapeCR = "\\r".ToCharArray();
        _escapeNL = "\\n".ToCharArray();
        _escapeTAB = "\\t".ToCharArray();
        _escapeBSLASH = "\\\\".ToCharArray();
        _escapeSLASH = "\\/".ToCharArray();
        _escapeQ = "\\\"".ToCharArray();
    }

    /// <summary>
    /// Creates a writer that emits JSON to the specified text writer.
    /// </summary>
    /// <param name="writer">Destination of the JSON text; must not be null.</param>
    public JSONWriter(TextWriter writer) {
        Safe.ArgumentNotNull(writer, "writer");

        m_writer = writer;
    }

    // Writes the separating comma if the current container already has an item,
    // and records that it has one now.
    void WriteItemSeparator() {
        if (m_context.needComma)
            m_writer.Write(", ");
        m_context.needComma = true;
    }

    // Starts a nameless item; only valid inside an array.
    void BeginArrayItem(string opName) {
        if (m_context.element != JSONElementContext.Array)
            OperationNotApplicable(opName);
        WriteItemSeparator();
    }

    // Starts a nameless container; only valid at the top level or inside an array.
    void BeginNamelessContainer(string opName) {
        if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array)
            OperationNotApplicable(opName);
        WriteItemSeparator();
    }

    // Saves the current context, makes a fresh one for the new container and writes its opening text.
    void OpenContainer(JSONElementContext element, string opening) {
        m_contextStack.Push(m_context);

        m_context = new Context { element = element, needComma = false };
        m_writer.Write(opening);
    }

    // Writes the member name followed by the name separator; only valid inside an object.
    void WriteMemberName(string name) {
        Safe.ArgumentNotEmpty(name, "name");
        if (m_context.element != JSONElementContext.Object)
            OperationNotApplicable("WriteMember");
        // TODO indent
        WriteItemSeparator();
        Write(name);
        m_writer.Write(" : ");
    }

    /// <summary>
    /// Writes a string member of the current object; a null value is written as <c>null</c>.
    /// </summary>
    public void WriteValue(string name, string value) {
        WriteMemberName(name);
        Write(value);
    }

    /// <summary>
    /// Writes a boolean member of the current object.
    /// </summary>
    public void WriteValue(string name, bool value) {
        WriteMemberName(name);
        Write(value);
    }

    /// <summary>
    /// Writes a numeric member of the current object.
    /// </summary>
    public void WriteValue(string name, double value) {
        WriteMemberName(name);
        Write(value);
    }

    /// <summary>
    /// Writes a string item of the current array; a null value is written as <c>null</c>.
    /// </summary>
    public void WriteValue(string value) {
        BeginArrayItem("WriteValue");
        Write(value);
    }

    /// <summary>
    /// Writes a boolean item of the current array.
    /// </summary>
    public void WriteValue(bool value) {
        BeginArrayItem("WriteValue");
        Write(value);
    }

    /// <summary>
    /// Writes a numeric item of the current array.
    /// </summary>
    public void WriteValue(double value) {
        BeginArrayItem("WriteValue");
        Write(value);
    }

    /// <summary>
    /// Starts a nameless object at the top level or as an array item.
    /// </summary>
    public void BeginObject() {
        BeginNamelessContainer("BeginObject");
        OpenContainer(JSONElementContext.Object, "{ ");
    }

    /// <summary>
    /// Starts an object as a named member of the current object.
    /// </summary>
    public void BeginObject(string name) {
        WriteMemberName(name);
        OpenContainer(JSONElementContext.Object, "{ ");
    }

    /// <summary>
    /// Ends the current object and returns to the enclosing container.
    /// </summary>
    public void EndObject() {
        if (m_context.element != JSONElementContext.Object)
            OperationNotApplicable("EndObject");

        m_writer.Write(" }");
        m_context = m_contextStack.Pop();
    }

    /// <summary>
    /// Starts a nameless array at the top level or as an array item.
    /// </summary>
    public void BeginArray() {
        BeginNamelessContainer("BeginArray");
        OpenContainer(JSONElementContext.Array, "[ ");
    }

    /// <summary>
    /// Starts an array as a named member of the current object.
    /// </summary>
    public void BeginArray(string name) {
        WriteMemberName(name);
        OpenContainer(JSONElementContext.Array, "[ ");
    }

    /// <summary>
    /// Ends the current array and returns to the enclosing container.
    /// </summary>
    public void EndArray() {
        if (m_context.element != JSONElementContext.Array)
            OperationNotApplicable("EndArray");

        m_writer.Write(" ]");
        m_context = m_contextStack.Pop();
    }

    void Write(bool value) {
        m_writer.Write(value ? "true" : "false");
    }

    // Writes a quoted, escaped JSON string, or the literal null for a null reference.
    void Write(string value) {
        if (value == null) {
            m_writer.Write("null");
            // Nothing more to write; without this return the code below dereferenced null.
            return;
        }

        var chars = value.ToCharArray();
        m_writer.Write('"');

        for (int i = 0; i < chars.Length; i++) {
            var ch = chars[i];

            switch (ch) {
                case '\b':
                    m_writer.Write(_escapeBKS);
                    break;
                case '\f':
                    m_writer.Write(_escapeFWD);
                    break;
                case '\r':
                    m_writer.Write(_escapeCR);
                    break;
                case '\n':
                    m_writer.Write(_escapeNL);
                    break;
                case '\t':
                    m_writer.Write(_escapeTAB);
                    break;
                case '\\':
                    m_writer.Write(_escapeBSLASH);
                    break;
                case '/':
                    m_writer.Write(_escapeSLASH);
                    break;
                case '"':
                    m_writer.Write(_escapeQ);
                    break;
                default:
                    if (ch < 0x20) {
                        // Remaining control characters are written as \u00XX.
                        m_writer.Write("\\u00{0:x2}",(int)ch);
                    } else {
                        m_writer.Write(ch);
                    }
                    break;
            }
        }

        m_writer.Write('"');
    }

    // Formats with the invariant culture: the writer's own culture may use a decimal comma,
    // which is not valid JSON.
    // NOTE(review): NaN and infinities are still written as-is and are not valid JSON.
    void Write(double value) {
        m_writer.Write(value.ToString(System.Globalization.CultureInfo.InvariantCulture));
    }

    // Always throws: reports an operation that is not valid in the current container.
    void OperationNotApplicable(string opName) {
        throw new InvalidOperationException(String.Format("The operation '{0}' isn't applicable in the context of '{1}'", opName, m_context.element ));
    }

}
|
227 | } |
@@ -0,0 +1,50 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | using System.Text; | |
|
5 | using System.Threading.Tasks; | |
|
6 | ||
|
7 | namespace Implab.JSON { | |
|
/// <summary>
/// Type of the tokens returned by <see cref="JSONScanner"/>.
/// </summary>
public enum JsonTokenType : int {
    /// <summary>
    /// No token.
    /// </summary>
    None = 0,
    /// <summary>
    /// Start of an object.
    /// </summary>
    BeginObject = 1,
    /// <summary>
    /// End of an object.
    /// </summary>
    EndObject = 2,
    /// <summary>
    /// Start of an array.
    /// </summary>
    BeginArray = 3,
    /// <summary>
    /// End of an array.
    /// </summary>
    EndArray = 4,
    /// <summary>
    /// A string.
    /// </summary>
    String = 5,
    /// <summary>
    /// A number.
    /// </summary>
    Number = 6,
    /// <summary>
    /// A literal.
    /// </summary>
    Literal = 7,
    /// <summary>
    /// Name separator <c>:</c>
    /// </summary>
    NameSeparator = 8,
    /// <summary>
    /// Value separator <c>,</c>
    /// </summary>
    ValueSeparator = 9
}
|
50 | } |
@@ -0,0 +1,96 | |||
|
1 | using Implab; | |
|
2 | using Implab.Parsing; | |
|
3 | using System; | |
|
4 | using System.Collections.Generic; | |
|
5 | using System.Diagnostics; | |
|
6 | using System.Linq; | |
|
7 | using System.Text; | |
|
8 | using System.Threading.Tasks; | |
|
9 | ||
|
10 | namespace Implab.JSON { | |
|
/// <summary>
/// Converts the escaped contents of a JSON string into plain text.
/// </summary>
public class StringTranslator : Scanner {
    // Indexed by the character that follows the backslash; holds the character it stands for.
    static readonly char[] _escMap;
    // Indexed by a hexadecimal digit character; holds its numeric value.
    static readonly int[] _hexMap;

    static StringTranslator() {
        // The double quote is included: the grammar accepts \" as an escape, and without
        // an entry here it was decoded as '\0'.
        var chars = new char[] { 'b', 'f', 't', 'r', 'n', '\\', '/', '"' };
        var vals = new char[] { '\b', '\f', '\t', '\r', '\n', '\\', '/', '"' };

        _escMap = new char[chars.Max() + 1];

        for (int i = 0; i < chars.Length; i++)
            _escMap[chars[i]] = vals[i];

        var hexs = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F' };
        var ints = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15 };

        _hexMap = new int[hexs.Max() + 1];

        for (int i = 0; i < hexs.Length; i++)
            _hexMap[hexs[i]] = ints[i];

    }

    /// <summary>
    /// Creates a translator that tokenizes its input with the JSON string DFA.
    /// </summary>
    public StringTranslator()
        : base(JSONGrammar.Instance.JsonStringDFA) {
    }

    /// <summary>
    /// Translates the escaped contents of a JSON string.
    /// </summary>
    /// <param name="data">Escaped text; must not be null.</param>
    /// <returns>The text with escape sequences replaced by the characters they denote.</returns>
    public string Translate(string data) {
        Safe.ArgumentNotNull(data, "data");
        return Translate(data.ToCharArray());
    }

    /// <summary>
    /// Translates the escaped contents of a JSON string held in a character array.
    /// </summary>
    /// <param name="data">Escaped text; must not be null.</param>
    /// <returns>The text with escape sequences replaced by the characters they denote.</returns>
    public string Translate(char[] data) {
        Safe.ArgumentNotNull(data, "data");
        return Translate(data, data.Length);
    }

    /// <summary>
    /// Translates the first <paramref name="length"/> characters of escaped JSON string contents.
    /// </summary>
    /// <param name="data">Escaped text; must not be null.</param>
    /// <param name="length">Number of characters to translate; validated against 0 and <c>data.Length</c>.</param>
    /// <returns>The text with escape sequences replaced by the characters they denote.</returns>
    public string Translate(char[] data, int length) {
        Safe.ArgumentNotNull(data, "data");
        Safe.ArgumentInRange(length, 0, data.Length, "length");

        // Every token produces at most as many characters as it consumes,
        // so the output never exceeds the input length.
        var translated = new char[length];

        Feed(data,length);

        int pos = 0;

        while (ReadTokenInternal()) {
            switch ((JSONGrammar.TokenType)TokenTags[0]) {
                case JSONGrammar.TokenType.UnescapedChar:
                    Array.Copy(m_buffer,m_tokenOffset,translated,pos,m_tokenLen);
                    pos += m_tokenLen;
                    break;
                case JSONGrammar.TokenType.EscapedChar:
                    // Skip the backslash.
                    translated[pos] = _escMap[m_buffer[m_tokenOffset + 1]];
                    pos++;
                    break;
                case JSONGrammar.TokenType.EscapedUnicode:
                    // Skip the two characters of the "\u" prefix.
                    translated[pos] = TranslateHexUnicode(m_buffer,m_tokenOffset + 2);
                    pos++;
                    break;
            }
        }

        return new String(translated, 0, pos);
    }

    // Returns the character denoted by the escape whose letter (the character after the backslash) is 'symbol'.
    internal static char TranslateEscapedChar(char symbol) {
        return _escMap[symbol];
    }

    // Decodes the four hexadecimal digits starting at 'offset' into a single UTF-16 code unit.
    internal static char TranslateHexUnicode(char[] symbols, int offset) {
        Debug.Assert(symbols != null);
        Debug.Assert(symbols.Length - offset >= 4);

        int value = (_hexMap[symbols[offset]] << 12)
            | (_hexMap[symbols[offset + 1]] << 8)
            | (_hexMap[symbols[offset + 2]] << 4)
            | (_hexMap[symbols[offset + 3]]);
        return (char)value;
    }
}
|
96 | } |
@@ -0,0 +1,23 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
8 | namespace Implab.Parsing { | |
|
/// <summary>
/// Alphabet whose input symbols are all <see cref="char"/> values.
/// </summary>
public class Alphabet: AlphabetBase<char> {

    /// <summary>
    /// Returns the map index of a symbol, which is its character code.
    /// </summary>
    public override int GetSymbolIndex(char symbol) {
        return symbol;
    }

    /// <summary>
    /// Enumerates every <see cref="char"/> value from <c>char.MinValue</c> to <c>char.MaxValue</c> inclusive.
    /// </summary>
    public override IEnumerable<char> InputSymbols {
        // The second argument of Enumerable.Range is a count, not an upper bound:
        // it must be MaxValue + 1, otherwise '\uFFFF' is left out.
        get { return Enumerable.Range(char.MinValue, char.MaxValue + 1).Select(x => (char)x); }
    }

    /// <summary>
    /// Size of the symbol map: one entry per <see cref="char"/> value.
    /// </summary>
    protected override int MapSize {
        get { return char.MaxValue + 1; }
    }
}
|
23 | } |
@@ -0,0 +1,103 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Diagnostics; | |
|
5 | using System.Linq; | |
|
6 | using System.Text; | |
|
7 | using System.Threading.Tasks; | |
|
8 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// Base implementation of an alphabet: a map from input symbols of type
    /// <typeparamref name="T"/> to numbered symbol classes. Class <see cref="UNCLASSIFIED"/>
    /// (zero) marks symbols that have not been assigned to any class.
    /// </summary>
    /// <typeparam name="T">Type of the input symbols.</typeparam>
    public abstract class AlphabetBase<T> : IAlphabet<T> {
        /// <summary>Class number of symbols that belong to no class.</summary>
        public const int UNCLASSIFIED = 0;

        // next class number to hand out; class 0 is reserved for UNCLASSIFIED
        int m_nextId = 1;
        // symbol index -> class number
        int[] m_map;

        /// <summary>
        /// Number of classes including the unclassified one, i.e. the next class number
        /// that will be assigned.
        /// </summary>
        public int Count {
            get { return m_nextId; }
        }

        /// <summary>
        /// Creates an alphabet in which every symbol is unclassified.
        /// </summary>
        protected AlphabetBase() {
            m_map = new int[MapSize];
        }

        /// <summary>
        /// Creates an alphabet over an existing translation map. The array is used as is
        /// (not copied); the next class number continues after the largest one in the map.
        /// </summary>
        /// <param name="map">Translation map, expected to have <see cref="MapSize"/> entries.</param>
        protected AlphabetBase(int[] map) {
            Debug.Assert(map != null);
            Debug.Assert(map.Length == MapSize);

            m_map = map;
            m_nextId = map.Max() + 1;
        }

        /// <summary>
        /// Assigns a fresh class to <paramref name="symbol"/> unless it already has one.
        /// </summary>
        /// <returns>The class of the symbol, new or existing.</returns>
        public int DefineSymbol(T symbol) {
            var index = GetSymbolIndex(symbol);
            if (m_map[index] == UNCLASSIFIED)
                m_map[index] = m_nextId++;
            return m_map[index];
        }

        /// <summary>
        /// Puts all of the specified symbols into one new class.
        /// </summary>
        /// <param name="symbols">Symbols of the class; duplicates are ignored.</param>
        /// <returns>The number of the new class.</returns>
        /// <exception cref="InvalidOperationException">One of the symbols already belongs
        /// to a class. The alphabet is left unchanged in that case.</exception>
        public int DefineClass(IEnumerable<T> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            // Validate everything before touching the map: a failure half way through
            // must not leave symbols assigned to a class number that was never returned.
            var indexes = new HashSet<int>();
            foreach (var symbol in symbols.Distinct()) {
                var index = GetSymbolIndex(symbol);
                if (m_map[index] != UNCLASSIFIED || !indexes.Add(index))
                    throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
            }

            foreach (var index in indexes)
                m_map[index] = m_nextId;

            return m_nextId++;
        }

        /// <summary>
        /// Builds the inverse of the translation map.
        /// </summary>
        /// <returns>An array indexed by class number; each entry lists the input symbols
        /// of that class in <see cref="InputSymbols"/> order. The entry for
        /// <see cref="UNCLASSIFIED"/> is always empty.</returns>
        public List<T>[] CreateReverseMap() {
            var reverseMap = new List<T>[Count];
            for (int i = 0; i < reverseMap.Length; i++)
                reverseMap[i] = new List<T>();

            // single pass over the symbols instead of one full scan per class
            foreach (var symbol in InputSymbols) {
                var cls = m_map[GetSymbolIndex(symbol)];
                if (cls > UNCLASSIFIED && cls < reverseMap.Length)
                    reverseMap[cls].Add(symbol);
            }

            return reverseMap;
        }

        /// <summary>
        /// Defines in <paramref name="newAlphabet"/> one class for every group of classes
        /// of this alphabet and returns the mapping between old and new class numbers.
        /// </summary>
        /// <param name="newAlphabet">Alphabet that receives the merged classes.</param>
        /// <param name="classes">Groups of class numbers of this alphabet to be merged.
        /// Groups containing <see cref="UNCLASSIFIED"/> are skipped.</param>
        /// <returns>An array indexed by old class number holding the new class number;
        /// classes of skipped groups stay mapped to <see cref="UNCLASSIFIED"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException">A group refers to a class that
        /// does not exist in this alphabet.</exception>
        public int[] Reclassify(IAlphabet<T> newAlphabet, IEnumerable<ICollection<int>> classes) {
            Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
            Safe.ArgumentNotNull(classes, "classes");
            var reverseMap = CreateReverseMap();

            int[] translationMap = new int[Count];

            foreach (var scl in classes) {
                // skip if the super class contains the unclassified element
                if (scl.Contains(UNCLASSIFIED))
                    continue;
                var range = new List<T>();
                foreach (var cl in scl) {
                    if (cl < 0 || cl >= reverseMap.Length)
                        // the single-string constructor would treat the text as a parameter name
                        throw new ArgumentOutOfRangeException("classes", String.Format("Class {0} is not valid for the current alphabet", cl));
                    range.AddRange(reverseMap[cl]);
                }
                var newClass = newAlphabet.DefineClass(range);
                foreach (var cl in scl)
                    translationMap[cl] = newClass;
            }

            return translationMap;
        }

        /// <summary>
        /// Returns the class of <paramref name="symbol"/>, or <see cref="UNCLASSIFIED"/>.
        /// </summary>
        public int Translate(T symbol) {
            return m_map[GetSymbolIndex(symbol)];
        }

        /// <summary>Maps a symbol to its slot in the translation map.</summary>
        public abstract int GetSymbolIndex(T symbol);

        /// <summary>All symbols that can appear in the input.</summary>
        public abstract IEnumerable<T> InputSymbols { get; }

        /// <summary>Number of slots in the translation map.</summary>
        protected abstract int MapSize { get; }

        /// <summary>
        /// Returns the internal translation map itself (not a copy).
        /// </summary>
        public int[] GetTranslationMap() {
            return m_map;
        }
    }
}
@@ -0,0 +1,22 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// Binary expression node printed as <c>left|right</c> (alternation).
    /// </summary>
    public class AltToken: BinaryToken {
        public AltToken(Token left, Token right)
            : base(left, right) {
        }

        /// <summary>
        /// Dispatches this node to <paramref name="visitor"/>.
        /// </summary>
        public override void Accept(IVisitor visitor) {
            Safe.ArgumentNotNull(visitor, "visitor");
            visitor.Visit(this);
        }

        /// <summary>
        /// Formats the node as <c>left|right</c>; a binary right operand is parenthesized.
        /// </summary>
        public override string ToString() {
            string format;
            if (Right is BinaryToken)
                format = "{0}|({1})";
            else
                format = "{0}|{1}";
            return String.Format(format, Left, Right);
        }
    }
}
@@ -0,0 +1,26 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// Base class for expression nodes that have a left and a right operand.
    /// </summary>
    public abstract class BinaryToken : Token {
        Token m_left;
        Token m_right;

        /// <summary>The left operand.</summary>
        public Token Left {
            get {
                return m_left;
            }
        }

        /// <summary>The right operand.</summary>
        public Token Right {
            get {
                return m_right;
            }
        }

        /// <summary>
        /// Stores both operands; each one is checked with <c>Safe.ArgumentNotNull</c>.
        /// </summary>
        protected BinaryToken(Token left, Token right) {
            m_left = left;
            Safe.ArgumentNotNull(left, "left");

            m_right = right;
            Safe.ArgumentNotNull(right, "right");
        }
    }
}
@@ -0,0 +1,36 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// DFA definition whose transitions are labelled with classes of a character
    /// <see cref="Implab.Parsing.Alphabet"/>.
    /// </summary>
    public class CDFADefinition : DFADefinitionBase {
        Alphabet m_alphabet;

        /// <summary>The alphabet this automaton is defined over.</summary>
        public Alphabet Alphabet {
            get {
                return m_alphabet;
            }
        }

        /// <summary>Number of symbol classes in the alphabet.</summary>
        public override int AlphabetSize {
            get {
                return m_alphabet.Count;
            }
        }

        /// <summary>
        /// Creates an empty definition over <paramref name="alphabet"/>.
        /// </summary>
        public CDFADefinition(Alphabet alphabet): base() {
            Safe.ArgumentNotNull(alphabet, "alphabet");
            m_alphabet = alphabet;
        }

        /// <summary>
        /// Builds an optimized copy of this automaton over a new alphabet.
        /// </summary>
        /// <returns>The new definition; this instance is not modified here.</returns>
        public CDFADefinition Optimize() {
            var result = new CDFADefinition(new Alphabet());
            var targetAlphabet = result.Alphabet;

            Optimize(result, m_alphabet, targetAlphabet);

            return result;
        }

        /// <summary>
        /// Prints the automaton using the base class implementation and this alphabet.
        /// </summary>
        public void PrintDFA() {
            PrintDFA(m_alphabet);
        }
    }
}
@@ -0,0 +1,27 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// Binary expression node printed as its two operands side by side (concatenation).
    /// </summary>
    public class CatToken : BinaryToken {
        public CatToken(Token left, Token right)
            : base(left, right) {
        }

        /// <summary>
        /// Dispatches this node to <paramref name="visitor"/>.
        /// </summary>
        public override void Accept(IVisitor visitor) {
            Safe.ArgumentNotNull(visitor, "visitor");
            visitor.Visit(this);
        }

        /// <summary>
        /// Formats the node as the left operand immediately followed by the right one.
        /// </summary>
        public override string ToString() {
            var leftText = FormatToken(Left);
            var rightText = FormatToken(Right);
            return String.Format("{0}{1}", leftText, rightText);
        }

        // Alternations are wrapped in parentheses, everything else is printed as is.
        string FormatToken(Token token) {
            if (token is AltToken)
                return String.Format("({0})", token);
            return String.Format("{0}", token);
        }
    }
}
@@ -0,0 +1,182 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Diagnostics; | |
|
5 | using System.Linq; | |
|
6 | using System.Text; | |
|
7 | using System.Threading.Tasks; | |
|
8 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// Builds a DFA from a regular expression. The builder first visits the expression
    /// tree and computes followpos; <see cref="BuildDFA(IDFADefinition)"/> is then used
    /// to construct the automaton.
    /// </summary>
    public class DFABuilder : IVisitor {
        // number of the most recently assigned position
        int m_idx = 0;
        Token m_root;
        // firstpos / lastpos of the most recently visited node
        HashSet<int> m_firstpos;
        HashSet<int> m_lastpos;

        // position -> positions that may follow it
        Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
        // position -> input symbol class at that position
        Dictionary<int, int> m_indexes = new Dictionary<int, int>();
        // end position -> tag of the end token
        Dictionary<int, int> m_ends = new Dictionary<int, int>();

        /// <summary>The followpos sets computed so far, keyed by position.</summary>
        public Dictionary<int, HashSet<int>> FollowposMap {
            get { return m_followpos; }
        }

        /// <summary>
        /// Returns the followpos set of <paramref name="pos"/>, creating and registering
        /// an empty one if the position has none yet.
        /// </summary>
        public HashSet<int> Followpos(int pos) {
            HashSet<int> set;
            if (m_followpos.TryGetValue(pos, out set))
                return set;
            return m_followpos[pos] = new HashSet<int>();
        }

        // True when the sub-expression can match the empty string.
        bool Nullable(object n) {
            if (n is EmptyToken || n is StarToken)
                return true;
            if (n is AltToken)
                return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right);
            if (n is CatToken)
                return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right);
            return false;
        }

        // Order-independent hash of a position set. Addition is unchecked on purpose:
        // Enumerable.Sum throws OverflowException when the total leaves the int range.
        static int GetSetHash(HashSet<int> set) {
            return set.Aggregate(0, (hash, n) => unchecked(hash + n));
        }

        /// <summary>
        /// Alternation: firstpos and lastpos are the unions of those of both operands.
        /// </summary>
        public void Visit(AltToken token) {
            if (m_root == null)
                m_root = token;
            var firtspos = new HashSet<int>();
            var lastpos = new HashSet<int>();

            token.Left.Accept(this);
            firtspos.UnionWith(m_firstpos);
            lastpos.UnionWith(m_lastpos);

            token.Right.Accept(this);
            firtspos.UnionWith(m_firstpos);
            lastpos.UnionWith(m_lastpos);

            m_firstpos = firtspos;
            m_lastpos = lastpos;
        }

        /// <summary>
        /// Repetition: every last position of the operand may be followed by any of its
        /// first positions. firstpos and lastpos stay those of the operand.
        /// </summary>
        public void Visit(StarToken token) {
            if (m_root == null)
                m_root = token;
            token.Token.Accept(this);

            foreach (var i in m_lastpos)
                Followpos(i).UnionWith(m_firstpos);
        }

        /// <summary>
        /// Concatenation: last positions of the left operand are followed by the first
        /// positions of the right one.
        /// </summary>
        public void Visit(CatToken token) {
            if (m_root == null)
                m_root = token;

            var firtspos = new HashSet<int>();
            var lastpos = new HashSet<int>();
            token.Left.Accept(this);
            firtspos.UnionWith(m_firstpos);
            var leftLastpos = m_lastpos;

            token.Right.Accept(this);
            lastpos.UnionWith(m_lastpos);
            var rightFirstpos = m_firstpos;

            // a nullable operand lets the positions of the other one shine through
            if (Nullable(token.Left))
                firtspos.UnionWith(rightFirstpos);

            if (Nullable(token.Right))
                lastpos.UnionWith(leftLastpos);

            m_firstpos = firtspos;
            m_lastpos = lastpos;

            foreach (var i in leftLastpos)
                Followpos(i).UnionWith(rightFirstpos);

        }

        /// <summary>
        /// Empty expression: it occupies no position, so firstpos and lastpos are empty.
        /// </summary>
        public void Visit(EmptyToken token) {
            if (m_root == null)
                m_root = token;
            // Reset the sets explicitly. Otherwise the parent node would read the sets
            // of whatever node was visited before (or null if nothing was).
            m_firstpos = new HashSet<int>();
            m_lastpos = new HashSet<int>();
        }

        /// <summary>
        /// Symbol: takes the next position and records the symbol class found there.
        /// </summary>
        public void Visit(SymbolToken token) {
            if (m_root == null)
                m_root = token;
            m_idx++;
            m_indexes[m_idx] = token.Value;
            m_firstpos = new HashSet<int>(new[] { m_idx });
            m_lastpos = new HashSet<int>(new[] { m_idx });
        }

        /// <summary>
        /// End marker: takes the next position, labels it with the unclassified symbol
        /// and remembers the tag of the token for that position.
        /// </summary>
        public void Visit(EndToken token) {
            if (m_root == null)
                m_root = token;
            m_idx++;
            m_indexes[m_idx] = Alphabet.UNCLASSIFIED;
            m_firstpos = new HashSet<int>(new[] { m_idx });
            m_lastpos = new HashSet<int>(new[] { m_idx });
            Followpos(m_idx);
            m_ends.Add(m_idx, token.Tag);
        }

        /// <summary>
        /// Constructs the automaton from the sets computed while visiting the expression.
        /// Every distinct set of positions becomes one state of <paramref name="dfa"/>.
        /// </summary>
        /// <param name="dfa">The definition that receives states and transitions.</param>
        /// <exception cref="InvalidOperationException">No expression has been visited.</exception>
        public void BuildDFA(IDFADefinition dfa) {
            Safe.ArgumentNotNull(dfa,"dfa");

            if (m_firstpos == null)
                throw new InvalidOperationException("The expression must be visited before the DFA is built");

            var stateMap = new Dictionary<HashSet<int>, int>(new CustomEqualityComparer<HashSet<int>>(
                (x, y) => x.SetEquals(y),
                GetSetHash
            ));

            stateMap[m_firstpos] = DefineState( dfa, m_firstpos);
            Debug.Assert(stateMap[m_firstpos] == DFADefinitionBase.INITIAL_STATE);

            var queue = new Queue<HashSet<int>>();

            queue.Enqueue(m_firstpos);

            while (queue.Count > 0) {
                var state = queue.Dequeue();
                var s1 = stateMap[state];

                for (int a = 0; a < dfa.AlphabetSize; a++) {
                    // positions reachable from this state by symbol class a
                    var next = new HashSet<int>();
                    foreach (var p in state) {
                        if (m_indexes[p] == a) {
                            next.UnionWith(Followpos(p));
                        }
                    }
                    if (next.Count > 0) {
                        int s2;
                        if (!stateMap.TryGetValue(next, out s2)) {
                            stateMap[next] = s2 = DefineState(dfa, next);
                            queue.Enqueue(next);
                        }
                        dfa.DefineTransition(s1, s2, a);
                    }
                }

            }
        }

        // Collects the tags of all end positions contained in the state.
        int[] GetStateTags(HashSet<int> state) {
            Debug.Assert(state != null);

            var tags = new List<int>();
            foreach (var pos in state) {
                int tag;
                if (m_ends.TryGetValue(pos, out tag))
                    tags.Add(tag);
            }
            return tags.ToArray();
        }

        // Adds a state to the automaton; it is added with tags when it contains end positions.
        int DefineState(IDFADefinition automa, HashSet<int> state) {
            Debug.Assert(automa != null);
            Debug.Assert(state != null);

            var tags = GetStateTags(state);

            return tags.Length > 0 ? automa.AddState(tags) : automa.AddState();
        }

    }
}
@@ -0,0 +1,262 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Diagnostics; | |
|
5 | using System.Linq; | |
|
6 | using System.Text; | |
|
7 | using System.Threading.Tasks; | |
|
8 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// Base class for DFA definitions: a list of states, each with a transition table
    /// indexed by symbol class. State <see cref="UNREACHEBLE_STATE"/> (zero) is a
    /// placeholder meaning "no transition"; real states start at <see cref="INITIAL_STATE"/>.
    /// </summary>
    public abstract class DFADefinitionBase : IDFADefinition {
        readonly List<DFAStateDescriptior> m_states;

        public const int INITIAL_STATE = 1;
        public const int UNREACHEBLE_STATE = 0;

        // snapshot returned by States; dropped whenever a state is added
        DFAStateDescriptior[] m_statesArray;

        public DFADefinitionBase() {
            m_states = new List<DFAStateDescriptior>();

            // slot 0 is the unreachable state, it has no transition table
            m_states.Add(new DFAStateDescriptior());
        }

        /// <summary>
        /// The states as an array, including the unreachable placeholder at index 0.
        /// The array is cached until another state is added.
        /// </summary>
        public DFAStateDescriptior[] States {
            get {
                if (m_statesArray == null)
                    m_statesArray = m_states.ToArray();
                return m_statesArray;
            }
        }

        /// <summary>True when the initial state is a final one.</summary>
        public bool InitialStateIsFinal {
            get {
                return m_states[INITIAL_STATE].final;
            }
        }

        /// <summary>
        /// Adds a non-final state.
        /// </summary>
        /// <returns>The index of the new state.</returns>
        public int AddState() {
            return AddState(null);
        }

        /// <summary>
        /// Adds a state; it is final when <paramref name="tag"/> contains at least one tag.
        /// </summary>
        /// <param name="tag">Tags of the state, may be null or empty.</param>
        /// <returns>The index of the new state.</returns>
        public int AddState(int[] tag) {
            var index = m_states.Count;
            bool final = tag != null && tag.Length > 0;
            m_states.Add(new DFAStateDescriptior {
                final = final,
                transitions = new int[AlphabetSize],
                tag = final ? tag : null
            });
            // the cached array no longer reflects the list
            m_statesArray = null;
            return index;
        }

        /// <summary>
        /// Sets the transition from state <paramref name="s1"/> to state
        /// <paramref name="s2"/> on symbol class <paramref name="symbol"/>.
        /// </summary>
        public void DefineTransition(int s1,int s2, int symbol) {
            Safe.ArgumentInRange(s1, 0, m_states.Count-1, "s1");
            Safe.ArgumentInRange(s2, 0, m_states.Count-1, "s2");
            Safe.ArgumentInRange(symbol, 0, AlphabetSize-1, "symbol");

            m_states[s1].transitions[symbol] = s2;
        }

        /// <summary>
        /// Minimizes this automaton by partition refinement and writes the result into
        /// <paramref name="minimalDFA"/>. Symbol classes that behave identically in the
        /// minimized automaton are merged into classes of <paramref name="minimalAlphabet"/>.
        /// </summary>
        /// <param name="minimalDFA">Receives the states and transitions of the result.</param>
        /// <param name="sourceAlphabet">Alphabet of this automaton.</param>
        /// <param name="minimalAlphabet">Alphabet that receives the merged symbol classes.</param>
        protected void Optimize<TA>(IDFADefinition minimalDFA,IAlphabet<TA> sourceAlphabet, IAlphabet<TA> minimalAlphabet) {
            Safe.ArgumentNotNull(minimalDFA, "minimalDFA");
            Safe.ArgumentNotNull(sourceAlphabet, "sourceAlphabet");
            Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet");

            // Hashes are order independent sums; the addition is unchecked because
            // Enumerable.Sum throws OverflowException when the total leaves the int range.
            var setComparer = new CustomEqualityComparer<HashSet<int>>(
                (x, y) => x.SetEquals(y),
                (s) => s.Aggregate(0, (hash, x) => unchecked(hash + x))
            );

            // Tag arrays are compared as sets, so the hash must ignore duplicates too.
            var arrayComparer = new CustomEqualityComparer<int[]>(
                (x,y) => (new HashSet<int>(x)).SetEquals(y),
                (a) => new HashSet<int>(a).Aggregate(0, (hash, x) => unchecked(hash + x))
            );

            var optimalStates = new HashSet<HashSet<int>>(setComparer);
            var queue = new HashSet<HashSet<int>>(setComparer);

            // Initial partition: final states grouped by their tags plus one block with
            // all non-final states. EVERY block has to be used as a splitter, otherwise
            // states that differ only in the final group they lead to are never separated.
            foreach (var g in Enumerable
                .Range(INITIAL_STATE, m_states.Count-1)
                .Select(i => new {
                    index = i,
                    descriptor = m_states[i]
                })
                .Where(x => x.descriptor.final)
                .GroupBy(x => x.descriptor.tag, arrayComparer)
            ) {
                var finalGroup = new HashSet<int>(g.Select(x => x.index));
                optimalStates.Add(finalGroup);
                queue.Add(finalGroup);
            }

            var state = new HashSet<int>(
                Enumerable
                .Range(INITIAL_STATE, m_states.Count - 1)
                .Where(i => !m_states[i].final)
            );
            // an empty block would only produce a bogus state in the result
            if (state.Count > 0) {
                optimalStates.Add(state);
                queue.Add(state);
            }

            while (queue.Count > 0) {
                var stateA = queue.First();
                queue.Remove(stateA);

                for (int c = 0; c < AlphabetSize; c++) {
                    // states that move into stateA on symbol class c
                    var stateX = new HashSet<int>();

                    for(int s = 1; s < m_states.Count; s++) {
                        if (stateA.Contains(m_states[s].transitions[c]))
                            stateX.Add(s);
                    }

                    foreach (var stateY in optimalStates.ToArray()) {
                        if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
                            // split stateY into the part inside stateX and the rest
                            var stateR1 = new HashSet<int>(stateY);
                            var stateR2 = new HashSet<int>(stateY);

                            stateR1.IntersectWith(stateX);
                            stateR2.ExceptWith(stateX);

                            optimalStates.Remove(stateY);
                            optimalStates.Add(stateR1);
                            optimalStates.Add(stateR2);

                            if (queue.Contains(stateY)) {
                                queue.Remove(stateY);
                                queue.Add(stateR1);
                                queue.Add(stateR2);
                            } else {
                                queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
                            }
                        }
                    }
                }
            }

            // build the maps between the optimal states and the original ones

            var initialState = optimalStates.Where(x => x.Contains(INITIAL_STATE)).Single();

            // original state -> index of the optimal state that contains it
            int[] reveseOptimalMap = new int[m_states.Count];
            // index of the optimal state -> the original states it consists of
            HashSet<int>[] optimalMap = new HashSet<int>[optimalStates.Count + 1];
            {
                optimalMap[0] = new HashSet<int>(); // unreachable state
                optimalMap[1] = initialState; // initial state
                foreach (var ss in initialState)
                    reveseOptimalMap[ss] = 1;

                int i = 2;
                foreach (var s in optimalStates) {
                    if (s.SetEquals(initialState))
                        continue;
                    optimalMap[i] = s;
                    foreach (var ss in s)
                        reveseOptimalMap[ss] = i;
                    i++;
                }
            }

            // compute the minimal alphabet

            var minClasses = new HashSet<HashSet<int>>(setComparer);
            var alphaQueue = new Queue<HashSet<int>>();
            alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));

            for (int s = 1 ; s < optimalMap.Length; s++) {
                var newQueue = new Queue<HashSet<int>>();

                foreach (var A in alphaQueue) {
                    if (A.Count == 1) {
                        minClasses.Add(A);
                        continue;
                    }

                    // separate the symbol classes that lead to different optimal states
                    // optimalState -> alphaClass
                    var classes = new Dictionary<int, HashSet<int>>();

                    foreach (var term in A) {
                        // find the transition of optimal state s on symbol class term
                        var s2 = reveseOptimalMap[
                            optimalMap[s].Select(x => m_states[x].transitions[term]) // all original states the class s moves to
                            .Where(x => x != 0) // existing transitions only
                            .FirstOrDefault() // the first existing target, if any
                        ];

                        HashSet<int> A2;
                        if (!classes.TryGetValue(s2, out A2)) {
                            A2 = new HashSet<int>();
                            newQueue.Enqueue(A2);
                            classes[s2] = A2;
                        }
                        A2.Add(term);
                    }
                }

                if (newQueue.Count == 0)
                    break;
                alphaQueue = newQueue;
            }

            foreach (var A in alphaQueue)
                minClasses.Add(A);

            var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses);

            // build the automaton

            var states = new int[ optimalMap.Length ];
            states[0] = UNREACHEBLE_STATE;

            for(var s = INITIAL_STATE; s < states.Length; s++) {
                var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty<int>()).Distinct().ToArray();
                if (tags.Length > 0)
                    states[s] = minimalDFA.AddState(tags);
                else
                    states[s] = minimalDFA.AddState();
            }

            Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE);

            for (int s1 = 1; s1 < m_states.Count; s1++) {
                for (int c = 0; c < AlphabetSize; c++) {
                    var s2 = m_states[s1].transitions[c];
                    if (s2 != UNREACHEBLE_STATE) {
                        minimalDFA.DefineTransition(
                            reveseOptimalMap[s1],
                            reveseOptimalMap[s2],
                            alphabetMap[c]
                        );
                    }
                }
            }

        }

        /// <summary>
        /// Writes the symbol classes of <paramref name="alphabet"/> and all transitions
        /// of this automaton to the console; final target states are marked with "$".
        /// </summary>
        protected void PrintDFA<TA>(IAlphabet<TA> alphabet) {

            var reverseMap = alphabet.CreateReverseMap();

            for (int i = 1; i < reverseMap.Length; i++) {
                Console.WriteLine("C{0}: {1}", i, String.Join(",", reverseMap[i]));
            }

            for (int i = 1; i < m_states.Count; i++) {
                var s = m_states[i];
                for (int c = 0; c < AlphabetSize; c++)
                    if (s.transitions[c] != UNREACHEBLE_STATE)
                        Console.WriteLine("S{0} -{1}-> S{2}{3}", i, String.Join(",", reverseMap[c]), s.transitions[c], m_states[s.transitions[c]].final ? "$" : "");
            }
        }

        /// <summary>Number of symbol classes; the size of every transition table.</summary>
        public abstract int AlphabetSize {
            get;
        }
    }
}
262 | } |
@@ -0,0 +1,13 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | using System.Text; | |
|
5 | using System.Threading.Tasks; | |
|
6 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// Plain data record describing one state of a DFA.
    /// </summary>
    public struct DFAStateDescriptior {
        /// <summary>Whether the state is a final one.</summary>
        public bool final;

        /// <summary>Tags attached to the state.</summary>
        public int[] tag;

        /// <summary>Transition table of the state.</summary>
        public int[] transitions;
    }
}
@@ -0,0 +1,56 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Diagnostics; | |
|
5 | using System.Linq; | |
|
6 | using System.Text; | |
|
7 | using System.Threading.Tasks; | |
|
8 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// Base class for automatons driven by a DFA state table. It keeps the current
    /// context (state table, current state and user data) and a stack of saved contexts.
    /// </summary>
    /// <typeparam name="T">Type of the user data stored with a context.</typeparam>
    public abstract class DFAutomaton<T> {
        /// <summary>One execution context of the automaton.</summary>
        protected struct ContextFrame {
            public DFAStateDescriptior[] states;
            public int current;
            public T info;
        }

        public const int INITIAL_STATE = DFADefinitionBase.INITIAL_STATE;
        public const int UNREACHEBLE_STATE = DFADefinitionBase.UNREACHEBLE_STATE;

        protected ContextFrame m_context;
        Stack<ContextFrame> m_contextStack = new Stack<ContextFrame>();

        /// <summary>Number of saved contexts on the stack.</summary>
        public int Level {
            get {
                return m_contextStack.Count;
            }
        }

        /// <summary>
        /// Creates the automaton with the given state table, start state and user data.
        /// </summary>
        protected DFAutomaton(DFAStateDescriptior[] states, int startState, T info) {
            Safe.ArgumentNotNull(states, "states");
            Safe.ArgumentInRange(startState, 0, states.Length - 1, "startState");

            m_context = new ContextFrame {
                states = states,
                current = startState,
                info = info
            };
        }

        /// <summary>
        /// Saves the current context on the stack and replaces it with a new one.
        /// </summary>
        protected void Switch(DFAStateDescriptior[] states, int current, T info) {
            Debug.Assert(states != null);
            Debug.Assert(current >= 0 && current < states.Length);

            m_contextStack.Push(m_context);
            m_context = new ContextFrame {
                states = states,
                current = current,
                info = info
            };
        }

        /// <summary>
        /// Replaces the current context with the most recently saved one.
        /// </summary>
        protected void Restore() {
            Debug.Assert(m_contextStack.Count > 0);

            var saved = m_contextStack.Pop();
            m_context = saved;
        }

        /// <summary>
        /// Follows the transition of the current state for the given input class.
        /// </summary>
        protected void Move(int input) {
            Debug.Assert(input > 0 && input < m_context.states[m_context.current].transitions.Length);

            var transitions = m_context.states[m_context.current].transitions;
            m_context.current = transitions[input];
        }
    }
}
56 | } |
@@ -0,0 +1,37 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// DFA definition whose transitions are labelled with classes of an
    /// <see cref="EnumAlphabet{T}"/>.
    /// </summary>
    /// <typeparam name="T">Type of the input symbols of the alphabet.</typeparam>
    public class EDFADefinition<T> : DFADefinitionBase where T : struct, IConvertible {
        EnumAlphabet<T> m_alphabet;

        /// <summary>The alphabet this automaton is defined over.</summary>
        public EnumAlphabet<T> Alphabet {
            get {
                return m_alphabet;
            }
        }

        /// <summary>
        /// Creates an empty definition over <paramref name="alphabet"/>.
        /// </summary>
        public EDFADefinition(EnumAlphabet<T> alphabet)
            : base() {
            Safe.ArgumentNotNull(alphabet, "alphabet");
            m_alphabet = alphabet;
        }

        /// <summary>Number of symbol classes in the alphabet.</summary>
        public override int AlphabetSize {
            get {
                return m_alphabet.Count;
            }
        }

        /// <summary>
        /// Builds an optimized copy of this automaton over a new alphabet.
        /// </summary>
        /// <returns>The new definition.</returns>
        public EDFADefinition<T> Optimize() {
            var result = new EDFADefinition<T>(new EnumAlphabet<T>());
            var targetAlphabet = result.Alphabet;

            Optimize(result, m_alphabet, targetAlphabet);

            return result;
        }

        /// <summary>
        /// Prints the automaton using the base class implementation and this alphabet.
        /// </summary>
        public void PrintDFA() {
            PrintDFA(m_alphabet);
        }
    }
}
@@ -0,0 +1,18 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// Expression node without operands, printed as <c>$</c>.
    /// </summary>
    public class EmptyToken : Token {
        /// <summary>
        /// Dispatches this node to <paramref name="visitor"/>.
        /// </summary>
        public override void Accept(IVisitor visitor) {
            Safe.ArgumentNotNull(visitor, "visitor");
            visitor.Visit(this);
        }

        /// <summary>Returns the textual form of the node.</summary>
        public override string ToString() {
            const string text = "$";
            return text;
        }
    }
}
@@ -0,0 +1,37 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// The end symbol of an extended regular expression; it is used to determine the
    /// final states while the DFA is built.
    /// </summary>
    public class EndToken: Token {

        int m_tag;

        /// <summary>
        /// Creates an end token carrying the specified tag.
        /// </summary>
        public EndToken(int tag) {
            m_tag = tag;
        }

        /// <summary>
        /// Creates an end token with tag zero.
        /// </summary>
        public EndToken()
            : this(0) {
        }

        /// <summary>The tag passed to the constructor.</summary>
        public int Tag {
            get {
                return m_tag;
            }
        }

        /// <summary>
        /// Dispatches this node to <paramref name="visitor"/>.
        /// </summary>
        public override void Accept(IVisitor visitor) {
            Safe.ArgumentNotNull(visitor, "visitor");
            visitor.Visit(this);
        }

        /// <summary>Returns the textual form of the node.</summary>
        public override string ToString() {
            const string text = "#";
            return text;
        }
    }
}
@@ -0,0 +1,68 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Globalization; | |
|
5 | using System.Linq; | |
|
6 | using System.Text; | |
|
7 | using System.Threading.Tasks; | |
|
8 | ||
|
namespace Implab.Parsing {
    /// <summary>
    /// An alphabet whose symbols are the members of an enumeration.
    /// </summary>
    /// <typeparam name="T">The enumeration type.</typeparam>
    public class EnumAlphabet<T> : AlphabetBase<T> where T : struct, IConvertible {
        // members of the enumeration ordered by their numeric value
        static readonly T[] _symbols;
        static readonly EnumAlphabet<T> _fullAlphabet;

        // Validates the enumeration type and prepares the shared data.
        static EnumAlphabet() {
            var enumType = typeof(T);

            if (!enumType.IsEnum)
                throw new InvalidOperationException("Invalid generic parameter, enumeration is required");

            if (Enum.GetUnderlyingType(enumType) != typeof(Int32))
                throw new InvalidOperationException("Only enums based on Int32 are supported");

            var values = (T[])Enum.GetValues(enumType);
            _symbols = values
                .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture))
                .ToArray();

            var highest = _symbols[_symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture);
            var lowest = _symbols[0].ToInt32(CultureInfo.InvariantCulture);

            if (highest >= _symbols.Length || lowest != 0)
                throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered");

            // every member is mapped to the class equal to its own numeric value
            var identityMap = _symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray();
            _fullAlphabet = new EnumAlphabet<T>(identityMap);
        }



        /// <summary>
        /// Shared alphabet in which every enumeration member is mapped to the class
        /// equal to its numeric value.
        /// </summary>
        public static EnumAlphabet<T> FullAlphabet {
            get {
                return _fullAlphabet;
            }
        }


        /// <summary>
        /// Creates an alphabet in which every symbol is unclassified.
        /// </summary>
        public EnumAlphabet()
            : base() {
        }

        /// <summary>
        /// Creates an alphabet over an existing translation map.
        /// </summary>
        public EnumAlphabet(int[] map)
            : base(map) {
        }


        /// <summary>
        /// Returns the numeric value of the enumeration member as its map index.
        /// </summary>
        public override int GetSymbolIndex(T symbol) {
            var index = symbol.ToInt32(CultureInfo.InvariantCulture);
            return index;
        }

        /// <summary>All members of the enumeration ordered by numeric value.</summary>
        public override IEnumerable<T> InputSymbols {
            get {
                return _symbols;
            }
        }

        /// <summary>Number of slots in the translation map: one per enumeration member.</summary>
        protected override int MapSize {
            get {
                return _symbols.Length;
            }
        }
    }
}
@@ -0,0 +1,103 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
8 | namespace Implab.Parsing { | |
|
/// <summary>
/// Abstract base class of a grammar; allows expressions to be formulated over an alphabet of <c>char</c> symbols.
/// </summary>
/// <typeparam name="TGrammar">The concrete grammar type, created on demand through its parameterless constructor.</typeparam>
public abstract class Grammar<TGrammar> where TGrammar: Grammar<TGrammar>, new() {
    Alphabet m_alphabet = new Alphabet();
    static TGrammar _instance;

    /// <summary>
    /// The shared grammar instance, created on first access.
    /// </summary>
    public static TGrammar Instance {
        get {
            return _instance ?? (_instance = new TGrammar());
        }
    }

    /// <summary>
    /// Creates a token for the <c>Alphabet.UNCLASSIFIED</c> symbol class.
    /// </summary>
    public SymbolToken UnclassifiedToken() {
        var unclassified = new SymbolToken(Alphabet.UNCLASSIFIED);
        return unclassified;
    }

    /// <summary>
    /// Defines every character of the sequence in the alphabet of the grammar.
    /// </summary>
    /// <param name="alphabet">Characters to define.</param>
    public void DefineAlphabet(IEnumerable<char> alphabet) {
        Safe.ArgumentNotNull(alphabet, "alphabet");

        foreach (var symbol in alphabet) {
            m_alphabet.DefineSymbol(symbol);
        }
    }

    /// <summary>
    /// Creates a token matching any character from <paramref name="start"/> to <paramref name="end"/> inclusive.
    /// </summary>
    public Token SymbolRangeToken(char start, char end) {
        int length = end - start + 1;
        var range = Enumerable.Range(start, length).Select(code => (char)code);
        return SymbolToken(range);
    }

    /// <summary>
    /// Creates a token for a single character, defining the character in the alphabet when necessary.
    /// </summary>
    public Token SymbolToken(char symbol) {
        var symbolClass = TranslateOrAdd(symbol);
        return Token.New(symbolClass);
    }

    /// <summary>
    /// Creates a token matching any of the specified characters, defining them in the alphabet when necessary.
    /// </summary>
    public Token SymbolToken(IEnumerable<char> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        var classes = TranslateOrAdd(symbols).ToArray();
        return Token.New(classes);
    }

    /// <summary>
    /// Creates a token matching any of the listed characters.
    /// </summary>
    public Token SymbolSetToken(params char[] set) {
        return SymbolToken((IEnumerable<char>)set);
    }

    // Translates the character, defining it in the alphabet when it is still unclassified.
    int TranslateOrAdd(char ch) {
        var symbolClass = m_alphabet.Translate(ch);
        return symbolClass == Alphabet.UNCLASSIFIED
            ? m_alphabet.DefineSymbol(ch)
            : symbolClass;
    }

    // Lazily translates the distinct characters, defining the unknown ones.
    IEnumerable<int> TranslateOrAdd(IEnumerable<char> symbols) {
        return symbols.Distinct().Select(ch => TranslateOrAdd(ch));
    }

    // Translates the character and fails when it is unclassified.
    int TranslateOrDie(char ch) {
        var symbolClass = m_alphabet.Translate(ch);
        if (symbolClass != Alphabet.UNCLASSIFIED)
            return symbolClass;

        throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
    }

    // Lazily translates the distinct characters; the failure surfaces during enumeration.
    IEnumerable<int> TranslateOrDie(IEnumerable<char> symbols) {
        return symbols.Distinct().Select(ch => TranslateOrDie(ch));
    }

    /// <summary>
    /// Creates a token matching every symbol class of the alphabet except the classes of the specified characters.
    /// </summary>
    /// <param name="symbols">Characters to exclude; each of them must already be classified.</param>
    public Token SymbolTokenExcept(IEnumerable<char> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        var allClasses = Enumerable.Range(0, m_alphabet.Count);
        var excluded = TranslateOrDie(symbols);
        return Token.New(allClasses.Except(excluded).ToArray());
    }

    /// <summary>
    /// Builds a DFA for the specified expression and returns the result of <c>Optimize()</c>.
    /// </summary>
    /// <param name="lang">Expression describing the language.</param>
    /// <exception cref="ApplicationException">The initial state of the built DFA is final.</exception>
    protected CDFADefinition BuildDFA(Token lang) {
        Safe.ArgumentNotNull(lang, "lang");

        var definition = new CDFADefinition(m_alphabet);
        var dfaBuilder = new DFABuilder();

        lang.Accept(dfaBuilder);
        dfaBuilder.BuildDFA(definition);

        if (!definition.InitialStateIsFinal)
            return definition.Optimize();

        throw new ApplicationException("The specified language contains empty token");
    }
}
|
101 | ||
|
102 | ||
|
103 | } |
@@ -0,0 +1,56 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | using System.Text; | |
|
5 | using System.Threading.Tasks; | |
|
6 | ||
|
7 | namespace Implab.Parsing { | |
|
/// <summary>
/// An alphabet: a set of symbols partitioned into classes. The classes are numbered contiguously,
/// which allows them to be used as array indices.
/// </summary>
/// <remarks>From here on, the classes of source symbols are referred to as the symbols of the alphabet.</remarks>
/// <typeparam name="TSymbol">Type of the symbols.</typeparam>
public interface IAlphabet<TSymbol> {
    /// <summary>
    /// Number of symbols in the alphabet.
    /// </summary>
    int Count { get; }
    /// <summary>
    /// Adds a new symbol to the alphabet; if the symbol has already been added, the class
    /// previously associated with it is returned.
    /// </summary>
    /// <param name="symbol">The symbol to add.</param>
    /// <returns>Index of the class associated with the symbol.</returns>
    int DefineSymbol(TSymbol symbol);
    /// <summary>
    /// Adds a class of symbols. The specified set of source symbols is mapped to a single
    /// symbol of the alphabet.
    /// </summary>
    /// <param name="symbols">Set of source symbols.</param>
    /// <returns>Identifier of the alphabet symbol.</returns>
    int DefineClass(IEnumerable<TSymbol> symbols);
    /// <summary>
    /// Creates a reverse map from each symbol of the alphabet to the source symbols
    /// associated with it.
    /// </summary>
    /// <returns>The reverse map (presumably indexed by alphabet symbol; confirm against implementations).</returns>
    List<TSymbol>[] CreateReverseMap();
    /// <summary>
    /// Creates a new alphabet based on the current one by grouping its symbols into larger,
    /// non-overlapping symbol classes.
    /// </summary>
    /// <param name="newAlphabet">A new, empty alphabet in which the classes will be defined.</param>
    /// <param name="classes">Set of symbol classes of the current alphabet.</param>
    /// <returns>Map for converting symbols of the current alphabet to symbols of the new one.</returns>
    int[] Reclassify(IAlphabet<TSymbol> newAlphabet, IEnumerable<ICollection<int>> classes);

    /// <summary>
    /// Converts an input symbol to the index of a symbol of the alphabet.
    /// </summary>
    /// <param name="symobl">The source symbol.</param>
    /// <returns>Index in the alphabet.</returns>
    int Translate(TSymbol symobl);
}
|
56 | } |
@@ -0,0 +1,36 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | using System.Text; | |
|
5 | using System.Threading.Tasks; | |
|
6 | ||
|
7 | namespace Implab.Parsing { | |
|
/// <summary>
/// Interface for defining a DFA; allows states to be added and transitions to be defined.
/// </summary>
public interface IDFADefinition {
    /// <summary>
    /// Adds a state to the automaton.
    /// </summary>
    /// <returns>Index of the added state.</returns>
    int AddState();
    /// <summary>
    /// Adds a final state with the specified tags; if no tags are specified, the added
    /// state will not be final.
    /// </summary>
    /// <param name="tags">Tags of the state.</param>
    /// <returns>Index of the added state.</returns>
    int AddState(int[] tags);
    /// <summary>
    /// Defines a transition between states.
    /// </summary>
    /// <param name="s1">Source state.</param>
    /// <param name="s2">Target state.</param>
    /// <param name="input">Input symbol.</param>
    void DefineTransition(int s1, int s2, int input);
    /// <summary>
    /// Size of the input alphabet.
    /// </summary>
    int AlphabetSize { get; }
}
|
36 | } |
@@ -0,0 +1,19 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | using System.Text; | |
|
5 | using System.Threading.Tasks; | |
|
6 | ||
|
7 | namespace Implab.Parsing { | |
|
/// <summary>
/// Interface of a visitor of the syntax tree of a regular expression.
/// </summary>
/// <remarks>
/// One overload exists per node type; a node dispatches to the matching overload from its
/// <c>Accept</c> method.
/// </remarks>
public interface IVisitor {
    /// <summary>Visits an <see cref="AltToken"/> node.</summary>
    void Visit(AltToken token);
    /// <summary>Visits a <see cref="StarToken"/> node.</summary>
    void Visit(StarToken token);
    /// <summary>Visits a <see cref="CatToken"/> node.</summary>
    void Visit(CatToken token);
    /// <summary>Visits an <see cref="EmptyToken"/> node.</summary>
    void Visit(EmptyToken token);
    /// <summary>Visits an <see cref="EndToken"/> node.</summary>
    void Visit(EndToken token);
    /// <summary>Visits a <see cref="SymbolToken"/> node.</summary>
    void Visit(SymbolToken token);
}
|
19 | } |
@@ -0,0 +1,17 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | using System.Text; | |
|
5 | ||
|
6 | namespace Implab.Parsing { | |
|
/// <summary>
/// Exception raised by the scanner when the input cannot be split into tokens.
/// </summary>
[Serializable]
public class ParserException : Exception {
    /// <summary>
    /// Creates an exception with the default message.
    /// </summary>
    public ParserException()
        : base() {
    }

    /// <summary>
    /// Creates an exception with the specified message.
    /// </summary>
    /// <param name="message">Description of the error.</param>
    public ParserException(string message)
        : base(message) {
    }

    /// <summary>
    /// Creates an exception with the specified message and inner exception.
    /// </summary>
    /// <param name="message">Description of the error.</param>
    /// <param name="inner">The exception that caused this one.</param>
    public ParserException(string message, Exception inner)
        : base(message, inner) {
    }

    /// <summary>
    /// Creates an exception from serialized data.
    /// </summary>
    /// <param name="info">Serialized object data.</param>
    /// <param name="context">Description of the source or destination of the serialized stream.</param>
    protected ParserException(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context)
        : base(info, context) {
    }
}
|
17 | } |
@@ -0,0 +1,207 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
8 | namespace Implab.Parsing { | |
|
/// <summary>
/// Base class for splitting a stream of input characters into tokens.
/// </summary>
/// <remarks>
/// The scanner holds a buffer with the characters of the input text. Two positions, the start of
/// the token and the current position, move along the buffer, and a DFA is used to detect the end
/// of a token and to check whether the current character is acceptable.
/// </remarks>
public class Scanner {
    // Saved DFA configuration, used by Switch/Restore.
    struct ScannerConfig {
        public DFAStateDescriptior[] states;
        public int[] alphabetMap;
    }

    Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();

    DFAStateDescriptior[] m_states;
    int[] m_alphabetMap;

    protected DFAStateDescriptior m_currentState;

    // Symbol class of the character at m_pointer according to the current alphabet map.
    int m_previewCode;

    protected int m_tokenLen = 0;
    protected int m_tokenOffset;

    protected char[] m_buffer;
    protected int m_bufferSize;
    protected int m_pointer;

    /// <summary>
    /// Creates a scanner for the specified DFA and feeds it with the specified text.
    /// </summary>
    /// <param name="definition">DFA definition used to recognize tokens.</param>
    /// <param name="text">Text to scan; validated with <c>Safe.ArgumentNotEmpty</c>.</param>
    public Scanner(CDFADefinition definition, string text) {
        Safe.ArgumentNotNull(definition, "definition");
        Safe.ArgumentNotEmpty(text, "text");

        m_states = definition.States;
        m_alphabetMap = definition.Alphabet.GetTranslationMap();

        Feed(text.ToCharArray());
    }

    /// <summary>
    /// Creates a scanner for the specified DFA with an empty input buffer.
    /// </summary>
    /// <param name="definition">DFA definition used to recognize tokens.</param>
    public Scanner(CDFADefinition definition) {
        Safe.ArgumentNotNull(definition, "definition");

        m_states = definition.States;
        m_alphabetMap = definition.Alphabet.GetTranslationMap();

        Feed(new char[0]);
    }

    /// <summary>
    /// Fills the buffer with input data.
    /// </summary>
    /// <param name="data">Data to process.</param>
    /// <remarks>The data is not copied; the passed array is used as the input buffer.</remarks>
    public void Feed(char[] data) {
        Safe.ArgumentNotNull(data, "data");

        Feed(data, data.Length);
    }

    /// <summary>
    /// Fills the read buffer with input data.
    /// </summary>
    /// <param name="data">Data to process.</param>
    /// <param name="length">Length of the data to process.</param>
    /// <remarks>The data is not copied; the passed array is used as the input buffer.</remarks>
    public void Feed(char[] data, int length) {
        Safe.ArgumentNotNull(data, "data");
        Safe.ArgumentInRange(length, 0, data.Length, "length");

        // Start one position before the data so that Shift() lands on the first character.
        m_pointer = -1;
        m_buffer = data;
        m_bufferSize = length;
        Shift();
    }

    /// <summary>
    /// Gets the current token as a string.
    /// </summary>
    /// <returns>The characters of the buffer covered by the current token.</returns>
    public string GetTokenValue() {
        return new String(m_buffer, m_tokenOffset, m_tokenLen);
    }

    /// <summary>
    /// Tags of the current token which were assigned in the regular expression.
    /// </summary>
    public int[] TokenTags {
        get {
            return m_currentState.tag;
        }
    }

    /// <summary>
    /// Reads the next token; afterwards <see cref="m_tokenOffset"/> points to the start of the token,
    /// <see cref="m_tokenLen"/> holds its length and <see cref="m_buffer"/> is the array of characters
    /// containing the token.
    /// </summary>
    /// <returns><c>false</c> - the end of the data has been reached, no token has been read.</returns>
    /// <exception cref="ParserException">A character is not accepted by the DFA, or the data ended
    /// while the DFA was not in a final state.</exception>
    protected bool ReadTokenInternal() {
        if (m_pointer >= m_bufferSize)
            return false;

        m_currentState = m_states[CDFADefinition.INITIAL_STATE];
        m_tokenLen = 0;
        m_tokenOffset = m_pointer;
        int nextState = CDFADefinition.UNREACHEBLE_STATE;
        do {
            nextState = m_currentState.transitions[m_previewCode];
            if (nextState == CDFADefinition.UNREACHEBLE_STATE) {
                // No transition for the current character: the token ends here if the state is final.
                if (m_currentState.final)
                    return true;
                else
                    throw new ParserException(
                        String.Format(
                            "Unexpected symbol '{0}', at pos {1}",
                            m_buffer[m_pointer],
                            Position
                        )
                    );
            } else {
                m_currentState = m_states[nextState];
                m_tokenLen++;
            }

        } while (Shift());

        // END OF DATA
        if (!m_currentState.final)
            throw new ParserException("Unexpected end of data");

        return true;
    }

    // Moves to the next character and caches its symbol class; when the end of the buffer is
    // reached the result of ReadNextChunk() is returned instead.
    bool Shift() {
        m_pointer++;

        if (m_pointer >= m_bufferSize) {
            return ReadNextChunk();
        }

        m_previewCode = m_alphabetMap[m_buffer[m_pointer]];

        return true;
    }

    // Recomputes the cached symbol class after the alphabet map has been replaced. When the
    // pointer is already past the end of the data there is no current character to classify.
    void RefreshPreviewCode() {
        if (m_pointer >= 0 && m_pointer < m_bufferSize)
            m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
    }

    /// <summary>
    /// Called when the end of the input buffer is reached to obtain new data.
    /// </summary>
    /// <returns><c>true</c> - new data has been received, processing can continue.</returns>
    protected virtual bool ReadNextChunk() {
        return false;
    }

    /// <summary>
    /// Position of the scanner in the input buffer.
    /// </summary>
    public int Position {
        get {
            return m_pointer + 1;
        }
    }

    /// <summary>
    /// Switches the internal DFA to the specified one; this allows something similar to a
    /// capturing group to be implemented.
    /// </summary>
    /// <param name="states">State table of the new DFA.</param>
    /// <param name="alphabet">Input symbol table for the new DFA.</param>
    /// <exception cref="ArgumentNullException"><paramref name="states"/> or
    /// <paramref name="alphabet"/> is <c>null</c>.</exception>
    protected void Switch(DFAStateDescriptior[] states, int[] alphabet) {
        // Validate both arguments before touching the scanner so that a failure leaves it unchanged.
        Safe.ArgumentNotNull(states, "states");
        Safe.ArgumentNotNull(alphabet, "alphabet");

        m_defs.Push(new ScannerConfig {
            states = m_states,
            alphabetMap = m_alphabetMap
        });

        m_states = states;
        m_alphabetMap = alphabet;

        RefreshPreviewCode();
    }

    /// <summary>
    /// Restores the previous DFA of the scanner.
    /// </summary>
    /// <exception cref="InvalidOperationException">There is no saved DFA to restore.</exception>
    protected void Restore() {
        if (m_defs.Count == 0)
            throw new InvalidOperationException();
        var prev = m_defs.Pop();
        m_states = prev.states;
        m_alphabetMap = prev.alphabetMap;
        RefreshPreviewCode();
    }
}
|
207 | } |
@@ -0,0 +1,34 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
8 | namespace Implab.Parsing { | |
|
/// <summary>
/// Closure of an expression: zero or more repetitions.
/// </summary>
public class StarToken: Token {

    Token m_token;

    /// <summary>
    /// The repeated expression.
    /// </summary>
    public Token Token {
        get {
            return m_token;
        }
    }

    /// <summary>
    /// Creates the closure of the specified expression.
    /// </summary>
    /// <param name="token">Expression to repeat.</param>
    public StarToken(Token token) {
        Safe.ArgumentNotNull(token, "token");
        m_token = token;
    }

    /// <summary>
    /// Dispatches to <c>Visit(StarToken)</c> of the visitor.
    /// </summary>
    public override void Accept(IVisitor visitor) {
        Safe.ArgumentNotNull(visitor, "visitor");
        visitor.Visit(this);
    }

    /// <summary>
    /// Returns the inner expression in parentheses followed by an asterisk.
    /// </summary>
    public override string ToString() {
        string inner = m_token.ToString();
        return String.Format("({0})*", inner);
    }
}
33 | } | |
|
34 | } |
@@ -0,0 +1,33 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
8 | namespace Implab.Parsing { | |
|
/// <summary>
/// Expression matching a single symbol.
/// </summary>
public class SymbolToken : Token {
    int m_value;

    /// <summary>
    /// The value passed to the constructor.
    /// </summary>
    public int Value {
        get {
            return m_value;
        }
    }

    /// <summary>
    /// Creates an expression for the specified symbol value.
    /// </summary>
    /// <param name="value">Symbol value.</param>
    public SymbolToken(int value) {
        m_value = value;
    }

    /// <summary>
    /// Dispatches to <c>Visit(SymbolToken)</c> of the visitor.
    /// </summary>
    public override void Accept(IVisitor visitor) {
        Safe.ArgumentNotNull(visitor, "visitor");
        visitor.Visit(this);
    }

    /// <summary>
    /// Returns the string representation of <see cref="Value"/>.
    /// </summary>
    public override string ToString() {
        return m_value.ToString();
    }
}
|
33 | } |
@@ -0,0 +1,67 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Globalization; | |
|
5 | using System.Linq; | |
|
6 | using System.Text; | |
|
7 | using System.Threading.Tasks; | |
|
8 | ||
|
9 | namespace Implab.Parsing { | |
|
/// <summary>
/// Base class of the nodes of an expression tree; provides helpers for composing expressions.
/// </summary>
public abstract class Token {
    /// <summary>
    /// Passes the node to the visitor.
    /// </summary>
    public abstract void Accept(IVisitor visitor);

    /// <summary>
    /// Returns this expression followed by a new <c>EndToken</c>.
    /// </summary>
    public Token Extend() {
        return Cat(new EndToken());
    }

    /// <summary>
    /// Returns this expression followed by an <c>EndToken</c> carrying the integer value of the tag.
    /// </summary>
    /// <param name="tag">Tag; converted with <c>ToInt32</c> using the invariant culture.</param>
    public Token Tag<T>(T tag) where T : IConvertible {
        int tagValue = tag.ToInt32(CultureInfo.InvariantCulture);
        return Cat(new EndToken(tagValue));
    }

    /// <summary>
    /// Concatenation: this expression followed by <paramref name="right"/>.
    /// </summary>
    public Token Cat(Token right) {
        var sequence = new CatToken(this, right);
        return sequence;
    }

    /// <summary>
    /// Alternative: this expression or <paramref name="right"/>.
    /// </summary>
    public Token Or(Token right) {
        var alternative = new AltToken(this, right);
        return alternative;
    }

    /// <summary>
    /// This expression or a new <c>EmptyToken</c>.
    /// </summary>
    public Token Optional() {
        return new AltToken(this, new EmptyToken());
    }

    /// <summary>
    /// Wraps this expression in a <c>StarToken</c>.
    /// </summary>
    public Token EClosure() {
        var star = new StarToken(this);
        return star;
    }

    /// <summary>
    /// This expression followed by its <c>StarToken</c> closure.
    /// </summary>
    public Token Closure() {
        return Cat(EClosure());
    }

    /// <summary>
    /// Concatenates this expression with itself <paramref name="count"/> times.
    /// </summary>
    /// <returns>A new <c>EmptyToken</c> when <paramref name="count"/> is less than one.</returns>
    public Token Repeat(int count) {
        if (count <= 0)
            return new EmptyToken();

        Token result = this;
        for (int n = 1; n < count; n++) {
            result = result.Cat(this);
        }
        return result;
    }

    /// <summary>
    /// Repeats this expression <paramref name="min"/> times followed by
    /// <paramref name="max"/> - <paramref name="min"/> optional occurrences.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="min"/> is less than one or
    /// greater than <paramref name="max"/>.</exception>
    public Token Repeat(int min, int max) {
        if (min < 1 || max < min)
            throw new ArgumentOutOfRangeException();

        Token result = Repeat(min);
        int optionalCount = max - min;
        for (int n = 0; n < optionalCount; n++) {
            result = result.Cat(Optional());
        }
        return result;
    }

    /// <summary>
    /// Creates an alternative of <c>SymbolToken</c> nodes for the distinct values of the set.
    /// </summary>
    /// <param name="set">Values; each is converted with <c>ToInt32</c> using the invariant culture.</param>
    /// <returns>The combined expression, or <c>null</c> when the set is empty.</returns>
    public static Token New<T>(params T[] set) where T : struct, IConvertible {
        Safe.ArgumentNotNull(set, "set");

        return set
            .Distinct()
            .Select(item => (Token)new SymbolToken(item.ToInt32(CultureInfo.InvariantCulture)))
            .Aggregate((Token)null, (combined, symbol) => combined == null ? symbol : combined.Or(symbol));
    }
}
|
67 | } |
@@ -33,6 +33,7 | |||
|
33 | 33 | </ItemGroup> |
|
34 | 34 | <ItemGroup> |
|
35 | 35 | <Compile Include="Component.cs" /> |
|
36 | <Compile Include="CustomEqualityComparer.cs" /> | |
|
36 | 37 | <Compile Include="Diagnostics\ConsoleTraceListener.cs" /> |
|
37 | 38 | <Compile Include="Diagnostics\EventText.cs" /> |
|
38 | 39 | <Compile Include="Diagnostics\IEventTextFormatter.cs" /> |
@@ -52,10 +53,41 | |||
|
52 | 53 | <Compile Include="IPromiseBase.cs" /> |
|
53 | 54 | <Compile Include="IServiceLocator.cs" /> |
|
54 | 55 | <Compile Include="ITaskController.cs" /> |
|
56 | <Compile Include="JSON\JSONElementContext.cs" /> | |
|
57 | <Compile Include="JSON\JSONElementType.cs" /> | |
|
58 | <Compile Include="JSON\JSONGrammar.cs" /> | |
|
59 | <Compile Include="JSON\JSONParser.cs" /> | |
|
60 | <Compile Include="JSON\JSONScanner.cs" /> | |
|
61 | <Compile Include="JSON\JsonTokenType.cs" /> | |
|
62 | <Compile Include="JSON\JSONWriter.cs" /> | |
|
63 | <Compile Include="JSON\StringTranslator.cs" /> | |
|
55 | 64 | <Compile Include="Parallels\DispatchPool.cs" /> |
|
56 | 65 | <Compile Include="Parallels\ArrayTraits.cs" /> |
|
57 | 66 | <Compile Include="Parallels\MTQueue.cs" /> |
|
58 | 67 | <Compile Include="Parallels\WorkerPool.cs" /> |
|
68 | <Compile Include="Parsing\Alphabet.cs" /> | |
|
69 | <Compile Include="Parsing\AlphabetBase.cs" /> | |
|
70 | <Compile Include="Parsing\AltToken.cs" /> | |
|
71 | <Compile Include="Parsing\BinaryToken.cs" /> | |
|
72 | <Compile Include="Parsing\CatToken.cs" /> | |
|
73 | <Compile Include="Parsing\CDFADefinition.cs" /> | |
|
74 | <Compile Include="Parsing\DFABuilder.cs" /> | |
|
75 | <Compile Include="Parsing\DFADefinitionBase.cs" /> | |
|
76 | <Compile Include="Parsing\DFAStateDescriptor.cs" /> | |
|
77 | <Compile Include="Parsing\DFAutomaton.cs" /> | |
|
78 | <Compile Include="Parsing\EDFADefinition.cs" /> | |
|
79 | <Compile Include="Parsing\EmptyToken.cs" /> | |
|
80 | <Compile Include="Parsing\EndToken.cs" /> | |
|
81 | <Compile Include="Parsing\EnumAlphabet.cs" /> | |
|
82 | <Compile Include="Parsing\Grammar.cs" /> | |
|
83 | <Compile Include="Parsing\IAlphabet.cs" /> | |
|
84 | <Compile Include="Parsing\IDFADefinition.cs" /> | |
|
85 | <Compile Include="Parsing\IVisitor.cs" /> | |
|
86 | <Compile Include="Parsing\ParserException.cs" /> | |
|
87 | <Compile Include="Parsing\Scanner.cs" /> | |
|
88 | <Compile Include="Parsing\StarToken.cs" /> | |
|
89 | <Compile Include="Parsing\SymbolToken.cs" /> | |
|
90 | <Compile Include="Parsing\Token.cs" /> | |
|
59 | 91 | <Compile Include="ServiceLocator.cs" /> |
|
60 | 92 | <Compile Include="TaskController.cs" /> |
|
61 | 93 | <Compile Include="ProgressInitEventArgs.cs" /> |
@@ -25,6 +25,11 namespace Implab | |||
|
25 | 25 | throw new ArgumentNullException(name); |
|
26 | 26 | } |
|
27 | 27 | |
|
/// <summary>
/// Checks that the value lies within the inclusive range.
/// </summary>
/// <param name="arg">Value to check.</param>
/// <param name="min">Smallest allowed value.</param>
/// <param name="max">Largest allowed value.</param>
/// <param name="name">Parameter name reported in the exception.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="arg"/> is outside the range.</exception>
public static void ArgumentInRange(int arg, int min, int max, string name) {
    bool withinBounds = min <= arg && arg <= max;
    if (!withinBounds)
        throw new ArgumentOutOfRangeException(name);
}
|
32 | ||
|
28 | 33 | public static void Dispose<T>(T obj) where T : class |
|
29 | 34 | { |
|
30 | 35 | var disp = obj as IDisposable; |
General Comments 0
You need to be logged in to leave comments.
Login now