@@ -0,0 +1,49 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab { | |||
|
/// <summary>
/// Adapter that builds an <c>IEqualityComparer</c> out of delegates or lambda expressions.
/// </summary>
/// <typeparam name="T">Type of the values being compared.</typeparam>
public class CustomEqualityComparer<T> : IEqualityComparer<T> {
    Func<T, int> m_hashFn;
    Func<T, T, bool> m_equalityFn;

    /// <summary>
    /// Creates a comparer backed by the given equality and hash-code functions.
    /// </summary>
    /// <param name="equality">Function that tests two values for equality.</param>
    /// <param name="hash">Function that produces the hash code of a value.</param>
    public CustomEqualityComparer(Func<T, T, bool> equality, Func<T, int> hash) {
        // Both delegates are validated before either one is stored.
        Safe.ArgumentNotNull(equality, "equality");
        Safe.ArgumentNotNull(hash, "hash");

        m_equalityFn = equality;
        m_hashFn = hash;
    }

    /// <summary>
    /// Tests two values for equality by calling the equality delegate.
    /// </summary>
    /// <param name="x">First value.</param>
    /// <param name="y">Second value.</param>
    /// <returns>The result of the equality delegate.</returns>
    public bool Equals(T x, T y) {
        var areEqual = m_equalityFn(x, y);
        return areEqual;
    }

    /// <summary>
    /// Computes the hash code of a value by calling the hash delegate.
    /// </summary>
    /// <param name="obj">Value to hash.</param>
    /// <remarks>Values that compare equal *must* produce the same hash code.</remarks>
    /// <returns>The hash code returned by the hash delegate.</returns>
    public int GetHashCode(T obj) {
        var hashCode = m_hashFn(obj);
        return hashCode;
    }
}
|
49 | } |
@@ -0,0 +1,16 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | using System.Threading.Tasks; | |||
|
6 | ||||
|
7 | namespace Implab.JSON { | |||
|
/// <summary>
/// Kind of JSON container that is currently open (marked "internal" by the original author).
/// </summary>
public enum JSONElementContext {
    /// <summary>
    /// No container is open.
    /// </summary>
    None = 0,
    /// <summary>
    /// Inside an object.
    /// </summary>
    Object = 1,
    /// <summary>
    /// Inside an array.
    /// </summary>
    Array = 2
}
|
16 | } |
@@ -0,0 +1,34 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | using System.Threading.Tasks; | |||
|
6 | ||||
|
7 | namespace Implab.JSON { | |||
|
/// <summary>
/// Type of the element the parser is currently positioned on.
/// </summary>
public enum JSONElementType {
    /// <summary>
    /// No element.
    /// </summary>
    None = 0,
    /// <summary>
    /// Start of an object.
    /// </summary>
    BeginObject = 1,
    /// <summary>
    /// End of an object.
    /// </summary>
    EndObject = 2,
    /// <summary>
    /// Start of an array.
    /// </summary>
    BeginArray = 3,
    /// <summary>
    /// End of an array.
    /// </summary>
    EndArray = 4,
    /// <summary>
    /// Simple value.
    /// </summary>
    Value = 5
}
|
34 | } |
@@ -0,0 +1,113 | |||||
|
1 | using Implab.Parsing; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.JSON { | |||
|
internal class JSONGrammar : Grammar<JSONGrammar> {
    /// <summary>
    /// Tags attached to the lexical expressions defined by this grammar.
    /// </summary>
    /// <remarks>
    /// JSONScanner casts a tag value directly to JsonTokenType for tokens it does not
    /// treat specially, so the leading members (None .. ValueSeparator) must keep their order.
    /// </remarks>
    public enum TokenType : int {
        None,
        BeginObject,
        EndObject,
        BeginArray,
        EndArray,
        String,
        Number,
        Literal,
        NameSeparator,
        ValueSeparator,

        // tags used while scanning the inside of a string
        StringBound,
        EscapedChar,
        UnescapedChar,
        EscapedUnicode,

        // tags used by the number sub-expression
        Minus,
        Plus,
        Sign,
        Integer,
        Dot,
        Exp
    }

    readonly CDFADefinition m_jsonDFA;
    readonly CDFADefinition m_stringDFA;

    /// <summary>
    /// Declares the JSON lexical expressions and builds two automata from them:
    /// one for top-level tokens and one for the contents of a string.
    /// </summary>
    public JSONGrammar() {
        DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));

        // NOTE(review): the order of the token factory calls below is kept exactly as written;
        // they may register symbols in the alphabet as a side effect - confirm before reordering.
        var hex = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
        var nonZeroDigit = SymbolRangeToken('1', '9');
        var zeroDigit = SymbolToken('0');
        var anyDigit = zeroDigit.Or(nonZeroDigit);
        var point = SymbolToken('.');
        var minusSign = SymbolToken('-');
        var plusOrMinus = SymbolSetToken('-', '+');
        var expMarker = SymbolSetToken('e', 'E');
        var lowerLetter = SymbolRangeToken('a', 'z');

        // number parts: either a single zero or a non-zero digit followed by more digits
        var intPart = zeroDigit.Or(nonZeroDigit.Cat(anyDigit.EClosure()));
        var fracPart = point.Cat(anyDigit.Closure());
        var expPart = expMarker.Cat(plusOrMinus.Optional()).Cat(anyDigit.Closure());

        // string parts
        var dblQuote = SymbolToken('"');
        var slashBack = SymbolToken('\\');
        var simpleEscapes = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
        var unicodeEscape = SymbolToken('u').Cat(hex.Repeat(4));
        var anyEscape = slashBack.Cat(simpleEscapes.Or(unicodeEscape));

        // structural characters, each surrounded by whitespace
        var ws = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
        var objectOpen = ws.Cat(SymbolToken('{')).Cat(ws);
        var objectClose = ws.Cat(SymbolToken('}')).Cat(ws);
        var arrayOpen = ws.Cat(SymbolToken('[')).Cat(ws);
        var arrayClose = ws.Cat(SymbolToken(']')).Cat(ws);
        var colon = ws.Cat(SymbolToken(':')).Cat(ws);
        var comma = ws.Cat(SymbolToken(',')).Cat(ws);

        var numberExpr = minusSign.Optional().Cat(intPart).Cat(fracPart.Optional()).Cat(expPart.Optional());
        var literalExpr = lowerLetter.Closure();
        var plainChar = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
        var anyChar = plainChar.Or(anyEscape);
        // Complete string expression; not part of either automaton built below.
        var stringExpr = dblQuote.Cat(anyChar.EClosure()).Cat(dblQuote);

        // Top-level tokens. A string is recognised only by its opening quote here.
        var topLevelExpr =
            numberExpr.Tag(TokenType.Number)
            .Or(literalExpr.Tag(TokenType.Literal))
            .Or(dblQuote.Tag(TokenType.StringBound))
            .Or(objectOpen.Tag(TokenType.BeginObject))
            .Or(objectClose.Tag(TokenType.EndObject))
            .Or(arrayOpen.Tag(TokenType.BeginArray))
            .Or(arrayClose.Tag(TokenType.EndArray))
            .Or(colon.Tag(TokenType.NameSeparator))
            .Or(comma.Tag(TokenType.ValueSeparator));

        // Tokens found inside a string: closing quote, escapes and runs of plain characters.
        var insideStringExpr =
            dblQuote.Tag(TokenType.StringBound)
            .Or(slashBack.Cat(simpleEscapes).Tag(TokenType.EscapedChar))
            .Or(slashBack.Cat(unicodeEscape).Tag(TokenType.EscapedUnicode))
            .Or(plainChar.Closure().Tag(TokenType.UnescapedChar));

        // Number sub-tokens; declared but no automaton is built from this expression.
        var numberPartsExpr =
            minusSign.Tag(TokenType.Minus)
            .Or(SymbolToken('+').Tag(TokenType.Plus))
            .Or(anyDigit.Closure().Tag(TokenType.Integer))
            .Or(point.Tag(TokenType.Dot))
            .Or(expMarker.Tag(TokenType.Exp));

        m_jsonDFA = BuildDFA(topLevelExpr);
        m_stringDFA = BuildDFA(insideStringExpr);
    }

    /// <summary>
    /// Automaton that recognises top-level JSON tokens.
    /// </summary>
    public CDFADefinition JsonDFA {
        get { return m_jsonDFA; }
    }

    /// <summary>
    /// Automaton that recognises the tokens inside a JSON string.
    /// </summary>
    public CDFADefinition JsonStringDFA {
        get { return m_stringDFA; }
    }
}
|
113 | } |
@@ -0,0 +1,197 | |||||
|
1 | using Implab; | |||
|
2 | using Implab.Parsing; | |||
|
3 | using System; | |||
|
4 | using System.Collections.Generic; | |||
|
5 | using System.Diagnostics; | |||
|
6 | using System.Linq; | |||
|
7 | using System.Text; | |||
|
8 | using System.Threading.Tasks; | |||
|
9 | ||||
|
10 | namespace Implab.JSON { | |||
|
/// <summary>
/// State that JSONParser keeps for the container it is currently reading
/// (marked "internal" by the original author).
/// </summary>
public struct JSONParserContext {
    /// <summary>
    /// Name of the member being read; JSONParser resets it to an empty string at the start of every Read call.
    /// </summary>
    public string memberName;
    /// <summary>
    /// Kind of container this context describes: none, object or array.
    /// </summary>
    public JSONElementContext elementContext;
}
|
18 | ||||
|
/// <summary>
/// Pull parser for JSON data: each call to <see cref="Read"/> advances to the next element.
/// </summary>
public class JSONParser : DFAutomaton<JSONParserContext> {

    // Tells whether the next String token is a member name or a value.
    enum MemberContext {
        MemberName,
        MemberValue
    }

    static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet;
    static readonly DFAStateDescriptior[] _jsonDFA;
    static readonly DFAStateDescriptior[] _objectDFA;
    static readonly DFAStateDescriptior[] _arrayDFA;

    // Builds the three token-level automata: document root, object body and array body.
    static JSONParser() {
        // The document root is a single object or array.
        var jsonExpression = Token.New(JsonTokenType.BeginObject, JsonTokenType.BeginArray).Tag(0);

        var valueExpression = Token.New(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String);
        var memberExpression = Token.New(JsonTokenType.String).Cat(Token.New(JsonTokenType.NameSeparator)).Cat(valueExpression);

        // Optional list of members separated by ValueSeparator, closed by EndObject.
        var objectExpression = memberExpression
            .Cat(
                Token.New(JsonTokenType.ValueSeparator)
                .Cat(memberExpression)
                .EClosure()
            )
            .Optional()
            .Cat(Token.New(JsonTokenType.EndObject))
            .Tag(0);

        // Optional list of values separated by ValueSeparator, closed by EndArray.
        var arrayExpression = valueExpression
            .Cat(
                Token.New(JsonTokenType.ValueSeparator)
                .Cat(valueExpression)
                .EClosure()
            )
            .Optional()
            .Cat(Token.New(JsonTokenType.EndArray))
            .Tag(0);

        _jsonDFA = BuildDFA(jsonExpression).States;
        _objectDFA = BuildDFA(objectExpression).States;
        _arrayDFA = BuildDFA(arrayExpression).States;
    }

    // Builds an automaton over the JsonTokenType alphabet from the given expression.
    static EDFADefinition<JsonTokenType> BuildDFA(Token expr) {
        var builder = new DFABuilder();
        var dfa = new EDFADefinition<JsonTokenType>(_alphabet);
        expr.Accept(builder);

        builder.BuildDFA(dfa);
        return dfa;
    }

    readonly JSONScanner m_scanner;
    MemberContext m_memberContext;

    JSONElementType m_elementType;
    object m_elementValue;

    /// <summary>
    /// Creates a parser over the specified JSON text.
    /// </summary>
    /// <param name="text">JSON text to parse; validated with Safe.ArgumentNotEmpty.</param>
    public JSONParser(string text)
        : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty } ) {
        Safe.ArgumentNotEmpty(text, "text");
        m_scanner = new JSONScanner();
        m_scanner.Feed(text.ToCharArray());
    }

    /// <summary>
    /// Type of the element the parser is positioned on after the last <see cref="Read"/>.
    /// </summary>
    public JSONElementType ElementType {
        get { return m_elementType; }
    }

    /// <summary>
    /// Member name stored in the current context; an empty string when no name was read.
    /// </summary>
    public string ElementName {
        get { return m_context.info.memberName; }
    }

    /// <summary>
    /// Value of the current element: the token value for strings and numbers,
    /// <c>true</c>, <c>false</c> or <c>null</c> for literals, and <c>null</c> for container boundaries.
    /// </summary>
    public object ElementValue {
        get { return m_elementValue; }
    }

    /// <summary>
    /// Advances the parser to the next element.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the parser is positioned on a new element; <c>false</c> when the scanner
    /// has no more tokens and no container is open.
    /// </returns>
    /// <exception cref="InvalidOperationException">The automaton is already in the unreachable state.</exception>
    /// <exception cref="ParserException">An unexpected token was read, or the data ended inside a container.</exception>
    public bool Read() {
        if (m_context.current == UNREACHEBLE_STATE)
            throw new InvalidOperationException("The parser is in invalid state");
        object tokenValue;
        JsonTokenType tokenType;
        m_context.info.memberName = String.Empty;
        // Name and separator tokens do not produce an element, so keep reading until one does.
        while (m_scanner.ReadToken(out tokenValue, out tokenType)) {
            Move((int)tokenType);
            if (m_context.current == UNREACHEBLE_STATE)
                UnexpectedToken(tokenValue, tokenType);
            switch (tokenType) {
                case JsonTokenType.BeginObject:
                    // Enter the object automaton; the new context inherits the member name.
                    Switch(
                        _objectDFA,
                        INITIAL_STATE,
                        new JSONParserContext {
                            memberName = m_context.info.memberName,
                            elementContext = JSONElementContext.Object
                        }
                    );
                    m_elementValue = null;
                    m_memberContext = MemberContext.MemberName;
                    m_elementType = JSONElementType.BeginObject;
                    return true;
                case JsonTokenType.EndObject:
                    Restore();
                    m_elementValue = null;
                    m_elementType = JSONElementType.EndObject;
                    return true;
                case JsonTokenType.BeginArray:
                    // Enter the array automaton; array items are always values.
                    Switch(
                        _arrayDFA,
                        INITIAL_STATE,
                        new JSONParserContext {
                            memberName = m_context.info.memberName,
                            elementContext = JSONElementContext.Array
                        }
                    );
                    m_elementValue = null;
                    m_memberContext = MemberContext.MemberValue;
                    m_elementType = JSONElementType.BeginArray;
                    return true;
                case JsonTokenType.EndArray:
                    Restore();
                    m_elementValue = null;
                    m_elementType = JSONElementType.EndArray;
                    return true;
                case JsonTokenType.String:
                    if (m_memberContext == MemberContext.MemberName) {
                        // A member name only: remember it and wait for the value.
                        m_context.info.memberName = (string)tokenValue;
                        break;
                    } else {
                        m_elementType = JSONElementType.Value;
                        m_elementValue = tokenValue;
                        return true;
                    }
                case JsonTokenType.Number:
                    m_elementType = JSONElementType.Value;
                    m_elementValue = tokenValue;
                    return true;
                case JsonTokenType.Literal:
                    m_elementType = JSONElementType.Value;
                    m_elementValue = ParseLiteral((string)tokenValue);
                    return true;
                case JsonTokenType.NameSeparator:
                    m_memberContext = MemberContext.MemberValue;
                    break;
                case JsonTokenType.ValueSeparator:
                    // After a comma an object expects a name, an array expects a value.
                    m_memberContext = m_context.info.elementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue;
                    break;
                default:
                    UnexpectedToken(tokenValue, tokenType);
                    break;
            }
        }
        if (m_context.info.elementContext != JSONElementContext.None)
            throw new ParserException("Unexpected end of data");
        return false;
    }

    // Converts the text of a literal token to null, false or true; any other text is an error.
    object ParseLiteral(string literal) {
        switch (literal) {
            case "null":
                return null;
            case "false" :
                return false;
            case "true":
                return true;
            default:
                UnexpectedToken(literal, JsonTokenType.Literal);
                return null; // unreachable, keeps the compiler satisfied
        }
    }

    // Always throws a ParserException describing the offending token.
    void UnexpectedToken(object value, JsonTokenType tokenType) {
        throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value));
    }

}
|
196 | ||||
|
197 | } |
@@ -0,0 +1,89 | |||||
|
1 | using Implab.Parsing; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Globalization; | |||
|
5 | using System.Linq; | |||
|
6 | using System.Text; | |||
|
7 | using System.Threading.Tasks; | |||
|
8 | ||||
|
9 | namespace Implab.JSON { | |||
|
/// <summary>
/// Scanner that splits a stream of characters into JSON tokens.
/// </summary>
public class JSONScanner : Scanner {
    // Reusable buffer that collects the decoded characters of the string being read.
    char[] m_stringBuffer;
    readonly DFAStateDescriptior[] m_stringDFA;
    readonly int[] m_stringAlphabet;

    /// <summary>
    /// Creates a scanner that uses the automata of <c>JSONGrammar.Instance</c>.
    /// </summary>
    public JSONScanner()
        : base(JSONGrammar.Instance.JsonDFA) {
        m_stringBuffer = new char[1024];
        var dfa = JSONGrammar.Instance.JsonStringDFA;
        m_stringAlphabet = dfa.Alphabet.GetTranslationMap();
        m_stringDFA = dfa.States;
    }

    /// <summary>
    /// Reads the next token.
    /// </summary>
    /// <param name="tokenValue">
    /// The decoded text for a string, a <see cref="Double"/> for a number, the raw token text otherwise;
    /// <c>null</c> when no token was read.
    /// </param>
    /// <param name="tokenType">Type of the token; <c>JsonTokenType.None</c> when no token was read.</param>
    /// <returns><c>true</c> if a token was read, <c>false</c> otherwise.</returns>
    public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
        if (ReadTokenInternal()) {
            switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
                case JSONGrammar.TokenType.StringBound:
                    // An opening quote: the rest of the string is read with the string automaton.
                    tokenValue = ReadString();
                    tokenType = JsonTokenType.String;
                    break;
                case JSONGrammar.TokenType.Number:
                    tokenValue = Double.Parse(new String(m_buffer, m_tokenOffset, m_tokenLen), CultureInfo.InvariantCulture);
                    tokenType = JsonTokenType.Number;
                    break;
                default:
                    // The grammar tag is reinterpreted as JsonTokenType by its numeric value.
                    tokenType = (JsonTokenType)m_currentState.tag[0];
                    tokenValue = new String(m_buffer, m_tokenOffset, m_tokenLen);
                    break;
            }
            return true;
        }
        tokenValue = null;
        tokenType = JsonTokenType.None;
        return false;
    }

    // Reads the body of a string up to the closing quote, decoding escape sequences.
    // Throws ParserException if the input ends before the closing quote.
    string ReadString() {
        int pos = 0;
        Switch(m_stringDFA, m_stringAlphabet);
        while (ReadTokenInternal()) {
            switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
                case JSONGrammar.TokenType.StringBound:
                    // Closing quote: return to the previous automaton and hand back the text.
                    Restore();
                    return new String(m_stringBuffer, 0, pos);
                case JSONGrammar.TokenType.UnescapedChar:
                    EnsureStringBufferSize(pos + m_tokenLen);
                    Array.Copy(m_buffer, m_tokenOffset, m_stringBuffer, pos, m_tokenLen);
                    pos += m_tokenLen;
                    break;
                case JSONGrammar.TokenType.EscapedUnicode:
                    EnsureStringBufferSize(pos + 1);
                    // Skip the leading "\u" and decode the four hex digits.
                    m_stringBuffer[pos] = StringTranslator.TranslateHexUnicode(m_buffer, m_tokenOffset + 2);
                    pos++;
                    break;
                case JSONGrammar.TokenType.EscapedChar:
                    EnsureStringBufferSize(pos + 1);
                    // Skip the backslash and translate the character that follows it.
                    m_stringBuffer[pos] = StringTranslator.TranslateEscapedChar(m_buffer[m_tokenOffset + 1]);
                    pos++;
                    break;
                default:
                    break;
            }

        }

        throw new ParserException("Unexpected end of data");
    }

    // Makes sure the string buffer can hold at least 'size' characters.
    // The buffer at least doubles on growth, so a long string with many escapes
    // is not reallocated and copied once per character.
    void EnsureStringBufferSize(int size) {
        if (size > m_stringBuffer.Length) {
            var newSize = Math.Max(size, m_stringBuffer.Length * 2);
            Array.Resize(ref m_stringBuffer, newSize);
        }
    }
}
|
89 | } |
@@ -0,0 +1,227 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.IO; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.JSON { | |||
|
/// <summary>
/// Forward-only writer that emits JSON text to a <see cref="TextWriter"/>.
/// </summary>
public class JSONWriter {
    // State of the container currently being written.
    struct Context {
        public bool needComma;
        public JSONElementContext element;
    }
    Stack<Context> m_contextStack = new Stack<Context>();
    Context m_context;

    TextWriter m_writer;
    bool m_indent; // not used yet, see the indent TODO in WriteMemberName

    static readonly char [] _escapeBKS,
        _escapeFWD,
        _escapeCR,
        _escapeNL,
        _escapeTAB,
        _escapeSLASH,
        _escapeBSLASH,
        _escapeQ;

    static JSONWriter() {
        _escapeBKS = "\\b".ToCharArray();
        _escapeFWD = "\\f".ToCharArray();
        _escapeCR = "\\r".ToCharArray();
        _escapeNL = "\\n".ToCharArray();
        _escapeTAB = "\\t".ToCharArray();
        _escapeBSLASH = "\\\\".ToCharArray();
        _escapeSLASH = "\\/".ToCharArray();
        _escapeQ = "\\\"".ToCharArray();
    }

    /// <summary>
    /// Creates a writer that emits JSON to the specified text writer.
    /// </summary>
    /// <param name="writer">Destination of the JSON text; validated with Safe.ArgumentNotNull.</param>
    public JSONWriter(TextWriter writer) {
        Safe.ArgumentNotNull(writer, "writer");

        m_writer = writer;
    }

    // Writes the separator before the next item of the current container (if one is
    // needed) and records that the container now has at least one item.
    void WriteSeparator() {
        if (m_context.needComma)
            m_writer.Write(", ");
        m_context.needComma = true;
    }

    // Writes 'name : ' for a member of the current object.
    void WriteMemberName(string name) {
        Safe.ArgumentNotEmpty(name, "name");
        if (m_context.element != JSONElementContext.Object)
            OperationNotApplicable("WriteMember");
        WriteSeparator();
        // TODO indent
        Write(name);
        m_writer.Write(" : ");
    }

    /// <summary>
    /// Writes a string member of the current object; a <c>null</c> value is written as <c>null</c>.
    /// </summary>
    public void WriteValue(string name, string value) {
        WriteMemberName(name);
        Write(value);
    }

    /// <summary>
    /// Writes a boolean member of the current object.
    /// </summary>
    public void WriteValue(string name, bool value) {
        WriteMemberName(name);
        Write(value);
    }

    /// <summary>
    /// Writes a numeric member of the current object.
    /// </summary>
    public void WriteValue(string name, double value) {
        WriteMemberName(name);
        Write(value);
    }

    /// <summary>
    /// Writes a string item of the current array; a <c>null</c> value is written as <c>null</c>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The current container is not an array.</exception>
    public void WriteValue(string value) {
        if (m_context.element != JSONElementContext.Array)
            OperationNotApplicable("WriteValue");
        WriteSeparator();

        Write(value);
    }

    /// <summary>
    /// Writes a boolean item of the current array.
    /// </summary>
    /// <exception cref="InvalidOperationException">The current container is not an array.</exception>
    public void WriteValue(bool value) {
        if (m_context.element != JSONElementContext.Array)
            OperationNotApplicable("WriteValue");
        WriteSeparator();

        Write(value);
    }

    /// <summary>
    /// Writes a numeric item of the current array.
    /// </summary>
    /// <exception cref="InvalidOperationException">The current container is not an array.</exception>
    public void WriteValue(double value) {
        if (m_context.element != JSONElementContext.Array)
            OperationNotApplicable("WriteValue");
        WriteSeparator();

        Write(value);
    }

    /// <summary>
    /// Starts an unnamed object, either at the top level or as an array item.
    /// </summary>
    /// <exception cref="InvalidOperationException">The current container is an object.</exception>
    public void BeginObject() {
        if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array)
            OperationNotApplicable("BeginObject");
        WriteSeparator();

        m_contextStack.Push(m_context);

        m_context = new Context { element = JSONElementContext.Object, needComma = false };
        m_writer.Write("{ ");
    }

    /// <summary>
    /// Starts an object that is a named member of the current object.
    /// </summary>
    public void BeginObject(string name) {
        WriteMemberName(name);

        m_contextStack.Push(m_context);

        m_context = new Context { element = JSONElementContext.Object, needComma = false };
        m_writer.Write("{ ");
    }

    /// <summary>
    /// Closes the current object and returns to the enclosing container.
    /// </summary>
    /// <exception cref="InvalidOperationException">The current container is not an object.</exception>
    public void EndObject() {
        if (m_context.element != JSONElementContext.Object)
            OperationNotApplicable("EndObject");

        m_writer.Write(" }");
        m_context = m_contextStack.Pop();
    }

    /// <summary>
    /// Starts an unnamed array, either at the top level or as an array item.
    /// </summary>
    /// <exception cref="InvalidOperationException">The current container is an object.</exception>
    public void BeginArray() {
        if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array)
            OperationNotApplicable("BeginArray");
        WriteSeparator();

        m_contextStack.Push(m_context);

        m_context = new Context { element = JSONElementContext.Array, needComma = false };
        m_writer.Write("[ ");
    }

    /// <summary>
    /// Starts an array that is a named member of the current object.
    /// </summary>
    public void BeginArray(string name) {
        WriteMemberName(name);

        m_contextStack.Push(m_context);

        m_context = new Context { element = JSONElementContext.Array, needComma = false };
        m_writer.Write("[ ");
    }

    /// <summary>
    /// Closes the current array and returns to the enclosing container.
    /// </summary>
    /// <exception cref="InvalidOperationException">The current container is not an array.</exception>
    public void EndArray() {
        if (m_context.element != JSONElementContext.Array)
            OperationNotApplicable("EndArray");

        m_writer.Write(" ]");
        m_context = m_contextStack.Pop();
    }

    void Write(bool value) {
        m_writer.Write(value ? "true" : "false");
    }

    // Writes a quoted, escaped JSON string, or the literal null for a null reference.
    void Write(string value) {
        if (value == null) {
            m_writer.Write("null");
            return; // nothing to quote or escape
        }

        var chars = value.ToCharArray();
        m_writer.Write('"');

        for (int i = 0; i < chars.Length; i++) {
            var ch = chars[i];

            switch (ch) {
                case '\b':
                    m_writer.Write(_escapeBKS);
                    break;
                case '\f':
                    m_writer.Write(_escapeFWD);
                    break;
                case '\r':
                    m_writer.Write(_escapeCR);
                    break;
                case '\n':
                    m_writer.Write(_escapeNL);
                    break;
                case '\t':
                    m_writer.Write(_escapeTAB);
                    break;
                case '\\':
                    m_writer.Write(_escapeBSLASH);
                    break;
                case '/':
                    m_writer.Write(_escapeSLASH);
                    break;
                case '"':
                    m_writer.Write(_escapeQ);
                    break;
                default:
                    if (ch < 0x20) {
                        // remaining control characters are written as \u00XX
                        m_writer.Write("\\u00{0:x2}",(int)ch);
                    } else {
                        m_writer.Write(ch);
                    }
                    break;
            }
        }

        m_writer.Write('"');
    }

    // Numbers are formatted with the invariant culture so the decimal separator is
    // always '.', whatever culture the underlying writer uses.
    // NOTE(review): NaN and infinities are still written as text that is not valid JSON.
    void Write(double value) {
        m_writer.Write(value.ToString(System.Globalization.CultureInfo.InvariantCulture));
    }

    // Always throws: reports an operation that is not allowed in the current container.
    void OperationNotApplicable(string opName) {
        throw new InvalidOperationException(String.Format("The operation '{0}' isn't applicable in the context of '{1}'", opName, m_context.element ));
    }

}
|
227 | } |
@@ -0,0 +1,50 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | using System.Threading.Tasks; | |||
|
6 | ||||
|
7 | namespace Implab.JSON { | |||
|
/// <summary>
/// Type of the tokens returned by <see cref="JSONScanner"/>.
/// </summary>
public enum JsonTokenType : int {
    /// <summary>
    /// No token.
    /// </summary>
    None = 0,
    /// <summary>
    /// Start of an object.
    /// </summary>
    BeginObject = 1,
    /// <summary>
    /// End of an object.
    /// </summary>
    EndObject = 2,
    /// <summary>
    /// Start of an array.
    /// </summary>
    BeginArray = 3,
    /// <summary>
    /// End of an array.
    /// </summary>
    EndArray = 4,
    /// <summary>
    /// String.
    /// </summary>
    String = 5,
    /// <summary>
    /// Number.
    /// </summary>
    Number = 6,
    /// <summary>
    /// Literal.
    /// </summary>
    Literal = 7,
    /// <summary>
    /// Name separator <c>:</c>
    /// </summary>
    NameSeparator = 8,
    /// <summary>
    /// Value separator <c>,</c>
    /// </summary>
    ValueSeparator = 9
}
|
50 | } |
@@ -0,0 +1,96 | |||||
|
1 | using Implab; | |||
|
2 | using Implab.Parsing; | |||
|
3 | using System; | |||
|
4 | using System.Collections.Generic; | |||
|
5 | using System.Diagnostics; | |||
|
6 | using System.Linq; | |||
|
7 | using System.Text; | |||
|
8 | using System.Threading.Tasks; | |||
|
9 | ||||
|
10 | namespace Implab.JSON { | |||
|
/// <summary>
/// Converts an escaped JSON string into its unescaped form.
/// </summary>
public class StringTranslator : Scanner {
    // Lookup tables indexed by character code.
    static readonly char[] _escMap;
    static readonly int[] _hexMap;

    static StringTranslator() {
        // Every character the grammar accepts after a backslash must be listed here.
        // The quote was missing, so the escape \" used to be decoded as '\0'.
        var chars = new char[] { 'b', 'f', 't', 'r', 'n', '\\', '/', '"' };
        var vals = new char[] { '\b', '\f', '\t', '\r', '\n', '\\', '/', '"' };

        _escMap = new char[chars.Max() + 1];

        for (int i = 0; i < chars.Length; i++)
            _escMap[chars[i]] = vals[i];

        var hexs = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F' };
        var ints = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15 };

        _hexMap = new int[hexs.Max() + 1];

        for (int i = 0; i < hexs.Length; i++)
            _hexMap[hexs[i]] = ints[i];

    }

    /// <summary>
    /// Creates a translator that uses the string automaton of <c>JSONGrammar.Instance</c>.
    /// </summary>
    public StringTranslator()
        : base(JSONGrammar.Instance.JsonStringDFA) {
    }

    /// <summary>
    /// Translates an escaped string.
    /// </summary>
    /// <param name="data">Escaped text; validated with Safe.ArgumentNotNull.</param>
    /// <returns>The unescaped text.</returns>
    public string Translate(string data) {
        Safe.ArgumentNotNull(data, "data");
        return Translate(data.ToCharArray());
    }

    /// <summary>
    /// Translates all characters of the specified array.
    /// </summary>
    /// <param name="data">Escaped text; validated with Safe.ArgumentNotNull.</param>
    /// <returns>The unescaped text.</returns>
    public string Translate(char[] data) {
        Safe.ArgumentNotNull(data, "data");
        return Translate(data, data.Length);
    }

    /// <summary>
    /// Translates the first <paramref name="length"/> characters of the specified array.
    /// </summary>
    /// <param name="data">Escaped text; validated with Safe.ArgumentNotNull.</param>
    /// <param name="length">Number of characters to translate; checked against 0 and <c>data.Length</c> with Safe.ArgumentInRange.</param>
    /// <returns>The unescaped text.</returns>
    public string Translate(char[] data, int length) {
        Safe.ArgumentNotNull(data, "data");
        Safe.ArgumentInRange(length, 0, data.Length, "length");

        // Every token produces at most as many characters as it consumes,
        // so the output never exceeds the input length.
        var translated = new char[length];

        Feed(data,length);

        int pos = 0;

        while (ReadTokenInternal()) {
            switch ((JSONGrammar.TokenType)TokenTags[0]) {
                case JSONGrammar.TokenType.UnescapedChar:
                    Array.Copy(m_buffer,m_tokenOffset,translated,pos,m_tokenLen);
                    pos += m_tokenLen;
                    break;
                case JSONGrammar.TokenType.EscapedChar:
                    // skip the backslash
                    translated[pos] = _escMap[m_buffer[m_tokenOffset + 1]];
                    pos++;
                    break;
                case JSONGrammar.TokenType.EscapedUnicode:
                    // skip the leading "\u"
                    translated[pos] = TranslateHexUnicode(m_buffer,m_tokenOffset + 2);
                    pos++;
                    break;
            }
        }

        return new String(translated, 0, pos);
    }

    // Maps the character that follows a backslash to the character it stands for.
    // The caller must pass one of the characters registered in the static constructor.
    internal static char TranslateEscapedChar(char symbol) {
        return _escMap[symbol];
    }

    // Decodes four hexadecimal digits starting at 'offset' into one UTF-16 code unit.
    internal static char TranslateHexUnicode(char[] symbols, int offset) {
        Debug.Assert(symbols != null);
        Debug.Assert(symbols.Length - offset >= 4);

        int value = (_hexMap[symbols[offset]] << 12)
            | (_hexMap[symbols[offset + 1]] << 8)
            | (_hexMap[symbols[offset + 2]] << 4)
            | (_hexMap[symbols[offset + 3]]);
        return (char)value;
    }
}
|
96 | } |
@@ -0,0 +1,23 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Alphabet over <c>char</c>: every UTF-16 code unit is an input symbol and is used
/// directly as its own index into the classification map.
/// </summary>
public class Alphabet: AlphabetBase<char> {

    /// <summary>
    /// Returns the map index of a symbol, which is the numeric value of the character.
    /// </summary>
    public override int GetSymbolIndex(char symbol) {
        return symbol;
    }

    /// <summary>
    /// Enumerates every character from <c>char.MinValue</c> to <c>char.MaxValue</c> inclusive.
    /// </summary>
    public override IEnumerable<char> InputSymbols {
        get {
            // Enumerable.Range takes (start, count), not (start, end): the count must be
            // MaxValue - MinValue + 1 so that '\uffff' is included and the number of
            // symbols matches MapSize.
            return Enumerable
                .Range(char.MinValue, char.MaxValue - char.MinValue + 1)
                .Select(x => (char)x);
        }
    }

    /// <summary>
    /// Size of the classification map: one slot per possible <c>char</c> value.
    /// </summary>
    protected override int MapSize {
        get { return char.MaxValue + 1; }
    }
}
|
23 | } |
@@ -0,0 +1,103 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Diagnostics; | |||
|
5 | using System.Linq; | |||
|
6 | using System.Text; | |||
|
7 | using System.Threading.Tasks; | |||
|
8 | ||||
|
9 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Base implementation of an alphabet that groups input symbols into numbered classes.
/// Class 0 (<see cref="UNCLASSIFIED"/>) is the class of every symbol that has not been assigned.
/// </summary>
/// <typeparam name="T">Type of the input symbols.</typeparam>
public abstract class AlphabetBase<T> : IAlphabet<T> {
    /// <summary>Class id of symbols that have not been assigned to any class.</summary>
    public const int UNCLASSIFIED = 0;

    // The next class id to hand out; ids start at 1 because 0 is UNCLASSIFIED.
    int m_nextId = 1;
    // Maps a symbol index (see GetSymbolIndex) to its class id.
    int[] m_map;

    /// <summary>
    /// Number of classes including the unclassified one.
    /// </summary>
    public int Count {
        get { return m_nextId; }
    }

    /// <summary>
    /// Creates an alphabet in which every symbol is unclassified.
    /// </summary>
    /// <remarks><see cref="MapSize"/> is read during construction.</remarks>
    protected AlphabetBase() {
        m_map = new int[MapSize];
    }

    /// <summary>
    /// Creates an alphabet on top of an existing classification map. The array is used
    /// as is (not copied) and the next class id becomes the largest id in the map plus one.
    /// </summary>
    /// <param name="map">Classification map; asserted to be non-null and of length <see cref="MapSize"/>.</param>
    protected AlphabetBase(int[] map) {
        Debug.Assert(map != null);
        Debug.Assert(map.Length == MapSize);

        m_map = map;
        m_nextId = map.Max() + 1;
    }

    /// <summary>
    /// Assigns a fresh class to the symbol if it is still unclassified.
    /// </summary>
    /// <param name="symbol">The symbol to classify.</param>
    /// <returns>The class id of the symbol (new or already existing).</returns>
    public int DefineSymbol(T symbol) {
        var index = GetSymbolIndex(symbol);
        if (m_map[index] == UNCLASSIFIED)
            m_map[index] = m_nextId++;
        return m_map[index];
    }

    /// <summary>
    /// Defines a new class consisting of the specified symbols.
    /// </summary>
    /// <param name="symbols">Symbols of the class; duplicates are ignored.</param>
    /// <returns>The id of the new class.</returns>
    /// <exception cref="InvalidOperationException">
    /// One of the symbols already belongs to a class. The alphabet is left unchanged in this case.
    /// </exception>
    public int DefineClass(IEnumerable<T> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        // Validate everything before touching the map: a failure halfway through would
        // otherwise leave symbols bound to an id which the next defined class reuses.
        var indexes = new HashSet<int>();
        foreach (var symbol in symbols.Distinct()) {
            var index = GetSymbolIndex(symbol);
            // A second symbol that resolves to an index already taken by this class is
            // rejected as well, as it was when the map was updated eagerly.
            if (m_map[index] != UNCLASSIFIED || !indexes.Add(index))
                throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
        }

        foreach (var index in indexes)
            m_map[index] = m_nextId;

        return m_nextId++;
    }

    /// <summary>
    /// Builds the reverse map: for every class id the list of symbols that belong to it.
    /// </summary>
    /// <returns>
    /// An array of <see cref="Count"/> lists; the list at index <see cref="UNCLASSIFIED"/> is always empty.
    /// </returns>
    public List<T>[] CreateReverseMap() {
        var reverse = new List<T>[Count];
        for (int i = 0; i < reverse.Length; i++)
            reverse[i] = new List<T>();

        // A single pass over the input symbols instead of one full scan per class.
        foreach (var symbol in InputSymbols) {
            var cls = m_map[GetSymbolIndex(symbol)];
            // Unclassified symbols and ids outside of the known classes are not listed.
            if (cls > UNCLASSIFIED && cls < reverse.Length)
                reverse[cls].Add(symbol);
        }

        return reverse;
    }

    /// <summary>
    /// Merges classes of this alphabet into classes of <paramref name="newAlphabet"/>.
    /// </summary>
    /// <param name="newAlphabet">The alphabet in which the merged classes are defined.</param>
    /// <param name="classes">Groups of class ids of this alphabet; each group becomes one new class.</param>
    /// <returns>
    /// A translation map indexed by the class id of this alphabet. Classes that belong to a group
    /// containing <see cref="UNCLASSIFIED"/>, or to no group at all, are mapped to 0.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">A group refers to a class id unknown to this alphabet.</exception>
    public int[] Reclassify(IAlphabet<T> newAlphabet, IEnumerable<ICollection<int>> classes) {
        Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
        Safe.ArgumentNotNull(classes, "classes");
        var reverseMap = CreateReverseMap();

        int[] translationMap = new int[Count];

        foreach (var scl in classes) {
            // skip if the super class contains the unclassified element
            if (scl.Contains(UNCLASSIFIED))
                continue;
            var range = new List<T>();
            foreach (var cl in scl) {
                if (cl < 0 || cl >= reverseMap.Length)
                    // The two-argument constructor is required here: the single-argument
                    // one interprets its argument as the parameter name, not the message.
                    throw new ArgumentOutOfRangeException(
                        "classes",
                        String.Format("Class {0} is not valid for the current alphabet", cl)
                    );
                range.AddRange(reverseMap[cl]);
            }
            var newClass = newAlphabet.DefineClass(range);
            foreach (var cl in scl)
                translationMap[cl] = newClass;
        }

        return translationMap;
    }

    /// <summary>
    /// Returns the class id of the specified symbol.
    /// </summary>
    public int Translate(T symbol) {
        return m_map[GetSymbolIndex(symbol)];
    }

    /// <summary>
    /// Returns the index of the symbol in the classification map.
    /// </summary>
    public abstract int GetSymbolIndex(T symbol);

    /// <summary>
    /// All symbols that can appear in the input.
    /// </summary>
    public abstract IEnumerable<T> InputSymbols { get; }

    /// <summary>
    /// Required length of the classification map.
    /// </summary>
    protected abstract int MapSize { get; }

    /// <summary>
    /// Returns the classification map itself (not a copy).
    /// </summary>
    public int[] GetTranslationMap() {
        return m_map;
    }
}
|
103 | } |
@@ -0,0 +1,22 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Binary token representing an alternative of two sub-expressions (<c>left|right</c>).
/// </summary>
public class AltToken: BinaryToken {
    /// <summary>
    /// Creates an alternative of the two specified tokens.
    /// </summary>
    public AltToken(Token left, Token right)
        : base(left, right) {
    }

    /// <summary>
    /// Dispatches this token to <paramref name="visitor"/>.
    /// </summary>
    public override void Accept(IVisitor visitor) {
        Safe.ArgumentNotNull(visitor, "visitor");
        visitor.Visit(this);
    }

    /// <summary>
    /// Formats the token as <c>left|right</c>; the right operand is parenthesized
    /// when it is itself a binary token.
    /// </summary>
    public override string ToString() {
        string format;
        if (Right is BinaryToken)
            format = "{0}|({1})";
        else
            format = "{0}|{1}";

        return String.Format(format, Left, Right);
    }
}
|
22 | } |
@@ -0,0 +1,26 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Base class for tokens that combine two operands.
/// </summary>
public abstract class BinaryToken : Token {
    Token m_left;
    Token m_right;

    /// <summary>The left operand.</summary>
    public Token Left {
        get { return m_left; }
    }

    /// <summary>The right operand.</summary>
    public Token Right {
        get { return m_right; }
    }

    /// <summary>
    /// Stores both operands; each one is checked with <c>Safe.ArgumentNotNull</c>.
    /// </summary>
    protected BinaryToken(Token left, Token right) {
        m_left = left;
        Safe.ArgumentNotNull(left, "left");

        m_right = right;
        Safe.ArgumentNotNull(right, "right");
    }
}
|
26 | } |
@@ -0,0 +1,36 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// DFA definition whose input classes come from a character <see cref="Alphabet"/>.
/// </summary>
public class CDFADefinition : DFADefinitionBase {
    Alphabet m_alphabet;

    /// <summary>The alphabet this definition was created with.</summary>
    public Alphabet Alphabet {
        get { return m_alphabet; }
    }

    /// <summary>Number of classes in the alphabet.</summary>
    public override int AlphabetSize {
        get { return m_alphabet.Count; }
    }

    /// <summary>
    /// Creates an empty definition over the specified alphabet.
    /// </summary>
    /// <param name="alphabet">The alphabet; must not be null.</param>
    public CDFADefinition(Alphabet alphabet): base() {
        Safe.ArgumentNotNull(alphabet, "alphabet");
        m_alphabet = alphabet;
    }

    /// <summary>
    /// Runs the base <c>Optimize</c> routine into a new definition that owns a fresh alphabet.
    /// </summary>
    /// <returns>The newly created definition.</returns>
    public CDFADefinition Optimize() {
        var targetAlphabet = new Alphabet();
        var result = new CDFADefinition(targetAlphabet);

        Optimize(result, m_alphabet, targetAlphabet);

        return result;
    }

    /// <summary>
    /// Prints the automaton using this definition's alphabet.
    /// </summary>
    public void PrintDFA() {
        PrintDFA(m_alphabet);
    }
}
|
36 | } |
@@ -0,0 +1,27 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Binary token representing the concatenation of two sub-expressions.
/// </summary>
public class CatToken : BinaryToken {
    /// <summary>
    /// Creates a concatenation of the two specified tokens.
    /// </summary>
    public CatToken(Token left, Token right)
        : base(left, right) {
    }

    /// <summary>
    /// Dispatches this token to <paramref name="visitor"/>.
    /// </summary>
    public override void Accept(IVisitor visitor) {
        Safe.ArgumentNotNull(visitor, "visitor");
        visitor.Visit(this);
    }

    /// <summary>
    /// Formats the token as the left operand immediately followed by the right one.
    /// </summary>
    public override string ToString() {
        string leftText = FormatToken(Left);
        string rightText = FormatToken(Right);
        return String.Concat(leftText, rightText);
    }

    // Alternatives are wrapped in parentheses, every other token is printed as is.
    string FormatToken(Token token) {
        if (token is AltToken)
            return String.Format("({0})", token);
        return String.Format("{0}", token);
    }
}
|
27 | } |
@@ -0,0 +1,182 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Diagnostics; | |||
|
5 | using System.Linq; | |||
|
6 | using System.Text; | |||
|
7 | using System.Threading.Tasks; | |||
|
8 | ||||
|
9 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Builds a DFA from a regular expression: the expression is visited first to compute
/// the followpos sets, then <see cref="BuildDFA(IDFADefinition)"/> is used to construct
/// the automaton.
/// </summary>
public class DFABuilder : IVisitor {
    // Counter of positions assigned to symbol and end tokens.
    int m_idx = 0;
    // The first token that was visited.
    Token m_root;
    // firstpos and lastpos of the most recently visited node.
    HashSet<int> m_firstpos;
    HashSet<int> m_lastpos;

    // position -> set of positions that can follow it
    Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
    // position -> input class expected at this position
    Dictionary<int, int> m_indexes = new Dictionary<int, int>();
    // position of an end token -> tag of that token
    Dictionary<int, int> m_ends = new Dictionary<int, int>();

    /// <summary>
    /// The followpos sets computed so far, keyed by position.
    /// </summary>
    public Dictionary<int, HashSet<int>> FollowposMap {
        get { return m_followpos; }
    }

    /// <summary>
    /// Returns the followpos set of the specified position, creating an empty one when missing.
    /// </summary>
    public HashSet<int> Followpos(int pos) {
        HashSet<int> set;
        if (m_followpos.TryGetValue(pos, out set))
            return set;
        return m_followpos[pos] = new HashSet<int>();
    }

    // Tells whether the node can match the empty input.
    bool Nullable(object n) {
        if (n is EmptyToken || n is StarToken)
            return true;
        if (n is AltToken)
            return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right);
        if (n is CatToken)
            return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right);
        return false;
    }

    /// <summary>
    /// Alternative: firstpos and lastpos are the unions of the operands' sets.
    /// </summary>
    public void Visit(AltToken token) {
        if (m_root == null)
            m_root = token;
        var firstpos = new HashSet<int>();
        var lastpos = new HashSet<int>();

        token.Left.Accept(this);
        firstpos.UnionWith(m_firstpos);
        lastpos.UnionWith(m_lastpos);

        token.Right.Accept(this);
        firstpos.UnionWith(m_firstpos);
        lastpos.UnionWith(m_lastpos);

        m_firstpos = firstpos;
        m_lastpos = lastpos;
    }

    /// <summary>
    /// Repetition: every last position of the operand can be followed by any of its first positions.
    /// </summary>
    public void Visit(StarToken token) {
        if (m_root == null)
            m_root = token;
        token.Token.Accept(this);

        foreach (var i in m_lastpos)
            Followpos(i).UnionWith(m_firstpos);
    }

    /// <summary>
    /// Concatenation: last positions of the left operand are followed by first positions of the right one.
    /// </summary>
    public void Visit(CatToken token) {
        if (m_root == null)
            m_root = token;

        var firstpos = new HashSet<int>();
        var lastpos = new HashSet<int>();
        token.Left.Accept(this);
        firstpos.UnionWith(m_firstpos);
        var leftLastpos = m_lastpos;

        token.Right.Accept(this);
        lastpos.UnionWith(m_lastpos);
        var rightFirstpos = m_firstpos;

        if (Nullable(token.Left))
            firstpos.UnionWith(rightFirstpos);

        if (Nullable(token.Right))
            lastpos.UnionWith(leftLastpos);

        m_firstpos = firstpos;
        m_lastpos = lastpos;

        foreach (var i in leftLastpos)
            Followpos(i).UnionWith(rightFirstpos);
    }

    /// <summary>
    /// Empty token: it occupies no position, so its firstpos and lastpos are empty.
    /// </summary>
    public void Visit(EmptyToken token) {
        if (m_root == null)
            m_root = token;

        // The sets must be reset explicitly: the parent node reads m_firstpos/m_lastpos
        // right after this call and would otherwise pick up the sets of a previously
        // visited node (or null when nothing was visited before).
        m_firstpos = new HashSet<int>();
        m_lastpos = new HashSet<int>();
    }

    /// <summary>
    /// Symbol: allocates a new position bound to the token's value.
    /// </summary>
    public void Visit(SymbolToken token) {
        if (m_root == null)
            m_root = token;
        m_idx++;
        m_indexes[m_idx] = token.Value;
        m_firstpos = new HashSet<int>(new[] { m_idx });
        m_lastpos = new HashSet<int>(new[] { m_idx });
    }

    /// <summary>
    /// End marker: allocates a new position bound to the unclassified input and remembers the token's tag.
    /// </summary>
    public void Visit(EndToken token) {
        if (m_root == null)
            m_root = token;
        m_idx++;
        m_indexes[m_idx] = Alphabet.UNCLASSIFIED;
        m_firstpos = new HashSet<int>(new[] { m_idx });
        m_lastpos = new HashSet<int>(new[] { m_idx });
        // makes sure the position has a (possibly empty) followpos entry
        Followpos(m_idx);
        m_ends.Add(m_idx, token.Tag);
    }

    /// <summary>
    /// Builds the automaton from the positions collected while visiting the expression.
    /// Each DFA state corresponds to a set of positions; the firstpos of the last visited
    /// node becomes the initial state.
    /// </summary>
    /// <param name="dfa">The definition that receives the states and transitions.</param>
    public void BuildDFA(IDFADefinition dfa) {
        Safe.ArgumentNotNull(dfa,"dfa");

        // sets of positions are compared by content
        var stateMap = new Dictionary<HashSet<int>, int>(new CustomEqualityComparer<HashSet<int>>(
            (x, y) => x.SetEquals(y),
            (x) => x.Sum(n => n.GetHashCode())
        ));

        stateMap[m_firstpos] = DefineState( dfa, m_firstpos);
        Debug.Assert(stateMap[m_firstpos] == DFADefinitionBase.INITIAL_STATE);

        var queue = new Queue<HashSet<int>>();

        queue.Enqueue(m_firstpos);

        while (queue.Count > 0) {
            var state = queue.Dequeue();
            var s1 = stateMap[state];

            for (int a = 0; a < dfa.AlphabetSize; a++) {
                // positions reachable from the current state on input class a
                var next = new HashSet<int>();
                foreach (var p in state) {
                    if (m_indexes[p] == a) {
                        next.UnionWith(Followpos(p));
                    }
                }
                if (next.Count > 0) {
                    int s2;
                    if (!stateMap.TryGetValue(next, out s2)) {
                        stateMap[next] = s2 = DefineState(dfa, next);
                        queue.Enqueue(next);
                    }
                    dfa.DefineTransition(s1, s2, a);
                }
            }
        }
    }

    // Collects the tags of all end positions contained in the state.
    int[] GetStateTags(HashSet<int> state) {
        Debug.Assert(state != null);

        var tags = new List<int>();
        foreach (var pos in state) {
            int tag;
            // single lookup instead of ContainsKey followed by the indexer
            if (m_ends.TryGetValue(pos, out tag))
                tags.Add(tag);
        }
        return tags.ToArray();
    }

    // Adds a state to the automaton; the state is tagged when it contains end positions.
    int DefineState(IDFADefinition automa, HashSet<int> state) {
        Debug.Assert(automa != null);
        Debug.Assert(state != null);

        var tags = GetStateTags(state);

        return tags.Length > 0 ? automa.AddState(tags) : automa.AddState();
    }
}
|
182 | } |
@@ -0,0 +1,262 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Diagnostics; | |||
|
5 | using System.Linq; | |||
|
6 | using System.Text; | |||
|
7 | using System.Threading.Tasks; | |||
|
8 | ||||
|
9 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Base class of DFA definitions: keeps the list of states with their transition tables
/// and implements the minimization shared by the concrete definitions.
/// State 0 is reserved as the unreachable state, state 1 is the initial one.
/// </summary>
public abstract class DFADefinitionBase : IDFADefinition {
    readonly List<DFAStateDescriptior> m_states;

    public const int INITIAL_STATE = 1;
    public const int UNREACHEBLE_STATE = 0;

    // Array snapshot of m_states handed out by States; reset whenever a state is added.
    DFAStateDescriptior[] m_statesArray;

    /// <summary>
    /// Creates a definition that contains only the reserved unreachable state.
    /// </summary>
    public DFADefinitionBase() {
        m_states = new List<DFAStateDescriptior>();

        m_states.Add(new DFAStateDescriptior());
    }

    /// <summary>
    /// The states of the automaton as an array. The array is cached until a new state is added.
    /// </summary>
    public DFAStateDescriptior[] States {
        get {
            if (m_statesArray == null)
                m_statesArray = m_states.ToArray();
            return m_statesArray;
        }
    }

    /// <summary>
    /// Tells whether the initial state is a final one.
    /// </summary>
    public bool InitialStateIsFinal {
        get {
            return m_states[INITIAL_STATE].final;
        }
    }

    /// <summary>
    /// Adds a non-final state with an empty transition table.
    /// </summary>
    /// <returns>The index of the new state.</returns>
    public int AddState() {
        var index = m_states.Count;
        m_states.Add(new DFAStateDescriptior {
            final = false,
            transitions = new int[AlphabetSize]
        });
        // the cached snapshot no longer reflects the list
        m_statesArray = null;

        return index;
    }

    /// <summary>
    /// Adds a state that is final when <paramref name="tag"/> is a non-empty array.
    /// </summary>
    /// <param name="tag">Tags of the final state; null or empty produces a non-final state.</param>
    /// <returns>The index of the new state.</returns>
    public int AddState(int[] tag) {
        var index = m_states.Count;
        bool final = tag != null && tag.Length > 0;
        m_states.Add(new DFAStateDescriptior {
            final = final,
            transitions = new int[AlphabetSize],
            tag = final ? tag : null
        });
        // the cached snapshot no longer reflects the list
        m_statesArray = null;

        return index;
    }

    /// <summary>
    /// Defines the transition from <paramref name="s1"/> to <paramref name="s2"/> on the input class <paramref name="symbol"/>.
    /// </summary>
    public void DefineTransition(int s1,int s2, int symbol) {
        Safe.ArgumentInRange(s1, 0, m_states.Count-1, "s1");
        Safe.ArgumentInRange(s2, 0, m_states.Count-1, "s2");
        Safe.ArgumentInRange(symbol, 0, AlphabetSize-1, "symbol");

        m_states[s1].transitions[symbol] = s2;
    }

    /// <summary>
    /// Minimizes this automaton into <paramref name="minimalDFA"/>: equivalent states are merged
    /// by partition refinement, then input classes that are indistinguishable for the merged
    /// states are joined into the classes of <paramref name="minimalAlphabet"/>.
    /// </summary>
    /// <param name="minimalDFA">Receives the states and transitions of the minimized automaton.</param>
    /// <param name="sourceAlphabet">The alphabet of this automaton.</param>
    /// <param name="minimalAlphabet">Receives the joined input classes.</param>
    protected void Optimize<TA>(IDFADefinition minimalDFA,IAlphabet<TA> sourceAlphabet, IAlphabet<TA> minimalAlphabet) {
        Safe.ArgumentNotNull(minimalDFA, "minimalDFA");
        Safe.ArgumentNotNull(sourceAlphabet, "sourceAlphabet");
        Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet");

        var setComparer = new CustomEqualityComparer<HashSet<int>>(
            (x, y) => x.SetEquals(y),
            (s) => s.Sum(x => x.GetHashCode())
        );

        var arrayComparer = new CustomEqualityComparer<int[]>(
            (x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)),
            (a) => a.Sum(x => x.GetHashCode())
        );

        // the current partition of the states and the worklist of splitters
        var optimalStates = new HashSet<HashSet<int>>(setComparer);
        var queue = new HashSet<HashSet<int>>(setComparer);

        // Initial partition: final states grouped by their tags plus one block of non-final
        // states. Every block has to be on the worklist: the only block which may be left
        // out is the implicit unreachable state 0, otherwise final states sharing a tag but
        // having different outgoing transitions would never be split.
        foreach (var g in Enumerable
            .Range(INITIAL_STATE, m_states.Count-1)
            .Select(i => new {
                index = i,
                descriptor = m_states[i]
            })
            .Where(x => x.descriptor.final)
            .GroupBy(x => x.descriptor.tag, arrayComparer)
        ) {
            var finals = new HashSet<int>(g.Select(x => x.index));
            optimalStates.Add(finals);
            queue.Add(finals);
        }

        var state = new HashSet<int>(
            Enumerable
                .Range(INITIAL_STATE, m_states.Count - 1)
                .Where(i => !m_states[i].final)
        );
        // an empty block would turn into a superfluous state of the minimized automaton
        if (state.Count > 0) {
            optimalStates.Add(state);
            queue.Add(state);
        }

        while (queue.Count > 0) {
            var stateA = queue.First();
            queue.Remove(stateA);

            for (int c = 0; c < AlphabetSize; c++) {
                // all states which move into stateA on the input class c
                var stateX = new HashSet<int>();

                for(int s = 1; s < m_states.Count; s++) {
                    if (stateA.Contains(m_states[s].transitions[c]))
                        stateX.Add(s);
                }

                foreach (var stateY in optimalStates.ToArray()) {
                    if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
                        var stateR1 = new HashSet<int>(stateY);
                        var stateR2 = new HashSet<int>(stateY);

                        stateR1.IntersectWith(stateX);
                        stateR2.ExceptWith(stateX);

                        optimalStates.Remove(stateY);
                        optimalStates.Add(stateR1);
                        optimalStates.Add(stateR2);

                        if (queue.Contains(stateY)) {
                            queue.Remove(stateY);
                            queue.Add(stateR1);
                            queue.Add(stateR2);
                        } else {
                            queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
                        }
                    }
                }
            }
        }

        // build the maps between the optimal states and the original ones

        var initialState = optimalStates.Where(x => x.Contains(INITIAL_STATE)).Single();

        // maps an original state to the optimal state which contains it
        int[] reveseOptimalMap = new int[m_states.Count];
        // maps the index of an optimal state to the set of original states
        HashSet<int>[] optimalMap = new HashSet<int>[optimalStates.Count + 1];
        {
            optimalMap[0] = new HashSet<int>(); // unreachable state
            optimalMap[1] = initialState; // initial state
            foreach (var ss in initialState)
                reveseOptimalMap[ss] = 1;

            int i = 2;
            foreach (var s in optimalStates) {
                if (s.SetEquals(initialState))
                    continue;
                optimalMap[i] = s;
                foreach (var ss in s)
                    reveseOptimalMap[ss] = i;
                i++;
            }
        }

        // compute the minimal alphabet

        var minClasses = new HashSet<HashSet<int>>(setComparer);
        var alphaQueue = new Queue<HashSet<int>>();
        alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));

        for (int s = 1 ; s < optimalMap.Length; s++) {
            var newQueue = new Queue<HashSet<int>>();

            foreach (var A in alphaQueue) {
                if (A.Count == 1) {
                    minClasses.Add(A);
                    continue;
                }

                // distinguish the input classes which lead to different optimal states
                // optimalState -> alphaClass
                var classes = new Dictionary<int, HashSet<int>>();

                foreach (var term in A) {
                    // look for the transitions of the optimal state s on the input class term
                    var s2 = reveseOptimalMap[
                        optimalMap[s].Select(x => m_states[x].transitions[term]) // all original states the class s moves to
                            .Where(x => x != 0) // only the valid ones
                            .FirstOrDefault() // the first valid original state, if any
                    ];

                    HashSet<int> A2;
                    if (!classes.TryGetValue(s2, out A2)) {
                        A2 = new HashSet<int>();
                        newQueue.Enqueue(A2);
                        classes[s2] = A2;
                    }
                    A2.Add(term);
                }
            }

            if (newQueue.Count == 0)
                break;
            alphaQueue = newQueue;
        }

        foreach (var A in alphaQueue)
            minClasses.Add(A);

        // The new alphabet must be complete before any state is added to minimalDFA:
        // AddState sizes the transition tables using the alphabet size at that moment.
        var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses);

        // build the automaton

        var states = new int[ optimalMap.Length ];
        states[0] = UNREACHEBLE_STATE;

        for(var s = INITIAL_STATE; s < states.Length; s++) {
            var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty<int>()).Distinct().ToArray();
            if (tags.Length > 0)
                states[s] = minimalDFA.AddState(tags);
            else
                states[s] = minimalDFA.AddState();
        }

        Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE);

        for (int s1 = 1; s1 < m_states.Count; s1++) {
            for (int c = 0; c < AlphabetSize; c++) {
                var s2 = m_states[s1].transitions[c];
                if (s2 != UNREACHEBLE_STATE) {
                    minimalDFA.DefineTransition(
                        reveseOptimalMap[s1],
                        reveseOptimalMap[s2],
                        alphabetMap[c]
                    );
                }
            }
        }
    }

    /// <summary>
    /// Writes the input classes and the transitions of the automaton to the console.
    /// </summary>
    /// <param name="alphabet">The alphabet used to print the symbols of each input class.</param>
    protected void PrintDFA<TA>(IAlphabet<TA> alphabet) {

        var reverseMap = alphabet.CreateReverseMap();

        for (int i = 1; i < reverseMap.Length; i++) {
            Console.WriteLine("C{0}: {1}", i, String.Join(",", reverseMap[i]));
        }

        for (int i = 1; i < m_states.Count; i++) {
            var s = m_states[i];
            for (int c = 0; c < AlphabetSize; c++)
                if (s.transitions[c] != UNREACHEBLE_STATE)
                    Console.WriteLine("S{0} -{1}-> S{2}{3}", i, String.Join(",", reverseMap[c]), s.transitions[c], m_states[s.transitions[c]].final ? "$" : "");
        }
    }

    /// <summary>
    /// Number of input classes; determines the length of the transition table of new states.
    /// </summary>
    public abstract int AlphabetSize {
        get;
    }
}
|
262 | } |
@@ -0,0 +1,13 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | using System.Threading.Tasks; | |||
|
6 | ||||
|
7 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Plain data record describing one state of a DFA.
/// </summary>
public struct DFAStateDescriptior {
    /// <summary>Whether the state is a final one.</summary>
    public bool final;

    /// <summary>Tags attached to the state.</summary>
    public int[] tag;

    /// <summary>Transition table: the target state for each input index.</summary>
    public int[] transitions;
}
|
13 | } |
@@ -0,0 +1,56 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Diagnostics; | |||
|
5 | using System.Linq; | |||
|
6 | using System.Text; | |||
|
7 | using System.Threading.Tasks; | |||
|
8 | ||||
|
9 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Base class of automatons driven by a DFA state table. The current table, state and
/// user data form a context which can be switched and later restored via a stack.
/// </summary>
/// <typeparam name="T">Type of the user data stored with each context.</typeparam>
public abstract class DFAutomaton<T> {
    /// <summary>
    /// One execution context: the state table, the current state and the attached data.
    /// </summary>
    protected struct ContextFrame {
        public DFAStateDescriptior[] states;
        public int current;
        public T info;
    }

    public const int INITIAL_STATE = DFADefinitionBase.INITIAL_STATE;
    public const int UNREACHEBLE_STATE = DFADefinitionBase.UNREACHEBLE_STATE;

    protected ContextFrame m_context;
    Stack<ContextFrame> m_contextStack = new Stack<ContextFrame>();

    /// <summary>
    /// Number of contexts saved on the stack.
    /// </summary>
    public int Level {
        get { return m_contextStack.Count; }
    }

    /// <summary>
    /// Initializes the current context.
    /// </summary>
    /// <param name="states">The state table; must not be null.</param>
    /// <param name="startState">The starting state; must be a valid index of <paramref name="states"/>.</param>
    /// <param name="info">User data of the context.</param>
    protected DFAutomaton(DFAStateDescriptior[] states, int startState, T info) {
        Safe.ArgumentNotNull(states, "states");
        Safe.ArgumentInRange(startState, 0, states.Length - 1, "startState");

        m_context = new ContextFrame {
            states = states,
            current = startState,
            info = info
        };
    }

    /// <summary>
    /// Saves the current context on the stack and replaces it with the specified one.
    /// </summary>
    protected void Switch(DFAStateDescriptior[] states, int current, T info) {
        Debug.Assert(states != null);
        Debug.Assert(current >= 0 && current < states.Length);

        m_contextStack.Push(m_context);

        m_context = new ContextFrame {
            states = states,
            current = current,
            info = info
        };
    }

    /// <summary>
    /// Replaces the current context with the one most recently saved by <c>Switch</c>.
    /// </summary>
    protected void Restore() {
        Debug.Assert(m_contextStack.Count > 0);

        m_context = m_contextStack.Pop();
    }

    /// <summary>
    /// Follows the transition of the current state for the specified input index.
    /// </summary>
    protected void Move(int input) {
        // NOTE(review): the assertion rejects input 0 — confirm that index 0 is never meant to be fed here.
        Debug.Assert(input > 0 && input < m_context.states[m_context.current].transitions.Length);

        var transitions = m_context.states[m_context.current].transitions;
        m_context.current = transitions[input];
    }
}
|
56 | } |
@@ -0,0 +1,37 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// DFA definition whose input classes come from an <see cref="EnumAlphabet{T}"/>.
/// </summary>
/// <typeparam name="T">Type of the alphabet symbols.</typeparam>
public class EDFADefinition<T> : DFADefinitionBase where T : struct, IConvertible {
    EnumAlphabet<T> m_alphabet;

    /// <summary>The alphabet this definition was created with.</summary>
    public EnumAlphabet<T> Alphabet {
        get { return m_alphabet; }
    }

    /// <summary>
    /// Creates an empty definition over the specified alphabet.
    /// </summary>
    /// <param name="alphabet">The alphabet; must not be null.</param>
    public EDFADefinition(EnumAlphabet<T> alphabet)
        : base() {
        Safe.ArgumentNotNull(alphabet, "alphabet");
        m_alphabet = alphabet;
    }

    /// <summary>Number of classes in the alphabet.</summary>
    public override int AlphabetSize {
        get { return m_alphabet.Count; }
    }

    /// <summary>
    /// Runs the base <c>Optimize</c> routine into a new definition that owns a fresh alphabet.
    /// </summary>
    /// <returns>The newly created definition.</returns>
    public EDFADefinition<T> Optimize() {
        var targetAlphabet = new EnumAlphabet<T>();
        var result = new EDFADefinition<T>(targetAlphabet);

        Optimize(result, m_alphabet, targetAlphabet);

        return result;
    }

    /// <summary>
    /// Prints the automaton using this definition's alphabet.
    /// </summary>
    public void PrintDFA() {
        PrintDFA(m_alphabet);
    }
}
|
37 | } |
@@ -0,0 +1,18 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Token without operands; it is printed as <c>$</c>.
/// </summary>
public class EmptyToken : Token {
    /// <summary>
    /// Dispatches this token to <paramref name="visitor"/>.
    /// </summary>
    public override void Accept(IVisitor visitor) {
        Safe.ArgumentNotNull(visitor, "visitor");
        visitor.Visit(this);
    }

    /// <summary>
    /// Returns the textual form of the token.
    /// </summary>
    public override string ToString() {
        const string text = "$";
        return text;
    }
}
|
18 | } |
@@ -0,0 +1,37 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// The end symbol of an extended regular expression; it is used to determine
/// the final states when a DFA is built.
/// </summary>
public class EndToken: Token {

    int m_tag;

    /// <summary>
    /// Creates an end token carrying the specified tag.
    /// </summary>
    public EndToken(int tag) {
        m_tag = tag;
    }

    /// <summary>
    /// Creates an end token with the tag 0.
    /// </summary>
    public EndToken() {
        m_tag = 0;
    }

    /// <summary>The tag passed to the constructor.</summary>
    public int Tag {
        get { return m_tag; }
    }

    /// <summary>
    /// Dispatches this token to <paramref name="visitor"/>.
    /// </summary>
    public override void Accept(IVisitor visitor) {
        Safe.ArgumentNotNull(visitor, "visitor");
        visitor.Visit(this);
    }

    /// <summary>
    /// Returns the textual form of the token.
    /// </summary>
    public override string ToString() {
        const string text = "#";
        return text;
    }
}
|
37 | } |
@@ -0,0 +1,68 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Globalization; | |||
|
5 | using System.Linq; | |||
|
6 | using System.Text; | |||
|
7 | using System.Threading.Tasks; | |||
|
8 | ||||
|
9 | namespace Implab.Parsing { | |||
|
/// <summary>
/// An alphabet whose symbols are the members of an enumeration.
/// </summary>
/// <typeparam name="T">The enumeration type.</typeparam>
public class EnumAlphabet<T> : AlphabetBase<T> where T : struct, IConvertible {
    // Members of the enumeration ordered by their numeric value.
    static readonly T[] _symbols;
    static readonly EnumAlphabet<T> _fullAlphabet;

    /// <summary>
    /// Validates the generic argument and prepares the list of symbols.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// <typeparamref name="T"/> is not an enumeration based on <c>Int32</c>, declares no members,
    /// does not start at zero or has values not less than the number of members.
    /// </exception>
    static EnumAlphabet() {
        if (!typeof(T).IsEnum)
            throw new InvalidOperationException("Invalid generic parameter, enumeration is required");

        if (Enum.GetUnderlyingType(typeof(T)) != typeof(Int32))
            throw new InvalidOperationException("Only enums based on Int32 are supported");

        // _symbols must be assigned before _fullAlphabet is created: the base
        // constructor reads MapSize, which in turn reads _symbols.
        _symbols = ((T[])Enum.GetValues(typeof(T)))
            .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture))
            .ToArray();

        // An enumeration without members would otherwise fail below with an
        // IndexOutOfRangeException instead of a meaningful message.
        if (_symbols.Length == 0)
            throw new InvalidOperationException("The specified enumeration must declare at least one member");

        var first = _symbols[0].ToInt32(CultureInfo.InvariantCulture);
        var last = _symbols[_symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture);

        if (last >= _symbols.Length || first != 0)
            throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered");

        _fullAlphabet = new EnumAlphabet<T>(_symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray());
    }

    /// <summary>
    /// A shared alphabet whose map assigns to every member its own numeric value.
    /// </summary>
    public static EnumAlphabet<T> FullAlphabet {
        get {
            return _fullAlphabet;
        }
    }

    /// <summary>
    /// Creates an alphabet using the parameterless base constructor.
    /// </summary>
    public EnumAlphabet()
        : base() {
    }

    /// <summary>
    /// Creates an alphabet on top of the specified classification map, which is passed to the base constructor.
    /// </summary>
    public EnumAlphabet(int[] map)
        : base(map) {
    }

    /// <summary>
    /// Returns the map index of a symbol, which is its numeric value.
    /// </summary>
    public override int GetSymbolIndex(T symbol) {
        return symbol.ToInt32(CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// The members of the enumeration ordered by their numeric value.
    /// </summary>
    public override IEnumerable<T> InputSymbols {
        get { return _symbols; }
    }

    /// <summary>
    /// Size of the classification map: the number of members of the enumeration.
    /// </summary>
    protected override int MapSize {
        get { return _symbols.Length; }
    }
}
|
68 | } |
@@ -0,0 +1,103 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Abstract base class of a grammar. Lets derived classes formulate expressions over an
/// alphabet of <c>char</c> symbols.
/// </summary>
/// <typeparam name="TGrammar">Concrete grammar type; it must have a parameterless constructor.</typeparam>
public abstract class Grammar<TGrammar> where TGrammar: Grammar<TGrammar>, new() {
    Alphabet m_alphabet = new Alphabet();
    static TGrammar _instance;

    /// <summary>
    /// Instance of the grammar, created on first access and reused afterwards.
    /// </summary>
    public static TGrammar Instance {
        get {
            return _instance ?? (_instance = new TGrammar());
        }
    }

    /// <summary>
    /// Creates a token for the <c>Alphabet.UNCLASSIFIED</c> symbol.
    /// </summary>
    public SymbolToken UnclassifiedToken() {
        return new SymbolToken(Alphabet.UNCLASSIFIED);
    }

    /// <summary>
    /// Defines every character of the sequence in the alphabet of the grammar.
    /// </summary>
    /// <param name="alphabet">Characters to define.</param>
    public void DefineAlphabet(IEnumerable<char> alphabet) {
        Safe.ArgumentNotNull(alphabet, "alphabet");

        foreach (var symbol in alphabet) {
            m_alphabet.DefineSymbol(symbol);
        }
    }

    /// <summary>
    /// Creates a token matching any character from <paramref name="start"/> to
    /// <paramref name="end"/> inclusive.
    /// </summary>
    /// <param name="start">First character of the range.</param>
    /// <param name="end">Last character of the range.</param>
    public Token SymbolRangeToken(char start, char end) {
        int count = end - start + 1;
        var range = Enumerable.Range(start, count).Select(code => (char)code);
        return SymbolToken(range);
    }

    /// <summary>
    /// Creates a token for a single character, defining it in the alphabet when necessary.
    /// </summary>
    /// <param name="symbol">Character to match.</param>
    public Token SymbolToken(char symbol) {
        var symbolClass = TranslateOrAdd(symbol);
        return Token.New(symbolClass);
    }

    /// <summary>
    /// Creates a token matching any of the specified characters, defining them in the
    /// alphabet when necessary.
    /// </summary>
    /// <param name="symbols">Characters to match.</param>
    /// <returns>The token; <c>null</c> when <paramref name="symbols"/> is empty.</returns>
    public Token SymbolToken(IEnumerable<char> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        var classes = TranslateOrAdd(symbols).ToArray();
        return Token.New(classes);
    }

    /// <summary>
    /// Same as <see cref="SymbolToken(IEnumerable{char})"/>, taking the characters as a
    /// parameter list.
    /// </summary>
    /// <param name="set">Characters to match.</param>
    public Token SymbolSetToken(params char[] set) {
        return SymbolToken(set);
    }

    // Translates the character, defining it in the alphabet when it is unclassified.
    int TranslateOrAdd(char ch) {
        var symbolClass = m_alphabet.Translate(ch);
        if (symbolClass != Alphabet.UNCLASSIFIED)
            return symbolClass;
        return m_alphabet.DefineSymbol(ch);
    }

    // Lazily translates the distinct characters of the sequence, defining the unknown ones.
    IEnumerable<int> TranslateOrAdd(IEnumerable<char> symbols) {
        return symbols.Distinct().Select(ch => TranslateOrAdd(ch));
    }

    // Translates the character and fails when it is unclassified.
    int TranslateOrDie(char ch) {
        var symbolClass = m_alphabet.Translate(ch);
        if (symbolClass != Alphabet.UNCLASSIFIED)
            return symbolClass;
        throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
    }

    // Lazily translates the distinct characters of the sequence, failing on unknown ones.
    IEnumerable<int> TranslateOrDie(IEnumerable<char> symbols) {
        return symbols.Distinct().Select(ch => TranslateOrDie(ch));
    }

    /// <summary>
    /// Creates a token matching every symbol index of the alphabet except those of the
    /// specified characters.
    /// </summary>
    /// <param name="symbols">Characters to exclude; each must already be defined in the alphabet.</param>
    /// <returns>The token; <c>null</c> when no symbol index remains.</returns>
    public Token SymbolTokenExcept(IEnumerable<char> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        var allClasses = Enumerable.Range(0, m_alphabet.Count);
        var remaining = allClasses.Except(TranslateOrDie(symbols)).ToArray();
        return Token.New(remaining);
    }

    /// <summary>
    /// Builds the DFA for the specified language over the alphabet of the grammar.
    /// </summary>
    /// <param name="lang">Expression describing the language.</param>
    /// <returns>The result of <c>Optimize()</c> called on the built definition.</returns>
    protected CDFADefinition BuildDFA(Token lang) {
        Safe.ArgumentNotNull(lang, "lang");

        var definition = new CDFADefinition(m_alphabet);
        var builder = new DFABuilder();

        lang.Accept(builder);
        builder.BuildDFA(definition);

        // A final initial state means the language accepts an empty token.
        if (definition.InitialStateIsFinal)
            throw new ApplicationException("The specified language contains empty token");

        return definition.Optimize();
    }

    //protected abstract TGrammar CreateInstance();
}
|
101 | ||||
|
102 | ||||
|
103 | } |
@@ -0,0 +1,56 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | using System.Threading.Tasks; | |||
|
6 | ||||
|
7 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Alphabet. A set of symbols partitioned into classes; the classes are numbered
/// contiguously, which allows using them as array indices.
/// </summary>
/// <remarks>From here on, the classes of source symbols are called symbols of the alphabet.</remarks>
/// <typeparam name="TSymbol">Type of the symbols.</typeparam>
public interface IAlphabet<TSymbol> {
    /// <summary>
    /// Number of symbols in the alphabet.
    /// </summary>
    int Count { get; }

    /// <summary>
    /// Adds a new symbol to the alphabet; if the symbol has already been added, the class
    /// previously associated with it is returned.
    /// </summary>
    /// <param name="symbol">Symbol to add.</param>
    /// <returns>Index of the class associated with the symbol.</returns>
    int DefineSymbol(TSymbol symbol);

    /// <summary>
    /// Adds a class of symbols. The specified set of source symbols is mapped to one
    /// symbol of the alphabet.
    /// </summary>
    /// <param name="symbols">Set of source symbols.</param>
    /// <returns>Identifier of the alphabet symbol.</returns>
    int DefineClass(IEnumerable<TSymbol> symbols);

    /// <summary>
    /// Creates a reverse map from a symbol of the alphabet to the source symbols
    /// associated with it.
    /// </summary>
    /// <returns>Reverse map.</returns>
    List<TSymbol>[] CreateReverseMap();

    /// <summary>
    /// Creates a new alphabet based on the current one by grouping its symbols into
    /// larger, non-overlapping classes of symbols.
    /// </summary>
    /// <param name="newAlphabet">New, empty alphabet in which the classes will be defined.</param>
    /// <param name="classes">Set of symbol classes of the current alphabet.</param>
    /// <returns>Map translating symbols of the current alphabet to symbols of the new one.</returns>
    int[] Reclassify(IAlphabet<TSymbol> newAlphabet, IEnumerable<ICollection<int>> classes);

    /// <summary>
    /// Converts an input symbol to the index of a symbol of the alphabet.
    /// </summary>
    /// <param name="symobl">Source symbol.</param>
    /// <returns>Index in the alphabet.</returns>
    int Translate(TSymbol symobl);
}
|
56 | } |
@@ -0,0 +1,36 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | using System.Threading.Tasks; | |||
|
6 | ||||
|
7 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Interface for defining a DFA; allows adding states and defining transitions.
/// </summary>
public interface IDFADefinition {
    /// <summary>
    /// Adds a state to the automaton.
    /// </summary>
    /// <returns>Index of the added state.</returns>
    int AddState();

    /// <summary>
    /// Adds a final state with the specified tags; if no tags are given, the added state
    /// will not be final.
    /// </summary>
    /// <param name="tags">Tags of the state.</param>
    /// <returns>Index of the added state.</returns>
    int AddState(int[] tags);

    /// <summary>
    /// Defines a transition between states.
    /// </summary>
    /// <param name="s1">Source state.</param>
    /// <param name="s2">Target state.</param>
    /// <param name="input">Input symbol.</param>
    void DefineTransition(int s1, int s2, int input);

    /// <summary>
    /// Size of the input alphabet.
    /// </summary>
    int AlphabetSize { get; }
}
|
36 | } |
@@ -0,0 +1,19 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | using System.Threading.Tasks; | |||
|
6 | ||||
|
7 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Visitor of the syntax tree of a regular expression; declares one overload per node type.
/// </summary>
public interface IVisitor {
    /// <summary>Visits an <see cref="AltToken"/> node.</summary>
    void Visit(AltToken token);

    /// <summary>Visits a <see cref="StarToken"/> node.</summary>
    void Visit(StarToken token);

    /// <summary>Visits a <see cref="CatToken"/> node.</summary>
    void Visit(CatToken token);

    /// <summary>Visits an <see cref="EmptyToken"/> node.</summary>
    void Visit(EmptyToken token);

    /// <summary>Visits an <see cref="EndToken"/> node.</summary>
    void Visit(EndToken token);

    /// <summary>Visits a <see cref="SymbolToken"/> node.</summary>
    void Visit(SymbolToken token);
}
|
19 | } |
@@ -0,0 +1,17 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | ||||
|
6 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Exception type of the parsing subsystem; adds nothing to <see cref="Exception"/>
/// besides its own type.
/// </summary>
[Serializable]
public class ParserException : Exception {
    /// <summary>Creates an exception with the default message.</summary>
    public ParserException() {
    }

    /// <summary>Creates an exception with the specified message.</summary>
    /// <param name="message">Description of the error.</param>
    public ParserException(string message)
        : base(message) {
    }

    /// <summary>Creates an exception with the specified message and inner exception.</summary>
    /// <param name="message">Description of the error.</param>
    /// <param name="inner">Exception that caused this one.</param>
    public ParserException(string message, Exception inner)
        : base(message, inner) {
    }

    /// <summary>Constructor used when the exception is restored from serialized data.</summary>
    /// <param name="info">Serialized data of the exception.</param>
    /// <param name="context">Context of the serialization stream.</param>
    protected ParserException(
        System.Runtime.Serialization.SerializationInfo info,
        System.Runtime.Serialization.StreamingContext context)
        : base(info, context) {
    }
}
|
17 | } |
@@ -0,0 +1,207 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Base class for splitting a stream of input characters into tokens.
/// </summary>
/// <remarks>
/// The scanner holds a buffer with the characters of the input text. Two pointers move
/// over the buffer, the start and the end of the token; while moving, a DFA is used to
/// determine the end of the token and whether the current character is acceptable.
/// </remarks>
public class Scanner {
    // DFA tables saved by Switch and reinstated by Restore.
    struct ScannerConfig {
        public DFAStateDescriptior[] states;
        public int[] alphabetMap;
    }

    Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();

    DFAStateDescriptior[] m_states;
    int[] m_alphabetMap;

    protected DFAStateDescriptior m_currentState;
    // Class (alphabet index) of the character under m_pointer.
    int m_previewCode;

    protected int m_tokenLen = 0;
    protected int m_tokenOffset;

    protected char[] m_buffer;
    protected int m_bufferSize;
    protected int m_pointer;

    /// <summary>
    /// Creates a scanner for the specified DFA and feeds it with the specified text.
    /// </summary>
    /// <param name="definition">DFA definition supplying the state table and the alphabet.</param>
    /// <param name="text">Text to scan; checked with <c>Safe.ArgumentNotEmpty</c>.</param>
    public Scanner(CDFADefinition definition, string text) {
        Safe.ArgumentNotNull(definition, "definition");
        Safe.ArgumentNotEmpty(text, "text");

        m_states = definition.States;
        m_alphabetMap = definition.Alphabet.GetTranslationMap();

        Feed(text.ToCharArray());
    }

    /// <summary>
    /// Creates a scanner for the specified DFA with an empty input buffer.
    /// </summary>
    /// <param name="definition">DFA definition supplying the state table and the alphabet.</param>
    public Scanner(CDFADefinition definition) {
        Safe.ArgumentNotNull(definition, "definition");

        m_states = definition.States;
        m_alphabetMap = definition.Alphabet.GetTranslationMap();

        Feed(new char[0]);
    }

    /// <summary>
    /// Fills the buffer with input data.
    /// </summary>
    /// <param name="data">Data to process.</param>
    /// <remarks>The data is not copied; the passed array is used as the input buffer.</remarks>
    public void Feed(char[] data) {
        Safe.ArgumentNotNull(data, "data");

        Feed(data, data.Length);
    }

    /// <summary>
    /// Fills the read buffer with input data.
    /// </summary>
    /// <param name="data">Data to process.</param>
    /// <param name="length">Length of the data to process.</param>
    /// <remarks>The data is not copied; the passed array is used as the input buffer.</remarks>
    public void Feed(char[] data, int length) {
        Safe.ArgumentNotNull(data, "data");
        Safe.ArgumentInRange(length, 0, data.Length, "length");

        // Start one position before the data so that Shift() lands on the first character.
        m_pointer = -1;
        m_buffer = data;
        m_bufferSize = length;
        Shift();
    }

    /// <summary>
    /// Gets the current token as a string.
    /// </summary>
    /// <returns>Characters of the buffer starting at the token offset, token length long.</returns>
    public string GetTokenValue() {
        return new String(m_buffer, m_tokenOffset, m_tokenLen);
    }

    /// <summary>
    /// Tags of the current token that were assigned in the regular expression.
    /// </summary>
    public int[] TokenTags {
        get {
            return m_currentState.tag;
        }
    }

    /// <summary>
    /// Reads the next token; afterwards <see cref="m_tokenOffset"/> points to the start of the
    /// token, <see cref="m_tokenLen"/> holds its length and <see cref="m_buffer"/> is the
    /// character array containing the token.
    /// </summary>
    /// <returns><c>false</c> - the end of the data is reached, no token has been read.</returns>
    /// <exception cref="ParserException">
    /// A character cannot be accepted, or the data ended, while the DFA is not in a final state.
    /// </exception>
    protected bool ReadTokenInternal() {
        if (m_pointer >= m_bufferSize)
            return false;

        m_currentState = m_states[CDFADefinition.INITIAL_STATE];
        m_tokenLen = 0;
        m_tokenOffset = m_pointer;

        do {
            int nextState = m_currentState.transitions[m_previewCode];
            if (nextState == CDFADefinition.UNREACHEBLE_STATE) {
                // No transition for this character: the token ends here if the state is final.
                if (m_currentState.final)
                    return true;
                else
                    throw new ParserException(
                        String.Format(
                            "Unexpected symbol '{0}', at pos {1}",
                            m_buffer[m_pointer],
                            Position
                        )
                    );
            } else {
                m_currentState = m_states[nextState];
                m_tokenLen++;
            }

        } while (Shift());

        // END OF DATA
        if (!m_currentState.final)
            throw new ParserException("Unexpected end of data");

        return true;
    }

    // Advances the read pointer by one character and refreshes the preview code; at the end
    // of the buffer the result of ReadNextChunk() is returned instead.
    bool Shift() {
        m_pointer++;

        if (m_pointer >= m_bufferSize) {
            return ReadNextChunk();
        }

        m_previewCode = m_alphabetMap[m_buffer[m_pointer]];

        return true;
    }

    // Recomputes the preview code with the active alphabet map. Nothing is done when the
    // read pointer is outside of the valid part of the buffer: there is no character to
    // classify there, and indexing the buffer would either fail or read stale data.
    void UpdatePreviewCode() {
        if (m_pointer >= 0 && m_pointer < m_bufferSize)
            m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
    }

    /// <summary>
    /// Called when the end of the input buffer is reached in order to get new data.
    /// </summary>
    /// <returns><c>true</c> - new data has been received, processing can continue.</returns>
    protected virtual bool ReadNextChunk() {
        return false;
    }

    /// <summary>
    /// Position of the scanner in the input buffer.
    /// </summary>
    public int Position {
        get {
            return m_pointer + 1;
        }
    }

    /// <summary>
    /// Switches the internal DFA to the specified one; allows implementing something
    /// similar to a capturing group.
    /// </summary>
    /// <param name="states">State table of the new DFA.</param>
    /// <param name="alphabet">Input symbol table for the new DFA.</param>
    /// <remarks>The previous tables are saved and can be reinstated with <see cref="Restore"/>.</remarks>
    protected void Switch(DFAStateDescriptior[] states, int[] alphabet) {
        // Validate both tables before any state is changed.
        Safe.ArgumentNotNull(states, "states");
        Safe.ArgumentNotNull(alphabet, "alphabet");

        m_defs.Push(new ScannerConfig {
            states = m_states,
            alphabetMap = m_alphabetMap
        });

        m_states = states;
        m_alphabetMap = alphabet;

        UpdatePreviewCode();
    }

    /// <summary>
    /// Restores the previous DFA of the scanner.
    /// </summary>
    /// <exception cref="InvalidOperationException">There is no saved DFA to restore.</exception>
    protected void Restore() {
        if (m_defs.Count == 0)
            throw new InvalidOperationException();
        var prev = m_defs.Pop();
        m_states = prev.states;
        m_alphabetMap = prev.alphabetMap;
        UpdatePreviewCode();
    }
}
|
207 | } |
@@ -0,0 +1,34 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Closure of an expression: zero or more repetitions.
/// </summary>
public class StarToken: Token {

    Token m_token;

    /// <summary>
    /// The repeated expression.
    /// </summary>
    public Token Token {
        get { return m_token; }
    }

    /// <summary>
    /// Creates the closure of the specified expression.
    /// </summary>
    /// <param name="token">Expression to repeat.</param>
    public StarToken(Token token) {
        Safe.ArgumentNotNull(token, "token");
        m_token = token;
    }

    /// <summary>
    /// Passes this node to the visitor.
    /// </summary>
    /// <param name="visitor">Visitor to call.</param>
    public override void Accept(IVisitor visitor) {
        Safe.ArgumentNotNull(visitor, "visitor");
        visitor.Visit(this);
    }

    /// <summary>
    /// Text of the inner expression enclosed in parentheses and followed by an asterisk.
    /// </summary>
    public override string ToString() {
        string inner = m_token.ToString();
        return String.Concat("(", inner, ")*");
    }
}
|
34 | } |
@@ -0,0 +1,33 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Expression that corresponds to a single symbol.
/// </summary>
public class SymbolToken : Token {
    readonly int m_value;

    /// <summary>
    /// The value passed to the constructor.
    /// </summary>
    public int Value {
        get { return m_value; }
    }

    /// <summary>
    /// Creates an expression for the specified symbol value.
    /// </summary>
    /// <param name="value">Value of the symbol.</param>
    public SymbolToken(int value) {
        m_value = value;
    }

    /// <summary>
    /// Passes this node to the visitor.
    /// </summary>
    /// <param name="visitor">Visitor to call.</param>
    public override void Accept(IVisitor visitor) {
        Safe.ArgumentNotNull(visitor, "visitor");
        visitor.Visit(this);
    }

    /// <summary>
    /// Text form of <see cref="Value"/>.
    /// </summary>
    public override string ToString() {
        return m_value.ToString();
    }
}
|
33 | } |
@@ -0,0 +1,67 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Globalization; | |||
|
5 | using System.Linq; | |||
|
6 | using System.Text; | |||
|
7 | using System.Threading.Tasks; | |||
|
8 | ||||
|
9 | namespace Implab.Parsing { | |||
|
/// <summary>
/// Node of an expression tree. Besides the visitor entry point, it offers helpers which
/// combine nodes into larger expressions.
/// </summary>
public abstract class Token {
    /// <summary>
    /// Passes this node to the visitor.
    /// </summary>
    /// <param name="visitor">Visitor to call.</param>
    public abstract void Accept(IVisitor visitor);

    /// <summary>
    /// Returns this expression followed by an <see cref="EndToken"/> without a tag.
    /// </summary>
    public Token Extend() {
        return Cat(new EndToken());
    }

    /// <summary>
    /// Returns this expression followed by an <see cref="EndToken"/> carrying the integer
    /// value of the specified tag.
    /// </summary>
    /// <param name="tag">Tag; converted with <c>ToInt32</c> using the invariant culture.</param>
    public Token Tag<T>(T tag) where T : IConvertible {
        int code = tag.ToInt32(CultureInfo.InvariantCulture);
        return Cat(new EndToken(code));
    }

    /// <summary>
    /// Returns the concatenation of this expression and <paramref name="right"/>.
    /// </summary>
    public Token Cat(Token right) {
        return new CatToken(this, right);
    }

    /// <summary>
    /// Returns the alternation of this expression and <paramref name="right"/>.
    /// </summary>
    public Token Or(Token right) {
        return new AltToken(this, right);
    }

    /// <summary>
    /// Returns the alternation of this expression and an <see cref="EmptyToken"/>.
    /// </summary>
    public Token Optional() {
        return new AltToken(this, new EmptyToken());
    }

    /// <summary>
    /// Returns a <see cref="StarToken"/> wrapping this expression.
    /// </summary>
    public Token EClosure() {
        return new StarToken(this);
    }

    /// <summary>
    /// Returns this expression followed by a <see cref="StarToken"/> wrapping it.
    /// </summary>
    public Token Closure() {
        return Cat(EClosure());
    }

    /// <summary>
    /// Returns this expression concatenated with itself <paramref name="count"/> times.
    /// </summary>
    /// <param name="count">Number of repetitions.</param>
    /// <returns>
    /// The chained expression; an <see cref="EmptyToken"/> when <paramref name="count"/> is
    /// less than one.
    /// </returns>
    public Token Repeat(int count) {
        if (count <= 0)
            return new EmptyToken();

        // The first repetition is the expression itself, the rest are appended on the right.
        Token chain = this;
        for (int n = 1; n < count; n++) {
            chain = chain.Cat(this);
        }
        return chain;
    }

    /// <summary>
    /// Returns <paramref name="min"/> repetitions of this expression followed by
    /// <c>max - min</c> optional ones.
    /// </summary>
    /// <param name="min">Number of mandatory repetitions; at least one.</param>
    /// <param name="max">Upper limit of repetitions; not less than <paramref name="min"/>.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="min"/> is less than one or greater than <paramref name="max"/>.
    /// </exception>
    public Token Repeat(int min, int max) {
        if (min < 1 || min > max)
            throw new ArgumentOutOfRangeException();

        Token chain = Repeat(min);
        int optionalCount = max - min;
        for (int n = 0; n < optionalCount; n++) {
            chain = chain.Cat(Optional());
        }
        return chain;
    }

    /// <summary>
    /// Creates an alternation of <see cref="SymbolToken"/> nodes, one per distinct value of
    /// the set.
    /// </summary>
    /// <param name="set">Values; each is converted with <c>ToInt32</c> using the invariant culture.</param>
    /// <returns>The expression; <c>null</c> when <paramref name="set"/> is empty.</returns>
    public static Token New<T>(params T[] set) where T : struct, IConvertible {
        Safe.ArgumentNotNull(set, "set");

        Token alternatives = null;
        foreach (var item in set.Distinct()) {
            var leaf = new SymbolToken(item.ToInt32(CultureInfo.InvariantCulture));
            if (alternatives == null)
                alternatives = leaf;
            else
                alternatives = alternatives.Or(leaf);
        }
        return alternatives;
    }
}
|
67 | } |
@@ -33,6 +33,7 | |||||
33 | </ItemGroup> |
|
33 | </ItemGroup> | |
34 | <ItemGroup> |
|
34 | <ItemGroup> | |
35 | <Compile Include="Component.cs" /> |
|
35 | <Compile Include="Component.cs" /> | |
|
36 | <Compile Include="CustomEqualityComparer.cs" /> | |||
36 | <Compile Include="Diagnostics\ConsoleTraceListener.cs" /> |
|
37 | <Compile Include="Diagnostics\ConsoleTraceListener.cs" /> | |
37 | <Compile Include="Diagnostics\EventText.cs" /> |
|
38 | <Compile Include="Diagnostics\EventText.cs" /> | |
38 | <Compile Include="Diagnostics\IEventTextFormatter.cs" /> |
|
39 | <Compile Include="Diagnostics\IEventTextFormatter.cs" /> | |
@@ -52,10 +53,41 | |||||
52 | <Compile Include="IPromiseBase.cs" /> |
|
53 | <Compile Include="IPromiseBase.cs" /> | |
53 | <Compile Include="IServiceLocator.cs" /> |
|
54 | <Compile Include="IServiceLocator.cs" /> | |
54 | <Compile Include="ITaskController.cs" /> |
|
55 | <Compile Include="ITaskController.cs" /> | |
|
56 | <Compile Include="JSON\JSONElementContext.cs" /> | |||
|
57 | <Compile Include="JSON\JSONElementType.cs" /> | |||
|
58 | <Compile Include="JSON\JSONGrammar.cs" /> | |||
|
59 | <Compile Include="JSON\JSONParser.cs" /> | |||
|
60 | <Compile Include="JSON\JSONScanner.cs" /> | |||
|
61 | <Compile Include="JSON\JsonTokenType.cs" /> | |||
|
62 | <Compile Include="JSON\JSONWriter.cs" /> | |||
|
63 | <Compile Include="JSON\StringTranslator.cs" /> | |||
55 | <Compile Include="Parallels\DispatchPool.cs" /> |
|
64 | <Compile Include="Parallels\DispatchPool.cs" /> | |
56 | <Compile Include="Parallels\ArrayTraits.cs" /> |
|
65 | <Compile Include="Parallels\ArrayTraits.cs" /> | |
57 | <Compile Include="Parallels\MTQueue.cs" /> |
|
66 | <Compile Include="Parallels\MTQueue.cs" /> | |
58 | <Compile Include="Parallels\WorkerPool.cs" /> |
|
67 | <Compile Include="Parallels\WorkerPool.cs" /> | |
|
68 | <Compile Include="Parsing\Alphabet.cs" /> | |||
|
69 | <Compile Include="Parsing\AlphabetBase.cs" /> | |||
|
70 | <Compile Include="Parsing\AltToken.cs" /> | |||
|
71 | <Compile Include="Parsing\BinaryToken.cs" /> | |||
|
72 | <Compile Include="Parsing\CatToken.cs" /> | |||
|
73 | <Compile Include="Parsing\CDFADefinition.cs" /> | |||
|
74 | <Compile Include="Parsing\DFABuilder.cs" /> | |||
|
75 | <Compile Include="Parsing\DFADefinitionBase.cs" /> | |||
|
76 | <Compile Include="Parsing\DFAStateDescriptor.cs" /> | |||
|
77 | <Compile Include="Parsing\DFAutomaton.cs" /> | |||
|
78 | <Compile Include="Parsing\EDFADefinition.cs" /> | |||
|
79 | <Compile Include="Parsing\EmptyToken.cs" /> | |||
|
80 | <Compile Include="Parsing\EndToken.cs" /> | |||
|
81 | <Compile Include="Parsing\EnumAlphabet.cs" /> | |||
|
82 | <Compile Include="Parsing\Grammar.cs" /> | |||
|
83 | <Compile Include="Parsing\IAlphabet.cs" /> | |||
|
84 | <Compile Include="Parsing\IDFADefinition.cs" /> | |||
|
85 | <Compile Include="Parsing\IVisitor.cs" /> | |||
|
86 | <Compile Include="Parsing\ParserException.cs" /> | |||
|
87 | <Compile Include="Parsing\Scanner.cs" /> | |||
|
88 | <Compile Include="Parsing\StarToken.cs" /> | |||
|
89 | <Compile Include="Parsing\SymbolToken.cs" /> | |||
|
90 | <Compile Include="Parsing\Token.cs" /> | |||
59 | <Compile Include="ServiceLocator.cs" /> |
|
91 | <Compile Include="ServiceLocator.cs" /> | |
60 | <Compile Include="TaskController.cs" /> |
|
92 | <Compile Include="TaskController.cs" /> | |
61 | <Compile Include="ProgressInitEventArgs.cs" /> |
|
93 | <Compile Include="ProgressInitEventArgs.cs" /> |
@@ -25,6 +25,11 namespace Implab | |||||
25 | throw new ArgumentNullException(name); |
|
25 | throw new ArgumentNullException(name); | |
26 | } |
|
26 | } | |
27 |
|
27 | |||
|
/// <summary>
/// Checks that the argument lies within the range from <paramref name="min"/> to
/// <paramref name="max"/>, both bounds included.
/// </summary>
/// <param name="arg">Value to check.</param>
/// <param name="min">Lowest allowed value.</param>
/// <param name="max">Highest allowed value.</param>
/// <param name="name">Name of the argument, reported in the exception.</param>
/// <exception cref="ArgumentOutOfRangeException">The value is outside of the range.</exception>
public static void ArgumentInRange(int arg, int min, int max, string name) {
    bool withinBounds = min <= arg && arg <= max;
    if (!withinBounds)
        throw new ArgumentOutOfRangeException(name);
}
|
32 | ||||
28 | public static void Dispose<T>(T obj) where T : class |
|
33 | public static void Dispose<T>(T obj) where T : class | |
29 | { |
|
34 | { | |
30 | var disp = obj as IDisposable; |
|
35 | var disp = obj as IDisposable; |
General Comments 0
You need to be logged in to leave comments.
Login now