##// END OF EJS Templates
working on JSON parser
cin -
r178:d5c5db0335ee ref20160224
parent child
Show More
@@ -0,0 +1,9
1
2 namespace Implab.Automaton {
3 public static class DFAConst {
4 public const int UNREACHABLE_STATE = -1;
5
6 public const int UNCLASSIFIED_INPUT = 0;
7 }
8 }
9
@@ -0,0 +1,37
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Diagnostics;
5 using System.Linq;
6
7 namespace Implab.Automaton.RegularExpressions {
8 /// <summary>
9 /// </summary>
10 public class RegularExpressionVisitor<TTag> : RegularExpressionVisitor {
11 readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>();
12
13 readonly ITaggedDFABuilder<TTag> m_builder;
14
15 public RegularExpressionVisitor(ITaggedDFABuilder<TTag> builder) : base(builder) {
16 m_builder = builder;
17 }
18
19 public override void Visit(EndToken token) {
20 base.Visit(token);
21 var tagged = token as EndToken<TTag>;
22 if (tagged != null)
23 m_tags.Add(Index, tagged.Tag);
24 }
25
26 protected override void MarkFinalState(HashSet<int> state) {
27 base.MarkFinalState(state);
28 m_builder.SetStateTag(Translate(state), GetStateTags(state));
29 }
30
31 TTag[] GetStateTags(IEnumerable<int> state) {
32 Debug.Assert(state != null);
33 return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray();
34 }
35
36 }
37 }
@@ -0,0 +1,44
1 using System;
2 using System.Threading;
3
4 namespace Implab.Components {
5 public class LazyAndWeak<T> where T : class {
6
7 readonly Func<T> m_factory;
8 readonly object m_lock;
9 WeakReference m_reference;
10
11
12 public LazyAndWeak(Func<T> factory, bool useLock) {
13 Safe.ArgumentNotNull(factory, "factory");
14 m_factory = factory;
15 m_lock = useLock ? new object() : null;
16 }
17
18 public LazyAndWeak(Func<T> factory) : this(factory, false) {
19 }
20
21 public T Value {
22 get {
23 while (true) {
24 var weak = m_reference;
25 T value;
26 if (weak != null) {
27 value = weak.Target as T;
28 if (value != null)
29 return value;
30 }
31
32 if (m_lock == null) {
33 value = m_factory();
34
35 if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak)
36 return value;
37 } else {
38 }
39 }
40 }
41 }
42 }
43 }
44
@@ -105,7 +105,7 namespace Implab.Automaton {
105
105
106 for (int i = 0; i < StateCount; i++)
106 for (int i = 0; i < StateCount; i++)
107 for (int j = 0; i < AlphabetSize; j++)
107 for (int j = 0; i < AlphabetSize; j++)
108 table[i, j] = DFAConst.UNREACHABLE_STATE;
108 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
109
109
110 foreach (var t in this)
110 foreach (var t in this)
111 table[t.s1,t.edge] = t.s2;
111 table[t.s1,t.edge] = t.s2;
@@ -273,11 +273,11 namespace Implab.Automaton {
273
273
274 var nextCls = 0;
274 var nextCls = 0;
275 foreach (var item in minClasses) {
275 foreach (var item in minClasses) {
276 if (nextCls == DFAConst.UNCLASSIFIED_INPUT)
276 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
277 nextCls++;
277 nextCls++;
278
278
279 // сохраняем DFAConst.UNCLASSIFIED_INPUT
279 // сохраняем DFAConst.UNCLASSIFIED_INPUT
280 var cls = item.Contains(DFAConst.UNCLASSIFIED_INPUT) ? DFAConst.UNCLASSIFIED_INPUT : nextCls;
280 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls;
281
281
282 foreach (var a in item)
282 foreach (var a in item)
283 alphabetMap[a] = cls;
283 alphabetMap[a] = cls;
@@ -54,7 +54,7 namespace Implab.Automaton {
54 return cls;
54 return cls;
55 if (!m_supportUnclassified)
55 if (!m_supportUnclassified)
56 throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet");
56 throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet");
57 return DFAConst.UNCLASSIFIED_INPUT;
57 return AutomatonConst.UNCLASSIFIED_INPUT;
58 }
58 }
59
59
60 public int Count {
60 public int Count {
@@ -1,13 +1,11
1 using Implab;
1 namespace Implab.Automaton.RegularExpressions {
2
3 namespace Implab.Automaton.RegularExpressions {
4 /// <summary>
2 /// <summary>
5 /// Конечный символ расширенного регулярного выражения, при построении ДКА
3 /// Конечный символ расширенного регулярного выражения, при построении ДКА
6 /// используется для определения конечных состояний.
4 /// используется для определения конечных состояний.
7 /// </summary>
5 /// </summary>
8 public class EndToken<TTag>: Token {
6 public class EndToken<TTag>: EndToken {
9
7
10 TTag m_tag;
8 readonly TTag m_tag;
11
9
12 public EndToken(TTag tag) {
10 public EndToken(TTag tag) {
13 m_tag = tag;
11 m_tag = tag;
@@ -20,14 +18,6 namespace Implab.Automaton.RegularExpres
20 public TTag Tag {
18 public TTag Tag {
21 get { return m_tag; }
19 get { return m_tag; }
22 }
20 }
23
21
24 public override void Accept(IVisitor visitor) {
25 Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor");
26 Safe.ArgumentNotNull(visitor, "visitor");
27 ((IVisitor<TTag>)visitor).Visit(this);
28 }
29 public override string ToString() {
30 return "#";
31 }
32 }
22 }
33 }
23 }
@@ -2,12 +2,12
2 using System.Linq;
2 using System.Linq;
3
3
4 namespace Implab.Automaton.RegularExpressions {
4 namespace Implab.Automaton.RegularExpressions {
5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
5 public class TaggedDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
6
6
7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
8 readonly IAlphabet<TInput> m_alphabet;
8 readonly IAlphabet<TInput> m_alphabet;
9
9
10 public RegularDFA(IAlphabet<TInput> alphabet) {
10 public TaggedDFA(IAlphabet<TInput> alphabet) {
11 Safe.ArgumentNotNull(alphabet, "aplhabet");
11 Safe.ArgumentNotNull(alphabet, "aplhabet");
12
12
13 m_alphabet = alphabet;
13 m_alphabet = alphabet;
@@ -48,10 +48,10 namespace Implab.Automaton.RegularExpres
48 /// Optimize the specified alphabet.
48 /// Optimize the specified alphabet.
49 /// </summary>
49 /// </summary>
50 /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param>
50 /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param>
51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
51 public TaggedDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
52 Safe.ArgumentNotNull(alphabet, "alphabet");
52 Safe.ArgumentNotNull(alphabet, "alphabet");
53
53
54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
54 var dfa = new TaggedDFA<TInput, TTag>(alphabet);
55
55
56 var states = new DummyAlphabet(StateCount);
56 var states = new DummyAlphabet(StateCount);
57 var alphaMap = new Dictionary<int,int>();
57 var alphaMap = new Dictionary<int,int>();
@@ -10,7 +10,7 namespace Implab.Automaton.RegularExpres
10 /// регулярное выражение и вычисляет followpos, затем используется метод
10 /// регулярное выражение и вычисляет followpos, затем используется метод
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
12 /// </summary>
12 /// </summary>
13 public class RegularExpressionVisitor<TTag> : IVisitor<TTag> {
13 public class RegularExpressionVisitor : IVisitor {
14 int m_idx;
14 int m_idx;
15 Token m_root;
15 Token m_root;
16 HashSet<int> m_firstpos;
16 HashSet<int> m_firstpos;
@@ -19,13 +19,23 namespace Implab.Automaton.RegularExpres
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
21 readonly HashSet<int> m_ends = new HashSet<int>();
21 readonly HashSet<int> m_ends = new HashSet<int>();
22 readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>();
23
22
24 public Dictionary<int, HashSet<int>> FollowposMap {
23 readonly IDFATableBuilder m_builder;
25 get { return m_followpos; }
24 readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
25 false,
26 new CustomEqualityComparer<HashSet<int>>(
27 (x, y) => x.SetEquals(y),
28 x => x.Sum(n => n.GetHashCode())
29 )
30 );
31
32 public RegularExpressionVisitor(IDFATableBuilder builder) {
33 Safe.ArgumentNotNull(builder, "builder");
34
35 m_builder = builder;
26 }
36 }
27
37
28 public HashSet<int> Followpos(int pos) {
38 HashSet<int> Followpos(int pos) {
29 HashSet<int> set;
39 HashSet<int> set;
30 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
40 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
31 }
41 }
@@ -42,6 +52,9 namespace Implab.Automaton.RegularExpres
42 return false;
52 return false;
43 }
53 }
44
54
55 protected int Index {
56 get { return m_idx; }
57 }
45
58
46 public void Visit(AltToken token) {
59 public void Visit(AltToken token) {
47 if (m_root == null)
60 if (m_root == null)
@@ -112,45 +125,23 namespace Implab.Automaton.RegularExpres
112 m_lastpos = new HashSet<int>(new[] { m_idx });
125 m_lastpos = new HashSet<int>(new[] { m_idx });
113 }
126 }
114
127
115 public void Visit(EndToken<TTag> token) {
128 public virtual void Visit(EndToken token) {
116 if (m_root == null)
129 if (m_root == null)
117 m_root = token;
130 m_root = token;
118 m_idx++;
131 m_idx++;
119 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
132 m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
120 m_firstpos = new HashSet<int>(new[] { m_idx });
121 m_lastpos = new HashSet<int>(new[] { m_idx });
122 Followpos(m_idx);
123 m_ends.Add(m_idx);
124 m_tags.Add(m_idx, token.Tag);
125 }
126
127 public void Visit(EndToken token) {
128 if (m_root == null)
129 m_root = token;
130 m_idx++;
131 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
132 m_firstpos = new HashSet<int>(new[] { m_idx });
133 m_firstpos = new HashSet<int>(new[] { m_idx });
133 m_lastpos = new HashSet<int>(new[] { m_idx });
134 m_lastpos = new HashSet<int>(new[] { m_idx });
134 Followpos(m_idx);
135 Followpos(m_idx);
135 m_ends.Add(m_idx);
136 m_ends.Add(m_idx);
136 }
137 }
137
138
138 public void BuildDFA(ITaggedDFABuilder<TTag> dfa) {
139 public void BuildDFA() {
139 Safe.ArgumentNotNull(dfa,"dfa");
140 AddState(m_firstpos);
141 SetInitialState(m_firstpos);
140
142
141 var states = new MapAlphabet<HashSet<int>>(
143 if(IsFinal(m_firstpos))
142 false,
144 MarkFinalState(m_firstpos);
143 new CustomEqualityComparer<HashSet<int>>(
144 (x, y) => x.SetEquals(y),
145 x => x.Sum(n => n.GetHashCode())
146 ));
147
148 var initialState = states.DefineSymbol(m_firstpos);
149 dfa.SetInitialState(initialState);
150
151 var tags = GetStateTags(m_firstpos);
152 if (tags != null && tags.Length > 0)
153 dfa.MarkFinalState(initialState, tags);
154
145
155 var inputMax = m_indexes.Values.Max();
146 var inputMax = m_indexes.Values.Max();
156 var queue = new Queue<HashSet<int>>();
147 var queue = new Queue<HashSet<int>>();
@@ -158,49 +149,64 namespace Implab.Automaton.RegularExpres
158 queue.Enqueue(m_firstpos);
149 queue.Enqueue(m_firstpos);
159
150
160 while (queue.Count > 0) {
151 while (queue.Count > 0) {
161 var state = queue.Dequeue();
152 var s1 = queue.Dequeue();
162 var s1 = states.Translate(state);
163 Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT);
164
153
165 for (int a = 0; a <= inputMax; a++) {
154 for (int a = 0; a <= inputMax; a++) {
166 var next = new HashSet<int>();
155 var s2 = new HashSet<int>();
167 foreach (var p in state) {
156 foreach (var p in s1) {
168 if (m_indexes[p] == a) {
157 if (m_indexes[p] == a) {
169 next.UnionWith(Followpos(p));
158 s2.UnionWith(Followpos(p));
170 }
159 }
171 }
160 }
172 if (next.Count > 0) {
161 if (s2.Count > 0) {
173 int s2;
162 if (!HasState(s2)) {
174 if (states.Contains(next)) {
163 AddState(s2);
175 s2 = states.Translate(next);
164 if (IsFinal(s2))
176 } else {
165 MarkFinalState(s2);
177 s2 = states.DefineSymbol(next);
166
167 queue.Enqueue(s2);
168 }
178
169
179 if (IsFinal(next)) {
170 DefineTransition(s1, s2, a);
180
181 dfa.MarkFinalState(s2);
182 tags = GetStateTags(next);
183 if (tags != null && tags.Length > 0)
184 dfa.SetStateTag(s2, tags);
185 }
186
187 queue.Enqueue(next);
188 }
189 dfa.Add(new AutomatonTransition(s1, s2, a));
190 }
171 }
172
191 }
173 }
192 }
174 }
193 }
175 }
194
176
177 protected bool HasState(HashSet<int> state) {
178 return m_states.Contains(state);
179 }
180
181 protected void AddState(HashSet<int> state) {
182 Debug.Assert(!HasState(state));
183
184 m_states.DefineSymbol(state);
185 }
186
187 protected int Translate(HashSet<int> state) {
188 Debug.Assert(HasState(state));
189
190 return m_states.Translate(state);
191 }
192
193 protected virtual void SetInitialState(HashSet<int> state) {
194 m_builder.SetInitialState(Translate(state));
195 }
196
197 protected virtual void MarkFinalState(HashSet<int> state) {
198 m_builder.MarkFinalState(Translate(state));
199 }
200
201 protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {
202
203 m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
204 }
205
195 bool IsFinal(IEnumerable<int> state) {
206 bool IsFinal(IEnumerable<int> state) {
196 Debug.Assert(state != null);
207 Debug.Assert(state != null);
197 return state.Any(m_ends.Contains);
208 return state.Any(m_ends.Contains);
198 }
209 }
199
210
200 TTag[] GetStateTags(IEnumerable<int> state) {
201 Debug.Assert(state != null);
202 return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray();
203 }
204
205 }
211 }
206 }
212 }
@@ -6,7 +6,7 namespace Implab.Automaton.RegularExpres
6 public abstract class Token {
6 public abstract class Token {
7 public abstract void Accept(IVisitor visitor);
7 public abstract void Accept(IVisitor visitor);
8
8
9 public Token Extend() {
9 public Token End() {
10 return Cat(new EndToken());
10 return Cat(new EndToken());
11 }
11 }
12
12
@@ -9,14 +9,14 namespace Implab.Formats {
9 /// <summary>
9 /// <summary>
10 /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>.
10 /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>.
11 /// </summary>
11 /// </summary>
12 public abstract class Grammar<TSymbol, TTag> {
12 public abstract class Grammar<TSymbol> {
13
13
14 protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
14 protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
15 get;
15 get;
16 }
16 }
17
17
18 protected SymbolToken<TTag> UnclassifiedToken() {
18 protected SymbolToken UnclassifiedToken() {
19 return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
19 return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT);
20 }
20 }
21
21
22 protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
22 protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
@@ -26,23 +26,23 namespace Implab.Formats {
26 AlphabetBuilder.DefineSymbol(ch);
26 AlphabetBuilder.DefineSymbol(ch);
27 }
27 }
28
28
29 protected Token<TTag> SymbolToken(TSymbol symbol) {
29 protected Token SymbolToken(TSymbol symbol) {
30 return Token<TTag>.New(TranslateOrAdd(symbol));
30 return Token.New(TranslateOrAdd(symbol));
31 }
31 }
32
32
33 protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
33 protected Token SymbolToken(IEnumerable<TSymbol> symbols) {
34 Safe.ArgumentNotNull(symbols, "symbols");
34 Safe.ArgumentNotNull(symbols, "symbols");
35
35
36 return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
36 return Token.New(TranslateOrAdd(symbols).ToArray());
37 }
37 }
38
38
39 protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
39 protected Token SymbolSetToken(params TSymbol[] set) {
40 return SymbolToken(set);
40 return SymbolToken(set);
41 }
41 }
42
42
43 int TranslateOrAdd(TSymbol ch) {
43 int TranslateOrAdd(TSymbol ch) {
44 var t = AlphabetBuilder.Translate(ch);
44 var t = AlphabetBuilder.Translate(ch);
45 if (t == DFAConst.UNCLASSIFIED_INPUT)
45 if (t == AutomatonConst.UNCLASSIFIED_INPUT)
46 t = AlphabetBuilder.DefineSymbol(ch);
46 t = AlphabetBuilder.DefineSymbol(ch);
47 return t;
47 return t;
48 }
48 }
@@ -53,7 +53,7 namespace Implab.Formats {
53
53
54 int TranslateOrDie(TSymbol ch) {
54 int TranslateOrDie(TSymbol ch) {
55 var t = AlphabetBuilder.Translate(ch);
55 var t = AlphabetBuilder.Translate(ch);
56 if (t == DFAConst.UNCLASSIFIED_INPUT)
56 if (t == AutomatonConst.UNCLASSIFIED_INPUT)
57 throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
57 throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
58 return t;
58 return t;
59 }
59 }
@@ -62,22 +62,21 namespace Implab.Formats {
62 return symbols.Distinct().Select(TranslateOrDie);
62 return symbols.Distinct().Select(TranslateOrDie);
63 }
63 }
64
64
65 protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
65 protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
66 Safe.ArgumentNotNull(symbols, "symbols");
66 Safe.ArgumentNotNull(symbols, "symbols");
67
67
68 return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
68 return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
69 }
69 }
70
70
71 protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();
71 protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();
72
72
73 protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
73 protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) {
74
74
75 var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);
75 var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);
76
76
77 var visitor = new RegularExpressionVisitor<TTag>();
77 var visitor = new RegularExpressionVisitor<TTag>(dfa);
78 regexp.Accept( visitor );
78 regexp.Accept(visitor);
79
79 visitor.BuildDFA();
80 visitor.BuildDFA(dfa);
81
80
82 if (dfa.IsFinalState(dfa.InitialState))
81 if (dfa.IsFinalState(dfa.InitialState))
83 throw new ApplicationException("The specified language contains empty token");
82 throw new ApplicationException("The specified language contains empty token");
@@ -5,7 +5,6
5 enum JSONElementContext {
5 enum JSONElementContext {
6 None,
6 None,
7 Object,
7 Object,
8 Array,
8 Array
9 Closed
10 }
9 }
11 }
10 }
@@ -4,7 +4,7 using System;
4 using Implab.Automaton;
4 using Implab.Automaton;
5
5
6 namespace Implab.Formats.JSON {
6 namespace Implab.Formats.JSON {
7 class JSONGrammar : Grammar<char,JSONGrammar.TokenType> {
7 class JSONGrammar : Grammar<char> {
8 public enum TokenType {
8 public enum TokenType {
9 None,
9 None,
10 BeginObject,
10 BeginObject,
@@ -29,8 +29,8 namespace Implab.Formats.JSON {
29 get { return _instance.Value; }
29 get { return _instance.Value; }
30 }
30 }
31
31
32 readonly ScannerContext<TokenType> m_jsonDFA;
32 readonly ScannerContext<TokenType> m_jsonExpression;
33 readonly ScannerContext<TokenType> m_stringDFA;
33 readonly ScannerContext<TokenType> m_stringExpression;
34
34
35 public JSONGrammar() {
35 public JSONGrammar() {
36 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
36 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
@@ -81,23 +81,25 namespace Implab.Formats.JSON {
81 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
81 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
82
82
83
83
84 m_jsonDFA = BuildScannerContext(jsonExpression);
84 m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
85 m_stringDFA = BuildScannerContext(jsonStringExpression);
85 m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
86
87
86 }
88 }
87
89
88 public ScannerContext<TokenType> JsonDFA {
90 public ScannerContext<TokenType> JsonExpression {
89 get {
91 get {
90 return m_jsonDFA;
92 return m_jsonExpression;
91 }
93 }
92 }
94 }
93
95
94 public ScannerContext<TokenType> JsonStringDFA {
96 public ScannerContext<TokenType> JsonStringExpression {
95 get {
97 get {
96 return m_stringDFA;
98 return m_stringExpression;
97 }
99 }
98 }
100 }
99
101
100 Token<TokenType> SymbolRangeToken(char start, char stop) {
102 Token SymbolRangeToken(char start, char stop) {
101 return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>());
103 return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>());
102 }
104 }
103
105
@@ -38,17 +38,30 namespace Implab.Formats.JSON {
38 MemberValue
38 MemberValue
39 }
39 }
40
40
41 #region Parser rules
41 struct ParserContext {
42 struct ParserContext {
42 DFAStateDescriptior<object>
43 readonly int[,] m_dfa;
43 }
44 int m_state;
45
46 readonly JSONElementContext m_elementContext;
44
47
45 static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet;
48 public ParserContext(int[,] dfa, int state, JSONElementContext context) {
46 static readonly DFAStateDescriptior<object>[] _jsonDFA;
49 m_dfa = dfa;
47 static readonly int _jsonDFAInitialState;
50 m_state = state;
48 static readonly DFAStateDescriptior<object>[] _objectDFA;
51 m_elementContext = context;
49 static readonly int _objectDFAInitialState;
52 }
50 static readonly DFAStateDescriptior<object>[] _arrayDFA;
53
51 static readonly int _arrayDFAInitialState;
54 public bool Move(JsonTokenType token) {
55 var next = m_dfa[m_state, token];
56 if (next == AutomatonConst.UNREACHABLE_STATE)
57 return false;
58 m_state = next;
59 }
60
61 public JSONElementContext ElementContext {
62 get { return m_elementContext; }
63 }
64 }
52
65
53 static JSONParser() {
66 static JSONParser() {
54
67
@@ -64,7 +77,8 namespace Implab.Formats.JSON {
64 )
77 )
65 .Optional()
78 .Optional()
66 .Cat(Token(JsonTokenType.EndObject))
79 .Cat(Token(JsonTokenType.EndObject))
67 .Tag(null);
80 .End();
81
68 var arrayExpression = valueExpression
82 var arrayExpression = valueExpression
69 .Cat(
83 .Cat(
70 Token(JsonTokenType.ValueSeparator)
84 Token(JsonTokenType.ValueSeparator)
@@ -73,29 +87,31 namespace Implab.Formats.JSON {
73 )
87 )
74 .Optional()
88 .Optional()
75 .Cat(Token(JsonTokenType.EndArray))
89 .Cat(Token(JsonTokenType.EndArray))
76 .Tag(null);
90 .End();
77
91
78 var jsonExpression = valueExpression.Tag(null);
92 var jsonExpression = valueExpression.End();
79
93
80 _jsonDFA = CreateDFA(jsonExpression).GetTransitionTable();
94 _jsonDFA = CreateParserContext(jsonExpression, JSONElementContext.None);
81 _objectDFA = CreateDFA(objectExpression).GetTransitionTable();
95 _objectDFA = CreateParserContext(objectExpression, JSONElementContext.Object);
82 _arrayDFA = CreateDFA(arrayExpression).GetTransitionTable();
96 _arrayDFA = CreateParserContext(arrayExpression, JSONElementContext.Array);
83 }
97 }
84
98
85 static Token<object> Token(params JsonTokenType[] input) {
99 static Token Token(params JsonTokenType[] input) {
86 return Token<object>.New(input.Select(t => _alphabet.Translate(t)).ToArray());
100 return Token.New( input.Select(t => (int)t).ToArray() );
87 }
101 }
88
102
89 static RegularDFA<JsonTokenType,object> CreateDFA(Token<object> expr) {
103 static ParserContext CreateParserContext(Token expr, JSONElementContext context) {
90 var builder = new RegularExpressionVisitor<object>();
104
91 var dfa = new RegularDFA<JsonTokenType,object>(_alphabet);
105 var dfa = new DFATable();
92
106 var builder = new RegularExpressionVisitor(dfa);
93 expr.Accept(builder);
107 expr.Accept(builder);
108 builder.BuildDFA();
94
109
95 builder.BuildDFA(dfa);
110 return new ParserContext(dfa.CreateTransitionTable(), dfa.InitialState, context);
96 return dfa;
97 }
111 }
98
112
113 #endregion
114
99 JSONScanner m_scanner;
115 JSONScanner m_scanner;
100 MemberContext m_memberContext;
116 MemberContext m_memberContext;
101
117
@@ -117,8 +133,7 namespace Implab.Formats.JSON {
117 /// Создает новый экземпляр парсера, на основе текстового потока.
133 /// Создает новый экземпляр парсера, на основе текстового потока.
118 /// </summary>
134 /// </summary>
119 /// <param name="reader">Текстовый поток.</param>
135 /// <param name="reader">Текстовый поток.</param>
120 /// <param name="dispose">Признак того, что парсер должен конролировать время жизни входного потока.</param>
136 public JSONParser(TextReader reader)
121 public JSONParser(TextReader reader, bool dispose)
122 : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) {
137 : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) {
123 Safe.ArgumentNotNull(reader, "reader");
138 Safe.ArgumentNotNull(reader, "reader");
124 m_scanner = new JSONScanner();
139 m_scanner = new JSONScanner();
@@ -61,15 +61,15 namespace Implab.Formats {
61 while (pos < m_bufferSize) {
61 while (pos < m_bufferSize) {
62 var ch = m_buffer[pos];
62 var ch = m_buffer[pos];
63
63
64 state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]];
64 state = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
65 if (state == DFAConst.UNREACHABLE_STATE)
65 if (state == AutomatonConst.UNREACHABLE_STATE)
66 break;
66 break;
67
67
68 pos++;
68 pos++;
69 }
69 }
70
70
71 m_tokenLength = pos - m_bufferOffset;
71 m_tokenLength = pos - m_bufferOffset;
72 } while (state != DFAConst.UNREACHABLE_STATE && Feed());
72 } while (state != AutomatonConst.UNREACHABLE_STATE && Feed());
73
73
74 m_tokenOffset = m_bufferOffset;
74 m_tokenOffset = m_bufferOffset;
75 m_bufferOffset += m_tokenLength;
75 m_bufferOffset += m_tokenLength;
@@ -159,7 +159,6
159 <Compile Include="Automaton\RegularExpressions\AltToken.cs" />
159 <Compile Include="Automaton\RegularExpressions\AltToken.cs" />
160 <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" />
160 <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" />
161 <Compile Include="Automaton\RegularExpressions\CatToken.cs" />
161 <Compile Include="Automaton\RegularExpressions\CatToken.cs" />
162 <Compile Include="Automaton\DFAConst.cs" />
163 <Compile Include="Automaton\RegularExpressions\StarToken.cs" />
162 <Compile Include="Automaton\RegularExpressions\StarToken.cs" />
164 <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" />
163 <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" />
165 <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" />
164 <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" />
@@ -183,7 +182,6
183 <Compile Include="Automaton\IDFATable.cs" />
182 <Compile Include="Automaton\IDFATable.cs" />
184 <Compile Include="Automaton\IDFATableBuilder.cs" />
183 <Compile Include="Automaton\IDFATableBuilder.cs" />
185 <Compile Include="Automaton\DFATable.cs" />
184 <Compile Include="Automaton\DFATable.cs" />
186 <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" />
187 <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" />
185 <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" />
188 <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" />
186 <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" />
189 <Compile Include="Formats\TextScanner.cs" />
187 <Compile Include="Formats\TextScanner.cs" />
@@ -193,7 +191,10
193 <Compile Include="Formats\Grammar.cs" />
191 <Compile Include="Formats\Grammar.cs" />
194 <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" />
192 <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" />
195 <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
193 <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
196 <Compile Include="Automaton\RegularExpressions\IVisitorT.cs" />
194 <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitorT.cs" />
195 <Compile Include="Automaton\AutomatonConst.cs" />
196 <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" />
197 <Compile Include="Components\LazyAndWeak.cs" />
197 </ItemGroup>
198 </ItemGroup>
198 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
199 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
199 <ItemGroup />
200 <ItemGroup />
1 NO CONTENT: file was removed
NO CONTENT: file was removed
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now