@@ -0,0 +1,9 | |||
|
1 | ||
|
2 | namespace Implab.Automaton { | |
|
3 | public static class DFAConst { | |
|
4 | public const int UNREACHABLE_STATE = -1; | |
|
5 | ||
|
6 | public const int UNCLASSIFIED_INPUT = 0; | |
|
7 | } | |
|
8 | } | |
|
9 |
@@ -0,0 +1,37 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Diagnostics; | |
|
5 | using System.Linq; | |
|
6 | ||
|
7 | namespace Implab.Automaton.RegularExpressions { | |
|
8 | /// <summary> | |
|
9 | /// </summary> | |
|
10 | public class RegularExpressionVisitor<TTag> : RegularExpressionVisitor { | |
|
11 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | |
|
12 | ||
|
13 | readonly ITaggedDFABuilder<TTag> m_builder; | |
|
14 | ||
|
15 | public RegularExpressionVisitor(ITaggedDFABuilder<TTag> builder) : base(builder) { | |
|
16 | m_builder = builder; | |
|
17 | } | |
|
18 | ||
|
19 | public override void Visit(EndToken token) { | |
|
20 | base.Visit(token); | |
|
21 | var tagged = token as EndToken<TTag>; | |
|
22 | if (tagged != null) | |
|
23 | m_tags.Add(Index, tagged.Tag); | |
|
24 | } | |
|
25 | ||
|
26 | protected override void MarkFinalState(HashSet<int> state) { | |
|
27 | base.MarkFinalState(state); | |
|
28 | m_builder.SetStateTag(Translate(state), GetStateTags(state)); | |
|
29 | } | |
|
30 | ||
|
31 | TTag[] GetStateTags(IEnumerable<int> state) { | |
|
32 | Debug.Assert(state != null); | |
|
33 | return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); | |
|
34 | } | |
|
35 | ||
|
36 | } | |
|
37 | } |
@@ -0,0 +1,44 | |||
|
1 | using System; | |
|
2 | using System.Threading; | |
|
3 | ||
|
4 | namespace Implab.Components { | |
|
5 | public class LazyAndWeak<T> where T : class { | |
|
6 | ||
|
7 | readonly Func<T> m_factory; | |
|
8 | readonly object m_lock; | |
|
9 | WeakReference m_reference; | |
|
10 | ||
|
11 | ||
|
12 | public LazyAndWeak(Func<T> factory, bool useLock) { | |
|
13 | Safe.ArgumentNotNull(factory, "factory"); | |
|
14 | m_factory = factory; | |
|
15 | m_lock = useLock ? new object() : null; | |
|
16 | } | |
|
17 | ||
|
18 | public LazyAndWeak(Func<T> factory) : this(factory, false) { | |
|
19 | } | |
|
20 | ||
|
21 | public T Value { | |
|
22 | get { | |
|
23 | while (true) { | |
|
24 | var weak = m_reference; | |
|
25 | T value; | |
|
26 | if (weak != null) { | |
|
27 | value = weak.Target as T; | |
|
28 | if (value != null) | |
|
29 | return value; | |
|
30 | } | |
|
31 | ||
|
32 | if (m_lock == null) { | |
|
33 | value = m_factory(); | |
|
34 | ||
|
35 | if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak) | |
|
36 | return value; | |
|
37 | } else { | |
|
38 | } | |
|
39 | } | |
|
40 | } | |
|
41 | } | |
|
42 | } | |
|
43 | } | |
|
44 |
@@ -105,7 +105,7 namespace Implab.Automaton { | |||
|
105 | 105 | |
|
106 | 106 | for (int i = 0; i < StateCount; i++) |
|
107 | 107 | for (int j = 0; i < AlphabetSize; j++) |
|
108 |
table[i, j] = |
|
|
108 | table[i, j] = AutomatonConst.UNREACHABLE_STATE; | |
|
109 | 109 | |
|
110 | 110 | foreach (var t in this) |
|
111 | 111 | table[t.s1,t.edge] = t.s2; |
@@ -273,11 +273,11 namespace Implab.Automaton { | |||
|
273 | 273 | |
|
274 | 274 | var nextCls = 0; |
|
275 | 275 | foreach (var item in minClasses) { |
|
276 |
if (nextCls == |
|
|
276 | if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT) | |
|
277 | 277 | nextCls++; |
|
278 | 278 | |
|
279 | 279 | // сохраняем DFAConst.UNCLASSIFIED_INPUT |
|
280 |
var cls = item.Contains( |
|
|
280 | var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls; | |
|
281 | 281 | |
|
282 | 282 | foreach (var a in item) |
|
283 | 283 | alphabetMap[a] = cls; |
@@ -54,7 +54,7 namespace Implab.Automaton { | |||
|
54 | 54 | return cls; |
|
55 | 55 | if (!m_supportUnclassified) |
|
56 | 56 | throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet"); |
|
57 |
return |
|
|
57 | return AutomatonConst.UNCLASSIFIED_INPUT; | |
|
58 | 58 | } |
|
59 | 59 | |
|
60 | 60 | public int Count { |
@@ -1,13 +1,11 | |||
|
1 | using Implab; | |
|
2 | ||
|
3 | namespace Implab.Automaton.RegularExpressions { | |
|
1 | namespace Implab.Automaton.RegularExpressions { | |
|
4 | 2 | /// <summary> |
|
5 | 3 | /// Конечный символ расширенного регулярного выражения, при построении ДКА |
|
6 | 4 | /// используется для определения конечных состояний. |
|
7 | 5 | /// </summary> |
|
8 | public class EndToken<TTag>: Token { | |
|
6 | public class EndToken<TTag>: EndToken { | |
|
9 | 7 | |
|
10 | TTag m_tag; | |
|
8 | readonly TTag m_tag; | |
|
11 | 9 | |
|
12 | 10 | public EndToken(TTag tag) { |
|
13 | 11 | m_tag = tag; |
@@ -21,13 +19,5 namespace Implab.Automaton.RegularExpres | |||
|
21 | 19 | get { return m_tag; } |
|
22 | 20 | } |
|
23 | 21 | |
|
24 | public override void Accept(IVisitor visitor) { | |
|
25 | Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor"); | |
|
26 | Safe.ArgumentNotNull(visitor, "visitor"); | |
|
27 | ((IVisitor<TTag>)visitor).Visit(this); | |
|
28 | } | |
|
29 | public override string ToString() { | |
|
30 | return "#"; | |
|
31 | 22 |
|
|
32 | 23 | } |
|
33 | } |
@@ -2,12 +2,12 | |||
|
2 | 2 | using System.Linq; |
|
3 | 3 | |
|
4 | 4 | namespace Implab.Automaton.RegularExpressions { |
|
5 |
public class |
|
|
5 | public class TaggedDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { | |
|
6 | 6 | |
|
7 | 7 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); |
|
8 | 8 | readonly IAlphabet<TInput> m_alphabet; |
|
9 | 9 | |
|
10 |
public |
|
|
10 | public TaggedDFA(IAlphabet<TInput> alphabet) { | |
|
11 | 11 | Safe.ArgumentNotNull(alphabet, "aplhabet"); |
|
12 | 12 | |
|
13 | 13 | m_alphabet = alphabet; |
@@ -48,10 +48,10 namespace Implab.Automaton.RegularExpres | |||
|
48 | 48 | /// Optimize the specified alphabet. |
|
49 | 49 | /// </summary> |
|
50 | 50 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> |
|
51 |
public |
|
|
51 | public TaggedDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { | |
|
52 | 52 | Safe.ArgumentNotNull(alphabet, "alphabet"); |
|
53 | 53 | |
|
54 |
var dfa = new |
|
|
54 | var dfa = new TaggedDFA<TInput, TTag>(alphabet); | |
|
55 | 55 | |
|
56 | 56 | var states = new DummyAlphabet(StateCount); |
|
57 | 57 | var alphaMap = new Dictionary<int,int>(); |
@@ -10,7 +10,7 namespace Implab.Automaton.RegularExpres | |||
|
10 | 10 | /// регулярное выражение и вычисляет followpos, затем используется метод |
|
11 | 11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. |
|
12 | 12 | /// </summary> |
|
13 |
public class RegularExpressionVisitor |
|
|
13 | public class RegularExpressionVisitor : IVisitor { | |
|
14 | 14 | int m_idx; |
|
15 | 15 | Token m_root; |
|
16 | 16 | HashSet<int> m_firstpos; |
@@ -19,13 +19,23 namespace Implab.Automaton.RegularExpres | |||
|
19 | 19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); |
|
20 | 20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); |
|
21 | 21 | readonly HashSet<int> m_ends = new HashSet<int>(); |
|
22 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | |
|
23 | 22 | |
|
24 | public Dictionary<int, HashSet<int>> FollowposMap { | |
|
25 | get { return m_followpos; } | |
|
23 | readonly IDFATableBuilder m_builder; | |
|
24 | readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( | |
|
25 | false, | |
|
26 | new CustomEqualityComparer<HashSet<int>>( | |
|
27 | (x, y) => x.SetEquals(y), | |
|
28 | x => x.Sum(n => n.GetHashCode()) | |
|
29 | ) | |
|
30 | ); | |
|
31 | ||
|
32 | public RegularExpressionVisitor(IDFATableBuilder builder) { | |
|
33 | Safe.ArgumentNotNull(builder, "builder"); | |
|
34 | ||
|
35 | m_builder = builder; | |
|
26 | 36 | } |
|
27 | 37 | |
|
28 |
|
|
|
38 | HashSet<int> Followpos(int pos) { | |
|
29 | 39 | HashSet<int> set; |
|
30 | 40 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); |
|
31 | 41 | } |
@@ -42,6 +52,9 namespace Implab.Automaton.RegularExpres | |||
|
42 | 52 | return false; |
|
43 | 53 | } |
|
44 | 54 | |
|
55 | protected int Index { | |
|
56 | get { return m_idx; } | |
|
57 | } | |
|
45 | 58 | |
|
46 | 59 | public void Visit(AltToken token) { |
|
47 | 60 | if (m_root == null) |
@@ -112,45 +125,23 namespace Implab.Automaton.RegularExpres | |||
|
112 | 125 | m_lastpos = new HashSet<int>(new[] { m_idx }); |
|
113 | 126 | } |
|
114 | 127 | |
|
115 |
public void Visit(EndToken |
|
|
128 | public virtual void Visit(EndToken token) { | |
|
116 | 129 | if (m_root == null) |
|
117 | 130 | m_root = token; |
|
118 | 131 | m_idx++; |
|
119 |
m_indexes[m_idx] = |
|
|
120 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |
|
121 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
|
122 | Followpos(m_idx); | |
|
123 | m_ends.Add(m_idx); | |
|
124 | m_tags.Add(m_idx, token.Tag); | |
|
125 | } | |
|
126 | ||
|
127 | public void Visit(EndToken token) { | |
|
128 | if (m_root == null) | |
|
129 | m_root = token; | |
|
130 | m_idx++; | |
|
131 | m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | |
|
132 | m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; | |
|
132 | 133 | m_firstpos = new HashSet<int>(new[] { m_idx }); |
|
133 | 134 | m_lastpos = new HashSet<int>(new[] { m_idx }); |
|
134 | 135 | Followpos(m_idx); |
|
135 | 136 | m_ends.Add(m_idx); |
|
136 | 137 | } |
|
137 | 138 | |
|
138 |
public void BuildDFA( |
|
|
139 | Safe.ArgumentNotNull(dfa,"dfa"); | |
|
139 | public void BuildDFA() { | |
|
140 | AddState(m_firstpos); | |
|
141 | SetInitialState(m_firstpos); | |
|
140 | 142 | |
|
141 | var states = new MapAlphabet<HashSet<int>>( | |
|
142 | false, | |
|
143 | new CustomEqualityComparer<HashSet<int>>( | |
|
144 | (x, y) => x.SetEquals(y), | |
|
145 | x => x.Sum(n => n.GetHashCode()) | |
|
146 | )); | |
|
147 | ||
|
148 | var initialState = states.DefineSymbol(m_firstpos); | |
|
149 | dfa.SetInitialState(initialState); | |
|
150 | ||
|
151 | var tags = GetStateTags(m_firstpos); | |
|
152 | if (tags != null && tags.Length > 0) | |
|
153 | dfa.MarkFinalState(initialState, tags); | |
|
143 | if(IsFinal(m_firstpos)) | |
|
144 | MarkFinalState(m_firstpos); | |
|
154 | 145 | |
|
155 | 146 | var inputMax = m_indexes.Values.Max(); |
|
156 | 147 | var queue = new Queue<HashSet<int>>(); |
@@ -158,38 +149,58 namespace Implab.Automaton.RegularExpres | |||
|
158 | 149 | queue.Enqueue(m_firstpos); |
|
159 | 150 | |
|
160 | 151 | while (queue.Count > 0) { |
|
161 |
var s |
|
|
162 | var s1 = states.Translate(state); | |
|
163 | Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); | |
|
152 | var s1 = queue.Dequeue(); | |
|
164 | 153 | |
|
165 | 154 | for (int a = 0; a <= inputMax; a++) { |
|
166 |
var |
|
|
167 |
foreach (var p in s |
|
|
155 | var s2 = new HashSet<int>(); | |
|
156 | foreach (var p in s1) { | |
|
168 | 157 | if (m_indexes[p] == a) { |
|
169 |
|
|
|
158 | s2.UnionWith(Followpos(p)); | |
|
170 | 159 | } |
|
171 | 160 | } |
|
172 |
if ( |
|
|
173 |
i |
|
|
174 |
|
|
|
175 |
s2 |
|
|
176 |
|
|
|
177 | s2 = states.DefineSymbol(next); | |
|
161 | if (s2.Count > 0) { | |
|
162 | if (!HasState(s2)) { | |
|
163 | AddState(s2); | |
|
164 | if (IsFinal(s2)) | |
|
165 | MarkFinalState(s2); | |
|
178 | 166 | |
|
179 |
|
|
|
180 | ||
|
181 | dfa.MarkFinalState(s2); | |
|
182 | tags = GetStateTags(next); | |
|
183 | if (tags != null && tags.Length > 0) | |
|
184 | dfa.SetStateTag(s2, tags); | |
|
167 | queue.Enqueue(s2); | |
|
185 | 168 |
|
|
186 | 169 | |
|
187 |
|
|
|
170 | DefineTransition(s1, s2, a); | |
|
188 | 171 |
|
|
189 | dfa.Add(new AutomatonTransition(s1, s2, a)); | |
|
172 | ||
|
190 | 173 |
|
|
191 | 174 |
|
|
192 | 175 |
|
|
176 | ||
|
177 | protected bool HasState(HashSet<int> state) { | |
|
178 | return m_states.Contains(state); | |
|
179 | } | |
|
180 | ||
|
181 | protected void AddState(HashSet<int> state) { | |
|
182 | Debug.Assert(!HasState(state)); | |
|
183 | ||
|
184 | m_states.DefineSymbol(state); | |
|
185 | } | |
|
186 | ||
|
187 | protected int Translate(HashSet<int> state) { | |
|
188 | Debug.Assert(HasState(state)); | |
|
189 | ||
|
190 | return m_states.Translate(state); | |
|
191 | } | |
|
192 | ||
|
193 | protected virtual void SetInitialState(HashSet<int> state) { | |
|
194 | m_builder.SetInitialState(Translate(state)); | |
|
195 | } | |
|
196 | ||
|
197 | protected virtual void MarkFinalState(HashSet<int> state) { | |
|
198 | m_builder.MarkFinalState(Translate(state)); | |
|
199 | } | |
|
200 | ||
|
201 | protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { | |
|
202 | ||
|
203 | m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); | |
|
193 | 204 | } |
|
194 | 205 | |
|
195 | 206 | bool IsFinal(IEnumerable<int> state) { |
@@ -197,10 +208,5 namespace Implab.Automaton.RegularExpres | |||
|
197 | 208 | return state.Any(m_ends.Contains); |
|
198 | 209 | } |
|
199 | 210 | |
|
200 | TTag[] GetStateTags(IEnumerable<int> state) { | |
|
201 | Debug.Assert(state != null); | |
|
202 | return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); | |
|
203 | } | |
|
204 | ||
|
205 | 211 | } |
|
206 | 212 | } |
@@ -6,7 +6,7 namespace Implab.Automaton.RegularExpres | |||
|
6 | 6 | public abstract class Token { |
|
7 | 7 | public abstract void Accept(IVisitor visitor); |
|
8 | 8 | |
|
9 |
public Token E |
|
|
9 | public Token End() { | |
|
10 | 10 | return Cat(new EndToken()); |
|
11 | 11 | } |
|
12 | 12 |
@@ -9,14 +9,14 namespace Implab.Formats { | |||
|
9 | 9 | /// <summary> |
|
10 | 10 | /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. |
|
11 | 11 | /// </summary> |
|
12 |
public abstract class Grammar<TSymbol |
|
|
12 | public abstract class Grammar<TSymbol> { | |
|
13 | 13 | |
|
14 | 14 | protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { |
|
15 | 15 | get; |
|
16 | 16 | } |
|
17 | 17 | |
|
18 |
protected SymbolToken |
|
|
19 |
return new SymbolToken |
|
|
18 | protected SymbolToken UnclassifiedToken() { | |
|
19 | return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT); | |
|
20 | 20 | } |
|
21 | 21 | |
|
22 | 22 | protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { |
@@ -26,23 +26,23 namespace Implab.Formats { | |||
|
26 | 26 | AlphabetBuilder.DefineSymbol(ch); |
|
27 | 27 | } |
|
28 | 28 | |
|
29 |
protected Token |
|
|
30 |
return Token |
|
|
29 | protected Token SymbolToken(TSymbol symbol) { | |
|
30 | return Token.New(TranslateOrAdd(symbol)); | |
|
31 | 31 | } |
|
32 | 32 | |
|
33 |
protected Token |
|
|
33 | protected Token SymbolToken(IEnumerable<TSymbol> symbols) { | |
|
34 | 34 | Safe.ArgumentNotNull(symbols, "symbols"); |
|
35 | 35 | |
|
36 |
return Token |
|
|
36 | return Token.New(TranslateOrAdd(symbols).ToArray()); | |
|
37 | 37 | } |
|
38 | 38 | |
|
39 |
protected Token |
|
|
39 | protected Token SymbolSetToken(params TSymbol[] set) { | |
|
40 | 40 | return SymbolToken(set); |
|
41 | 41 | } |
|
42 | 42 | |
|
43 | 43 | int TranslateOrAdd(TSymbol ch) { |
|
44 | 44 | var t = AlphabetBuilder.Translate(ch); |
|
45 |
if (t == |
|
|
45 | if (t == AutomatonConst.UNCLASSIFIED_INPUT) | |
|
46 | 46 | t = AlphabetBuilder.DefineSymbol(ch); |
|
47 | 47 | return t; |
|
48 | 48 | } |
@@ -53,7 +53,7 namespace Implab.Formats { | |||
|
53 | 53 | |
|
54 | 54 | int TranslateOrDie(TSymbol ch) { |
|
55 | 55 | var t = AlphabetBuilder.Translate(ch); |
|
56 |
if (t == |
|
|
56 | if (t == AutomatonConst.UNCLASSIFIED_INPUT) | |
|
57 | 57 | throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); |
|
58 | 58 | return t; |
|
59 | 59 | } |
@@ -62,22 +62,21 namespace Implab.Formats { | |||
|
62 | 62 | return symbols.Distinct().Select(TranslateOrDie); |
|
63 | 63 | } |
|
64 | 64 | |
|
65 |
protected Token |
|
|
65 | protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) { | |
|
66 | 66 | Safe.ArgumentNotNull(symbols, "symbols"); |
|
67 | 67 | |
|
68 |
return Token |
|
|
68 | return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); | |
|
69 | 69 | } |
|
70 | 70 | |
|
71 | 71 | protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); |
|
72 | 72 | |
|
73 |
protected ScannerContext<TTag> BuildScannerContext |
|
|
73 | protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) { | |
|
74 | 74 | |
|
75 | 75 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); |
|
76 | 76 | |
|
77 | var visitor = new RegularExpressionVisitor<TTag>(); | |
|
77 | var visitor = new RegularExpressionVisitor<TTag>(dfa); | |
|
78 | 78 |
regexp.Accept( |
|
79 | ||
|
80 | visitor.BuildDFA(dfa); | |
|
79 | visitor.BuildDFA(); | |
|
81 | 80 | |
|
82 | 81 | if (dfa.IsFinalState(dfa.InitialState)) |
|
83 | 82 | throw new ApplicationException("The specified language contains empty token"); |
@@ -5,7 +5,6 | |||
|
5 | 5 | enum JSONElementContext { |
|
6 | 6 | None, |
|
7 | 7 | Object, |
|
8 |
Array |
|
|
9 | Closed | |
|
8 | Array | |
|
10 | 9 | } |
|
11 | 10 | } |
@@ -4,7 +4,7 using System; | |||
|
4 | 4 | using Implab.Automaton; |
|
5 | 5 | |
|
6 | 6 | namespace Implab.Formats.JSON { |
|
7 |
class JSONGrammar : Grammar<char |
|
|
7 | class JSONGrammar : Grammar<char> { | |
|
8 | 8 | public enum TokenType { |
|
9 | 9 | None, |
|
10 | 10 | BeginObject, |
@@ -29,8 +29,8 namespace Implab.Formats.JSON { | |||
|
29 | 29 | get { return _instance.Value; } |
|
30 | 30 | } |
|
31 | 31 | |
|
32 |
readonly ScannerContext<TokenType> m_json |
|
|
33 |
readonly ScannerContext<TokenType> m_string |
|
|
32 | readonly ScannerContext<TokenType> m_jsonExpression; | |
|
33 | readonly ScannerContext<TokenType> m_stringExpression; | |
|
34 | 34 | |
|
35 | 35 | public JSONGrammar() { |
|
36 | 36 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); |
@@ -81,23 +81,25 namespace Implab.Formats.JSON { | |||
|
81 | 81 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); |
|
82 | 82 | |
|
83 | 83 | |
|
84 |
m_json |
|
|
85 |
m_string |
|
|
84 | m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression); | |
|
85 | m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression); | |
|
86 | ||
|
87 | ||
|
86 | 88 | } |
|
87 | 89 | |
|
88 |
public ScannerContext<TokenType> Json |
|
|
90 | public ScannerContext<TokenType> JsonExpression { | |
|
89 | 91 | get { |
|
90 |
return m_json |
|
|
92 | return m_jsonExpression; | |
|
91 | 93 | } |
|
92 | 94 | } |
|
93 | 95 | |
|
94 |
public ScannerContext<TokenType> JsonString |
|
|
96 | public ScannerContext<TokenType> JsonStringExpression { | |
|
95 | 97 | get { |
|
96 |
return m_string |
|
|
98 | return m_stringExpression; | |
|
97 | 99 | } |
|
98 | 100 | } |
|
99 | 101 | |
|
100 |
Token |
|
|
102 | Token SymbolRangeToken(char start, char stop) { | |
|
101 | 103 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); |
|
102 | 104 | } |
|
103 | 105 |
@@ -38,17 +38,30 namespace Implab.Formats.JSON { | |||
|
38 | 38 | MemberValue |
|
39 | 39 | } |
|
40 | 40 | |
|
41 | #region Parser rules | |
|
41 | 42 | struct ParserContext { |
|
42 | DFAStateDescriptior<object> | |
|
43 | readonly int[,] m_dfa; | |
|
44 | int m_state; | |
|
45 | ||
|
46 | readonly JSONElementContext m_elementContext; | |
|
47 | ||
|
48 | public ParserContext(int[,] dfa, int state, JSONElementContext context) { | |
|
49 | m_dfa = dfa; | |
|
50 | m_state = state; | |
|
51 | m_elementContext = context; | |
|
43 | 52 | } |
|
44 | 53 | |
|
45 | static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet; | |
|
46 | static readonly DFAStateDescriptior<object>[] _jsonDFA; | |
|
47 | static readonly int _jsonDFAInitialState; | |
|
48 | static readonly DFAStateDescriptior<object>[] _objectDFA; | |
|
49 | static readonly int _objectDFAInitialState; | |
|
50 | static readonly DFAStateDescriptior<object>[] _arrayDFA; | |
|
51 | static readonly int _arrayDFAInitialState; | |
|
54 | public bool Move(JsonTokenType token) { | |
|
55 | var next = m_dfa[m_state, token]; | |
|
56 | if (next == AutomatonConst.UNREACHABLE_STATE) | |
|
57 | return false; | |
|
58 | m_state = next; | |
|
59 | } | |
|
60 | ||
|
61 | public JSONElementContext ElementContext { | |
|
62 | get { return m_elementContext; } | |
|
63 | } | |
|
64 | } | |
|
52 | 65 | |
|
53 | 66 | static JSONParser() { |
|
54 | 67 | |
@@ -64,7 +77,8 namespace Implab.Formats.JSON { | |||
|
64 | 77 | ) |
|
65 | 78 | .Optional() |
|
66 | 79 | .Cat(Token(JsonTokenType.EndObject)) |
|
67 |
. |
|
|
80 | .End(); | |
|
81 | ||
|
68 | 82 | var arrayExpression = valueExpression |
|
69 | 83 | .Cat( |
|
70 | 84 | Token(JsonTokenType.ValueSeparator) |
@@ -73,29 +87,31 namespace Implab.Formats.JSON { | |||
|
73 | 87 | ) |
|
74 | 88 | .Optional() |
|
75 | 89 | .Cat(Token(JsonTokenType.EndArray)) |
|
76 |
. |
|
|
90 | .End(); | |
|
77 | 91 | |
|
78 |
var jsonExpression = valueExpression. |
|
|
92 | var jsonExpression = valueExpression.End(); | |
|
79 | 93 | |
|
80 |
_jsonDFA = Create |
|
|
81 |
_objectDFA = Create |
|
|
82 |
_arrayDFA = Create |
|
|
94 | _jsonDFA = CreateParserContext(jsonExpression, JSONElementContext.None); | |
|
95 | _objectDFA = CreateParserContext(objectExpression, JSONElementContext.Object); | |
|
96 | _arrayDFA = CreateParserContext(arrayExpression, JSONElementContext.Array); | |
|
83 | 97 | } |
|
84 | 98 | |
|
85 |
static Token |
|
|
86 |
return Token |
|
|
99 | static Token Token(params JsonTokenType[] input) { | |
|
100 | return Token.New( input.Select(t => (int)t).ToArray() ); | |
|
87 | 101 | } |
|
88 | 102 | |
|
89 | static RegularDFA<JsonTokenType,object> CreateDFA(Token<object> expr) { | |
|
90 | var builder = new RegularExpressionVisitor<object>(); | |
|
91 | var dfa = new RegularDFA<JsonTokenType,object>(_alphabet); | |
|
103 | static ParserContext CreateParserContext(Token expr, JSONElementContext context) { | |
|
92 | 104 | |
|
105 | var dfa = new DFATable(); | |
|
106 | var builder = new RegularExpressionVisitor(dfa); | |
|
93 | 107 | expr.Accept(builder); |
|
108 | builder.BuildDFA(); | |
|
94 | 109 | |
|
95 | builder.BuildDFA(dfa); | |
|
96 | return dfa; | |
|
110 | return new ParserContext(dfa.CreateTransitionTable(), dfa.InitialState, context); | |
|
97 | 111 | } |
|
98 | 112 | |
|
113 | #endregion | |
|
114 | ||
|
99 | 115 | JSONScanner m_scanner; |
|
100 | 116 | MemberContext m_memberContext; |
|
101 | 117 | |
@@ -117,8 +133,7 namespace Implab.Formats.JSON { | |||
|
117 | 133 | /// Создает новый экземпляр парсера, на основе текстового потока. |
|
118 | 134 | /// </summary> |
|
119 | 135 | /// <param name="reader">Текстовый поток.</param> |
|
120 | /// <param name="dispose">Признак того, что парсер должен конролировать время жизни входного потока.</param> | |
|
121 | public JSONParser(TextReader reader, bool dispose) | |
|
136 | public JSONParser(TextReader reader) | |
|
122 | 137 | : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { |
|
123 | 138 | Safe.ArgumentNotNull(reader, "reader"); |
|
124 | 139 | m_scanner = new JSONScanner(); |
@@ -61,15 +61,15 namespace Implab.Formats { | |||
|
61 | 61 | while (pos < m_bufferSize) { |
|
62 | 62 | var ch = m_buffer[pos]; |
|
63 | 63 | |
|
64 |
state = dfa[state, ch > maxSymbol ? |
|
|
65 |
if (state == |
|
|
64 | state = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
|
65 | if (state == AutomatonConst.UNREACHABLE_STATE) | |
|
66 | 66 | break; |
|
67 | 67 | |
|
68 | 68 | pos++; |
|
69 | 69 | } |
|
70 | 70 | |
|
71 | 71 | m_tokenLength = pos - m_bufferOffset; |
|
72 |
} while (state != |
|
|
72 | } while (state != AutomatonConst.UNREACHABLE_STATE && Feed()); | |
|
73 | 73 | |
|
74 | 74 | m_tokenOffset = m_bufferOffset; |
|
75 | 75 | m_bufferOffset += m_tokenLength; |
@@ -159,7 +159,6 | |||
|
159 | 159 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> |
|
160 | 160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> |
|
161 | 161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> |
|
162 | <Compile Include="Automaton\DFAConst.cs" /> | |
|
163 | 162 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> |
|
164 | 163 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> |
|
165 | 164 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> |
@@ -183,7 +182,6 | |||
|
183 | 182 | <Compile Include="Automaton\IDFATable.cs" /> |
|
184 | 183 | <Compile Include="Automaton\IDFATableBuilder.cs" /> |
|
185 | 184 | <Compile Include="Automaton\DFATable.cs" /> |
|
186 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> | |
|
187 | 185 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> |
|
188 | 186 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> |
|
189 | 187 | <Compile Include="Formats\TextScanner.cs" /> |
@@ -193,7 +191,10 | |||
|
193 | 191 | <Compile Include="Formats\Grammar.cs" /> |
|
194 | 192 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> |
|
195 | 193 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> |
|
196 |
<Compile Include="Automaton\RegularExpressions\ |
|
|
194 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitorT.cs" /> | |
|
195 | <Compile Include="Automaton\AutomatonConst.cs" /> | |
|
196 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> | |
|
197 | <Compile Include="Components\LazyAndWeak.cs" /> | |
|
197 | 198 | </ItemGroup> |
|
198 | 199 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
199 | 200 | <ItemGroup /> |
|
1 | NO CONTENT: file was removed |
|
1 | NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now