@@ -0,0 +1,9 | |||||
|
1 | ||||
|
2 | namespace Implab.Automaton { | |||
|
3 | public static class DFAConst { | |||
|
4 | public const int UNREACHABLE_STATE = -1; | |||
|
5 | ||||
|
6 | public const int UNCLASSIFIED_INPUT = 0; | |||
|
7 | } | |||
|
8 | } | |||
|
9 |
@@ -0,0 +1,37 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Diagnostics; | |||
|
5 | using System.Linq; | |||
|
6 | ||||
|
7 | namespace Implab.Automaton.RegularExpressions { | |||
|
8 | /// <summary> | |||
|
9 | /// </summary> | |||
|
10 | public class RegularExpressionVisitor<TTag> : RegularExpressionVisitor { | |||
|
11 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | |||
|
12 | ||||
|
13 | readonly ITaggedDFABuilder<TTag> m_builder; | |||
|
14 | ||||
|
15 | public RegularExpressionVisitor(ITaggedDFABuilder<TTag> builder) : base(builder) { | |||
|
16 | m_builder = builder; | |||
|
17 | } | |||
|
18 | ||||
|
19 | public override void Visit(EndToken token) { | |||
|
20 | base.Visit(token); | |||
|
21 | var tagged = token as EndToken<TTag>; | |||
|
22 | if (tagged != null) | |||
|
23 | m_tags.Add(Index, tagged.Tag); | |||
|
24 | } | |||
|
25 | ||||
|
26 | protected override void MarkFinalState(HashSet<int> state) { | |||
|
27 | base.MarkFinalState(state); | |||
|
28 | m_builder.SetStateTag(Translate(state), GetStateTags(state)); | |||
|
29 | } | |||
|
30 | ||||
|
31 | TTag[] GetStateTags(IEnumerable<int> state) { | |||
|
32 | Debug.Assert(state != null); | |||
|
33 | return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); | |||
|
34 | } | |||
|
35 | ||||
|
36 | } | |||
|
37 | } |
@@ -0,0 +1,44 | |||||
|
1 | using System; | |||
|
2 | using System.Threading; | |||
|
3 | ||||
|
4 | namespace Implab.Components { | |||
|
5 | public class LazyAndWeak<T> where T : class { | |||
|
6 | ||||
|
7 | readonly Func<T> m_factory; | |||
|
8 | readonly object m_lock; | |||
|
9 | WeakReference m_reference; | |||
|
10 | ||||
|
11 | ||||
|
12 | public LazyAndWeak(Func<T> factory, bool useLock) { | |||
|
13 | Safe.ArgumentNotNull(factory, "factory"); | |||
|
14 | m_factory = factory; | |||
|
15 | m_lock = useLock ? new object() : null; | |||
|
16 | } | |||
|
17 | ||||
|
18 | public LazyAndWeak(Func<T> factory) : this(factory, false) { | |||
|
19 | } | |||
|
20 | ||||
|
21 | public T Value { | |||
|
22 | get { | |||
|
23 | while (true) { | |||
|
24 | var weak = m_reference; | |||
|
25 | T value; | |||
|
26 | if (weak != null) { | |||
|
27 | value = weak.Target as T; | |||
|
28 | if (value != null) | |||
|
29 | return value; | |||
|
30 | } | |||
|
31 | ||||
|
32 | if (m_lock == null) { | |||
|
33 | value = m_factory(); | |||
|
34 | ||||
|
35 | if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak) | |||
|
36 | return value; | |||
|
37 | } else { | |||
|
38 | } | |||
|
39 | } | |||
|
40 | } | |||
|
41 | } | |||
|
42 | } | |||
|
43 | } | |||
|
44 |
@@ -105,7 +105,7 namespace Implab.Automaton { | |||||
105 |
|
105 | |||
106 | for (int i = 0; i < StateCount; i++) |
|
106 | for (int i = 0; i < StateCount; i++) | |
107 | for (int j = 0; i < AlphabetSize; j++) |
|
107 | for (int j = 0; i < AlphabetSize; j++) | |
108 |
table[i, j] = |
|
108 | table[i, j] = AutomatonConst.UNREACHABLE_STATE; | |
109 |
|
109 | |||
110 | foreach (var t in this) |
|
110 | foreach (var t in this) | |
111 | table[t.s1,t.edge] = t.s2; |
|
111 | table[t.s1,t.edge] = t.s2; | |
@@ -273,11 +273,11 namespace Implab.Automaton { | |||||
273 |
|
273 | |||
274 | var nextCls = 0; |
|
274 | var nextCls = 0; | |
275 | foreach (var item in minClasses) { |
|
275 | foreach (var item in minClasses) { | |
276 |
if (nextCls == |
|
276 | if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT) | |
277 | nextCls++; |
|
277 | nextCls++; | |
278 |
|
278 | |||
279 | // сохраняем DFAConst.UNCLASSIFIED_INPUT |
|
279 | // сохраняем DFAConst.UNCLASSIFIED_INPUT | |
280 |
var cls = item.Contains( |
|
280 | var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls; | |
281 |
|
281 | |||
282 | foreach (var a in item) |
|
282 | foreach (var a in item) | |
283 | alphabetMap[a] = cls; |
|
283 | alphabetMap[a] = cls; |
@@ -54,7 +54,7 namespace Implab.Automaton { | |||||
54 | return cls; |
|
54 | return cls; | |
55 | if (!m_supportUnclassified) |
|
55 | if (!m_supportUnclassified) | |
56 | throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet"); |
|
56 | throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet"); | |
57 |
return |
|
57 | return AutomatonConst.UNCLASSIFIED_INPUT; | |
58 | } |
|
58 | } | |
59 |
|
59 | |||
60 | public int Count { |
|
60 | public int Count { |
@@ -1,13 +1,11 | |||||
1 | using Implab; |
|
1 | namespace Implab.Automaton.RegularExpressions { | |
2 |
|
||||
3 | namespace Implab.Automaton.RegularExpressions { |
|
|||
4 | /// <summary> |
|
2 | /// <summary> | |
5 | /// Конечный символ расширенного регулярного выражения, при построении ДКА |
|
3 | /// Конечный символ расширенного регулярного выражения, при построении ДКА | |
6 | /// используется для определения конечных состояний. |
|
4 | /// используется для определения конечных состояний. | |
7 | /// </summary> |
|
5 | /// </summary> | |
8 | public class EndToken<TTag>: Token { |
|
6 | public class EndToken<TTag>: EndToken { | |
9 |
|
7 | |||
10 | TTag m_tag; |
|
8 | readonly TTag m_tag; | |
11 |
|
9 | |||
12 | public EndToken(TTag tag) { |
|
10 | public EndToken(TTag tag) { | |
13 | m_tag = tag; |
|
11 | m_tag = tag; | |
@@ -21,13 +19,5 namespace Implab.Automaton.RegularExpres | |||||
21 | get { return m_tag; } |
|
19 | get { return m_tag; } | |
22 | } |
|
20 | } | |
23 |
|
21 | |||
24 | public override void Accept(IVisitor visitor) { |
|
|||
25 | Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor"); |
|
|||
26 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
|||
27 | ((IVisitor<TTag>)visitor).Visit(this); |
|
|||
28 | } |
|
|||
29 | public override string ToString() { |
|
|||
30 | return "#"; |
|
|||
31 |
|
|
22 | } | |
32 | } |
|
23 | } | |
33 | } |
|
@@ -2,12 +2,12 | |||||
2 | using System.Linq; |
|
2 | using System.Linq; | |
3 |
|
3 | |||
4 | namespace Implab.Automaton.RegularExpressions { |
|
4 | namespace Implab.Automaton.RegularExpressions { | |
5 |
public class |
|
5 | public class TaggedDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { | |
6 |
|
6 | |||
7 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); |
|
7 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); | |
8 | readonly IAlphabet<TInput> m_alphabet; |
|
8 | readonly IAlphabet<TInput> m_alphabet; | |
9 |
|
9 | |||
10 |
public |
|
10 | public TaggedDFA(IAlphabet<TInput> alphabet) { | |
11 | Safe.ArgumentNotNull(alphabet, "aplhabet"); |
|
11 | Safe.ArgumentNotNull(alphabet, "aplhabet"); | |
12 |
|
12 | |||
13 | m_alphabet = alphabet; |
|
13 | m_alphabet = alphabet; | |
@@ -48,10 +48,10 namespace Implab.Automaton.RegularExpres | |||||
48 | /// Optimize the specified alphabet. |
|
48 | /// Optimize the specified alphabet. | |
49 | /// </summary> |
|
49 | /// </summary> | |
50 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> |
|
50 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> | |
51 |
public |
|
51 | public TaggedDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { | |
52 | Safe.ArgumentNotNull(alphabet, "alphabet"); |
|
52 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |
53 |
|
53 | |||
54 |
var dfa = new |
|
54 | var dfa = new TaggedDFA<TInput, TTag>(alphabet); | |
55 |
|
55 | |||
56 | var states = new DummyAlphabet(StateCount); |
|
56 | var states = new DummyAlphabet(StateCount); | |
57 | var alphaMap = new Dictionary<int,int>(); |
|
57 | var alphaMap = new Dictionary<int,int>(); |
@@ -10,7 +10,7 namespace Implab.Automaton.RegularExpres | |||||
10 | /// регулярное выражение и вычисляет followpos, затем используется метод |
|
10 | /// регулярное выражение и вычисляет followpos, затем используется метод | |
11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. |
|
11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |
12 | /// </summary> |
|
12 | /// </summary> | |
13 |
public class RegularExpressionVisitor |
|
13 | public class RegularExpressionVisitor : IVisitor { | |
14 | int m_idx; |
|
14 | int m_idx; | |
15 | Token m_root; |
|
15 | Token m_root; | |
16 | HashSet<int> m_firstpos; |
|
16 | HashSet<int> m_firstpos; | |
@@ -19,13 +19,23 namespace Implab.Automaton.RegularExpres | |||||
19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); |
|
19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |
20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); |
|
20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |
21 | readonly HashSet<int> m_ends = new HashSet<int>(); |
|
21 | readonly HashSet<int> m_ends = new HashSet<int>(); | |
22 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); |
|
|||
23 |
|
22 | |||
24 | public Dictionary<int, HashSet<int>> FollowposMap { |
|
23 | readonly IDFATableBuilder m_builder; | |
25 | get { return m_followpos; } |
|
24 | readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( | |
|
25 | false, | |||
|
26 | new CustomEqualityComparer<HashSet<int>>( | |||
|
27 | (x, y) => x.SetEquals(y), | |||
|
28 | x => x.Sum(n => n.GetHashCode()) | |||
|
29 | ) | |||
|
30 | ); | |||
|
31 | ||||
|
32 | public RegularExpressionVisitor(IDFATableBuilder builder) { | |||
|
33 | Safe.ArgumentNotNull(builder, "builder"); | |||
|
34 | ||||
|
35 | m_builder = builder; | |||
26 | } |
|
36 | } | |
27 |
|
37 | |||
28 |
|
|
38 | HashSet<int> Followpos(int pos) { | |
29 | HashSet<int> set; |
|
39 | HashSet<int> set; | |
30 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); |
|
40 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | |
31 | } |
|
41 | } | |
@@ -42,6 +52,9 namespace Implab.Automaton.RegularExpres | |||||
42 | return false; |
|
52 | return false; | |
43 | } |
|
53 | } | |
44 |
|
54 | |||
|
55 | protected int Index { | |||
|
56 | get { return m_idx; } | |||
|
57 | } | |||
45 |
|
58 | |||
46 | public void Visit(AltToken token) { |
|
59 | public void Visit(AltToken token) { | |
47 | if (m_root == null) |
|
60 | if (m_root == null) | |
@@ -112,45 +125,23 namespace Implab.Automaton.RegularExpres | |||||
112 | m_lastpos = new HashSet<int>(new[] { m_idx }); |
|
125 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
113 | } |
|
126 | } | |
114 |
|
127 | |||
115 |
public void Visit(EndToken |
|
128 | public virtual void Visit(EndToken token) { | |
116 | if (m_root == null) |
|
129 | if (m_root == null) | |
117 | m_root = token; |
|
130 | m_root = token; | |
118 | m_idx++; |
|
131 | m_idx++; | |
119 |
m_indexes[m_idx] = |
|
132 | m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; | |
120 | m_firstpos = new HashSet<int>(new[] { m_idx }); |
|
|||
121 | m_lastpos = new HashSet<int>(new[] { m_idx }); |
|
|||
122 | Followpos(m_idx); |
|
|||
123 | m_ends.Add(m_idx); |
|
|||
124 | m_tags.Add(m_idx, token.Tag); |
|
|||
125 | } |
|
|||
126 |
|
||||
127 | public void Visit(EndToken token) { |
|
|||
128 | if (m_root == null) |
|
|||
129 | m_root = token; |
|
|||
130 | m_idx++; |
|
|||
131 | m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; |
|
|||
132 | m_firstpos = new HashSet<int>(new[] { m_idx }); |
|
133 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |
133 | m_lastpos = new HashSet<int>(new[] { m_idx }); |
|
134 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
134 | Followpos(m_idx); |
|
135 | Followpos(m_idx); | |
135 | m_ends.Add(m_idx); |
|
136 | m_ends.Add(m_idx); | |
136 | } |
|
137 | } | |
137 |
|
138 | |||
138 |
public void BuildDFA( |
|
139 | public void BuildDFA() { | |
139 | Safe.ArgumentNotNull(dfa,"dfa"); |
|
140 | AddState(m_firstpos); | |
|
141 | SetInitialState(m_firstpos); | |||
140 |
|
142 | |||
141 | var states = new MapAlphabet<HashSet<int>>( |
|
143 | if(IsFinal(m_firstpos)) | |
142 | false, |
|
144 | MarkFinalState(m_firstpos); | |
143 | new CustomEqualityComparer<HashSet<int>>( |
|
|||
144 | (x, y) => x.SetEquals(y), |
|
|||
145 | x => x.Sum(n => n.GetHashCode()) |
|
|||
146 | )); |
|
|||
147 |
|
||||
148 | var initialState = states.DefineSymbol(m_firstpos); |
|
|||
149 | dfa.SetInitialState(initialState); |
|
|||
150 |
|
||||
151 | var tags = GetStateTags(m_firstpos); |
|
|||
152 | if (tags != null && tags.Length > 0) |
|
|||
153 | dfa.MarkFinalState(initialState, tags); |
|
|||
154 |
|
145 | |||
155 | var inputMax = m_indexes.Values.Max(); |
|
146 | var inputMax = m_indexes.Values.Max(); | |
156 | var queue = new Queue<HashSet<int>>(); |
|
147 | var queue = new Queue<HashSet<int>>(); | |
@@ -158,38 +149,58 namespace Implab.Automaton.RegularExpres | |||||
158 | queue.Enqueue(m_firstpos); |
|
149 | queue.Enqueue(m_firstpos); | |
159 |
|
150 | |||
160 | while (queue.Count > 0) { |
|
151 | while (queue.Count > 0) { | |
161 |
var s |
|
152 | var s1 = queue.Dequeue(); | |
162 | var s1 = states.Translate(state); |
|
|||
163 | Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); |
|
|||
164 |
|
153 | |||
165 | for (int a = 0; a <= inputMax; a++) { |
|
154 | for (int a = 0; a <= inputMax; a++) { | |
166 |
var |
|
155 | var s2 = new HashSet<int>(); | |
167 |
foreach (var p in s |
|
156 | foreach (var p in s1) { | |
168 | if (m_indexes[p] == a) { |
|
157 | if (m_indexes[p] == a) { | |
169 |
|
|
158 | s2.UnionWith(Followpos(p)); | |
170 | } |
|
159 | } | |
171 | } |
|
160 | } | |
172 |
if ( |
|
161 | if (s2.Count > 0) { | |
173 |
i |
|
162 | if (!HasState(s2)) { | |
174 |
|
|
163 | AddState(s2); | |
175 |
s2 |
|
164 | if (IsFinal(s2)) | |
176 |
|
|
165 | MarkFinalState(s2); | |
177 | s2 = states.DefineSymbol(next); |
|
|||
178 |
|
166 | |||
179 |
|
|
167 | queue.Enqueue(s2); | |
180 |
|
||||
181 | dfa.MarkFinalState(s2); |
|
|||
182 | tags = GetStateTags(next); |
|
|||
183 | if (tags != null && tags.Length > 0) |
|
|||
184 | dfa.SetStateTag(s2, tags); |
|
|||
185 |
|
|
168 | } | |
186 |
|
169 | |||
187 |
|
|
170 | DefineTransition(s1, s2, a); | |
188 |
|
|
171 | } | |
189 | dfa.Add(new AutomatonTransition(s1, s2, a)); |
|
172 | ||
190 |
|
|
173 | } | |
191 |
|
|
174 | } | |
192 |
|
|
175 | } | |
|
176 | ||||
|
177 | protected bool HasState(HashSet<int> state) { | |||
|
178 | return m_states.Contains(state); | |||
|
179 | } | |||
|
180 | ||||
|
181 | protected void AddState(HashSet<int> state) { | |||
|
182 | Debug.Assert(!HasState(state)); | |||
|
183 | ||||
|
184 | m_states.DefineSymbol(state); | |||
|
185 | } | |||
|
186 | ||||
|
187 | protected int Translate(HashSet<int> state) { | |||
|
188 | Debug.Assert(HasState(state)); | |||
|
189 | ||||
|
190 | return m_states.Translate(state); | |||
|
191 | } | |||
|
192 | ||||
|
193 | protected virtual void SetInitialState(HashSet<int> state) { | |||
|
194 | m_builder.SetInitialState(Translate(state)); | |||
|
195 | } | |||
|
196 | ||||
|
197 | protected virtual void MarkFinalState(HashSet<int> state) { | |||
|
198 | m_builder.MarkFinalState(Translate(state)); | |||
|
199 | } | |||
|
200 | ||||
|
201 | protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { | |||
|
202 | ||||
|
203 | m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); | |||
193 | } |
|
204 | } | |
194 |
|
205 | |||
195 | bool IsFinal(IEnumerable<int> state) { |
|
206 | bool IsFinal(IEnumerable<int> state) { | |
@@ -197,10 +208,5 namespace Implab.Automaton.RegularExpres | |||||
197 | return state.Any(m_ends.Contains); |
|
208 | return state.Any(m_ends.Contains); | |
198 | } |
|
209 | } | |
199 |
|
210 | |||
200 | TTag[] GetStateTags(IEnumerable<int> state) { |
|
|||
201 | Debug.Assert(state != null); |
|
|||
202 | return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); |
|
|||
203 | } |
|
|||
204 |
|
||||
205 | } |
|
211 | } | |
206 | } |
|
212 | } |
@@ -6,7 +6,7 namespace Implab.Automaton.RegularExpres | |||||
6 | public abstract class Token { |
|
6 | public abstract class Token { | |
7 | public abstract void Accept(IVisitor visitor); |
|
7 | public abstract void Accept(IVisitor visitor); | |
8 |
|
8 | |||
9 |
public Token E |
|
9 | public Token End() { | |
10 | return Cat(new EndToken()); |
|
10 | return Cat(new EndToken()); | |
11 | } |
|
11 | } | |
12 |
|
12 |
@@ -9,14 +9,14 namespace Implab.Formats { | |||||
9 | /// <summary> |
|
9 | /// <summary> | |
10 | /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. |
|
10 | /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. | |
11 | /// </summary> |
|
11 | /// </summary> | |
12 |
public abstract class Grammar<TSymbol |
|
12 | public abstract class Grammar<TSymbol> { | |
13 |
|
13 | |||
14 | protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { |
|
14 | protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { | |
15 | get; |
|
15 | get; | |
16 | } |
|
16 | } | |
17 |
|
17 | |||
18 |
protected SymbolToken |
|
18 | protected SymbolToken UnclassifiedToken() { | |
19 |
return new SymbolToken |
|
19 | return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT); | |
20 | } |
|
20 | } | |
21 |
|
21 | |||
22 | protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { |
|
22 | protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { | |
@@ -26,23 +26,23 namespace Implab.Formats { | |||||
26 | AlphabetBuilder.DefineSymbol(ch); |
|
26 | AlphabetBuilder.DefineSymbol(ch); | |
27 | } |
|
27 | } | |
28 |
|
28 | |||
29 |
protected Token |
|
29 | protected Token SymbolToken(TSymbol symbol) { | |
30 |
return Token |
|
30 | return Token.New(TranslateOrAdd(symbol)); | |
31 | } |
|
31 | } | |
32 |
|
32 | |||
33 |
protected Token |
|
33 | protected Token SymbolToken(IEnumerable<TSymbol> symbols) { | |
34 | Safe.ArgumentNotNull(symbols, "symbols"); |
|
34 | Safe.ArgumentNotNull(symbols, "symbols"); | |
35 |
|
35 | |||
36 |
return Token |
|
36 | return Token.New(TranslateOrAdd(symbols).ToArray()); | |
37 | } |
|
37 | } | |
38 |
|
38 | |||
39 |
protected Token |
|
39 | protected Token SymbolSetToken(params TSymbol[] set) { | |
40 | return SymbolToken(set); |
|
40 | return SymbolToken(set); | |
41 | } |
|
41 | } | |
42 |
|
42 | |||
43 | int TranslateOrAdd(TSymbol ch) { |
|
43 | int TranslateOrAdd(TSymbol ch) { | |
44 | var t = AlphabetBuilder.Translate(ch); |
|
44 | var t = AlphabetBuilder.Translate(ch); | |
45 |
if (t == |
|
45 | if (t == AutomatonConst.UNCLASSIFIED_INPUT) | |
46 | t = AlphabetBuilder.DefineSymbol(ch); |
|
46 | t = AlphabetBuilder.DefineSymbol(ch); | |
47 | return t; |
|
47 | return t; | |
48 | } |
|
48 | } | |
@@ -53,7 +53,7 namespace Implab.Formats { | |||||
53 |
|
53 | |||
54 | int TranslateOrDie(TSymbol ch) { |
|
54 | int TranslateOrDie(TSymbol ch) { | |
55 | var t = AlphabetBuilder.Translate(ch); |
|
55 | var t = AlphabetBuilder.Translate(ch); | |
56 |
if (t == |
|
56 | if (t == AutomatonConst.UNCLASSIFIED_INPUT) | |
57 | throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); |
|
57 | throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); | |
58 | return t; |
|
58 | return t; | |
59 | } |
|
59 | } | |
@@ -62,22 +62,21 namespace Implab.Formats { | |||||
62 | return symbols.Distinct().Select(TranslateOrDie); |
|
62 | return symbols.Distinct().Select(TranslateOrDie); | |
63 | } |
|
63 | } | |
64 |
|
64 | |||
65 |
protected Token |
|
65 | protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) { | |
66 | Safe.ArgumentNotNull(symbols, "symbols"); |
|
66 | Safe.ArgumentNotNull(symbols, "symbols"); | |
67 |
|
67 | |||
68 |
return Token |
|
68 | return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); | |
69 | } |
|
69 | } | |
70 |
|
70 | |||
71 | protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); |
|
71 | protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); | |
72 |
|
72 | |||
73 |
protected ScannerContext<TTag> BuildScannerContext |
|
73 | protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) { | |
74 |
|
74 | |||
75 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); |
|
75 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); | |
76 |
|
76 | |||
77 | var visitor = new RegularExpressionVisitor<TTag>(); |
|
77 | var visitor = new RegularExpressionVisitor<TTag>(dfa); | |
78 |
regexp.Accept( |
|
78 | regexp.Accept(visitor); | |
79 |
|
79 | visitor.BuildDFA(); | ||
80 | visitor.BuildDFA(dfa); |
|
|||
81 |
|
80 | |||
82 | if (dfa.IsFinalState(dfa.InitialState)) |
|
81 | if (dfa.IsFinalState(dfa.InitialState)) | |
83 | throw new ApplicationException("The specified language contains empty token"); |
|
82 | throw new ApplicationException("The specified language contains empty token"); |
@@ -5,7 +5,6 | |||||
5 | enum JSONElementContext { |
|
5 | enum JSONElementContext { | |
6 | None, |
|
6 | None, | |
7 | Object, |
|
7 | Object, | |
8 |
Array |
|
8 | Array | |
9 | Closed |
|
|||
10 | } |
|
9 | } | |
11 | } |
|
10 | } |
@@ -4,7 +4,7 using System; | |||||
4 | using Implab.Automaton; |
|
4 | using Implab.Automaton; | |
5 |
|
5 | |||
6 | namespace Implab.Formats.JSON { |
|
6 | namespace Implab.Formats.JSON { | |
7 |
class JSONGrammar : Grammar<char |
|
7 | class JSONGrammar : Grammar<char> { | |
8 | public enum TokenType { |
|
8 | public enum TokenType { | |
9 | None, |
|
9 | None, | |
10 | BeginObject, |
|
10 | BeginObject, | |
@@ -29,8 +29,8 namespace Implab.Formats.JSON { | |||||
29 | get { return _instance.Value; } |
|
29 | get { return _instance.Value; } | |
30 | } |
|
30 | } | |
31 |
|
31 | |||
32 |
readonly ScannerContext<TokenType> m_json |
|
32 | readonly ScannerContext<TokenType> m_jsonExpression; | |
33 |
readonly ScannerContext<TokenType> m_string |
|
33 | readonly ScannerContext<TokenType> m_stringExpression; | |
34 |
|
34 | |||
35 | public JSONGrammar() { |
|
35 | public JSONGrammar() { | |
36 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); |
|
36 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); | |
@@ -81,23 +81,25 namespace Implab.Formats.JSON { | |||||
81 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); |
|
81 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); | |
82 |
|
82 | |||
83 |
|
83 | |||
84 |
m_json |
|
84 | m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression); | |
85 |
m_string |
|
85 | m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression); | |
|
86 | ||||
|
87 | ||||
86 | } |
|
88 | } | |
87 |
|
89 | |||
88 |
public ScannerContext<TokenType> Json |
|
90 | public ScannerContext<TokenType> JsonExpression { | |
89 | get { |
|
91 | get { | |
90 |
return m_json |
|
92 | return m_jsonExpression; | |
91 | } |
|
93 | } | |
92 | } |
|
94 | } | |
93 |
|
95 | |||
94 |
public ScannerContext<TokenType> JsonString |
|
96 | public ScannerContext<TokenType> JsonStringExpression { | |
95 | get { |
|
97 | get { | |
96 |
return m_string |
|
98 | return m_stringExpression; | |
97 | } |
|
99 | } | |
98 | } |
|
100 | } | |
99 |
|
101 | |||
100 |
Token |
|
102 | Token SymbolRangeToken(char start, char stop) { | |
101 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); |
|
103 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); | |
102 | } |
|
104 | } | |
103 |
|
105 |
@@ -38,17 +38,30 namespace Implab.Formats.JSON { | |||||
38 | MemberValue |
|
38 | MemberValue | |
39 | } |
|
39 | } | |
40 |
|
40 | |||
|
41 | #region Parser rules | |||
41 | struct ParserContext { |
|
42 | struct ParserContext { | |
42 | DFAStateDescriptior<object> |
|
43 | readonly int[,] m_dfa; | |
|
44 | int m_state; | |||
|
45 | ||||
|
46 | readonly JSONElementContext m_elementContext; | |||
|
47 | ||||
|
48 | public ParserContext(int[,] dfa, int state, JSONElementContext context) { | |||
|
49 | m_dfa = dfa; | |||
|
50 | m_state = state; | |||
|
51 | m_elementContext = context; | |||
43 | } |
|
52 | } | |
44 |
|
53 | |||
45 | static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet; |
|
54 | public bool Move(JsonTokenType token) { | |
46 | static readonly DFAStateDescriptior<object>[] _jsonDFA; |
|
55 | var next = m_dfa[m_state, token]; | |
47 | static readonly int _jsonDFAInitialState; |
|
56 | if (next == AutomatonConst.UNREACHABLE_STATE) | |
48 | static readonly DFAStateDescriptior<object>[] _objectDFA; |
|
57 | return false; | |
49 | static readonly int _objectDFAInitialState; |
|
58 | m_state = next; | |
50 | static readonly DFAStateDescriptior<object>[] _arrayDFA; |
|
59 | } | |
51 | static readonly int _arrayDFAInitialState; |
|
60 | ||
|
61 | public JSONElementContext ElementContext { | |||
|
62 | get { return m_elementContext; } | |||
|
63 | } | |||
|
64 | } | |||
52 |
|
65 | |||
53 | static JSONParser() { |
|
66 | static JSONParser() { | |
54 |
|
67 | |||
@@ -64,7 +77,8 namespace Implab.Formats.JSON { | |||||
64 | ) |
|
77 | ) | |
65 | .Optional() |
|
78 | .Optional() | |
66 | .Cat(Token(JsonTokenType.EndObject)) |
|
79 | .Cat(Token(JsonTokenType.EndObject)) | |
67 |
. |
|
80 | .End(); | |
|
81 | ||||
68 | var arrayExpression = valueExpression |
|
82 | var arrayExpression = valueExpression | |
69 | .Cat( |
|
83 | .Cat( | |
70 | Token(JsonTokenType.ValueSeparator) |
|
84 | Token(JsonTokenType.ValueSeparator) | |
@@ -73,29 +87,31 namespace Implab.Formats.JSON { | |||||
73 | ) |
|
87 | ) | |
74 | .Optional() |
|
88 | .Optional() | |
75 | .Cat(Token(JsonTokenType.EndArray)) |
|
89 | .Cat(Token(JsonTokenType.EndArray)) | |
76 |
. |
|
90 | .End(); | |
77 |
|
91 | |||
78 |
var jsonExpression = valueExpression. |
|
92 | var jsonExpression = valueExpression.End(); | |
79 |
|
93 | |||
80 |
_jsonDFA = Create |
|
94 | _jsonDFA = CreateParserContext(jsonExpression, JSONElementContext.None); | |
81 |
_objectDFA = Create |
|
95 | _objectDFA = CreateParserContext(objectExpression, JSONElementContext.Object); | |
82 |
_arrayDFA = Create |
|
96 | _arrayDFA = CreateParserContext(arrayExpression, JSONElementContext.Array); | |
83 | } |
|
97 | } | |
84 |
|
98 | |||
85 |
static Token |
|
99 | static Token Token(params JsonTokenType[] input) { | |
86 |
return Token |
|
100 | return Token.New( input.Select(t => (int)t).ToArray() ); | |
87 | } |
|
101 | } | |
88 |
|
102 | |||
89 | static RegularDFA<JsonTokenType,object> CreateDFA(Token<object> expr) { |
|
103 | static ParserContext CreateParserContext(Token expr, JSONElementContext context) { | |
90 | var builder = new RegularExpressionVisitor<object>(); |
|
|||
91 | var dfa = new RegularDFA<JsonTokenType,object>(_alphabet); |
|
|||
92 |
|
104 | |||
|
105 | var dfa = new DFATable(); | |||
|
106 | var builder = new RegularExpressionVisitor(dfa); | |||
93 | expr.Accept(builder); |
|
107 | expr.Accept(builder); | |
|
108 | builder.BuildDFA(); | |||
94 |
|
109 | |||
95 | builder.BuildDFA(dfa); |
|
110 | return new ParserContext(dfa.CreateTransitionTable(), dfa.InitialState, context); | |
96 | return dfa; |
|
|||
97 | } |
|
111 | } | |
98 |
|
112 | |||
|
113 | #endregion | |||
|
114 | ||||
99 | JSONScanner m_scanner; |
|
115 | JSONScanner m_scanner; | |
100 | MemberContext m_memberContext; |
|
116 | MemberContext m_memberContext; | |
101 |
|
117 | |||
@@ -117,8 +133,7 namespace Implab.Formats.JSON { | |||||
117 | /// Создает новый экземпляр парсера, на основе текстового потока. |
|
133 | /// Создает новый экземпляр парсера, на основе текстового потока. | |
118 | /// </summary> |
|
134 | /// </summary> | |
119 | /// <param name="reader">Текстовый поток.</param> |
|
135 | /// <param name="reader">Текстовый поток.</param> | |
120 | /// <param name="dispose">Признак того, что парсер должен конролировать время жизни входного потока.</param> |
|
136 | public JSONParser(TextReader reader) | |
121 | public JSONParser(TextReader reader, bool dispose) |
|
|||
122 | : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { |
|
137 | : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { | |
123 | Safe.ArgumentNotNull(reader, "reader"); |
|
138 | Safe.ArgumentNotNull(reader, "reader"); | |
124 | m_scanner = new JSONScanner(); |
|
139 | m_scanner = new JSONScanner(); |
@@ -61,15 +61,15 namespace Implab.Formats { | |||||
61 | while (pos < m_bufferSize) { |
|
61 | while (pos < m_bufferSize) { | |
62 | var ch = m_buffer[pos]; |
|
62 | var ch = m_buffer[pos]; | |
63 |
|
63 | |||
64 |
state = dfa[state, ch > maxSymbol ? |
|
64 | state = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
65 |
if (state == |
|
65 | if (state == AutomatonConst.UNREACHABLE_STATE) | |
66 | break; |
|
66 | break; | |
67 |
|
67 | |||
68 | pos++; |
|
68 | pos++; | |
69 | } |
|
69 | } | |
70 |
|
70 | |||
71 | m_tokenLength = pos - m_bufferOffset; |
|
71 | m_tokenLength = pos - m_bufferOffset; | |
72 |
} while (state != |
|
72 | } while (state != AutomatonConst.UNREACHABLE_STATE && Feed()); | |
73 |
|
73 | |||
74 | m_tokenOffset = m_bufferOffset; |
|
74 | m_tokenOffset = m_bufferOffset; | |
75 | m_bufferOffset += m_tokenLength; |
|
75 | m_bufferOffset += m_tokenLength; |
@@ -159,7 +159,6 | |||||
159 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> |
|
159 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> | |
160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> |
|
160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> | |
161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> |
|
161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> | |
162 | <Compile Include="Automaton\DFAConst.cs" /> |
|
|||
163 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> |
|
162 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> | |
164 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> |
|
163 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> | |
165 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> |
|
164 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> | |
@@ -183,7 +182,6 | |||||
183 | <Compile Include="Automaton\IDFATable.cs" /> |
|
182 | <Compile Include="Automaton\IDFATable.cs" /> | |
184 | <Compile Include="Automaton\IDFATableBuilder.cs" /> |
|
183 | <Compile Include="Automaton\IDFATableBuilder.cs" /> | |
185 | <Compile Include="Automaton\DFATable.cs" /> |
|
184 | <Compile Include="Automaton\DFATable.cs" /> | |
186 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> |
|
|||
187 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> |
|
185 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> | |
188 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> |
|
186 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> | |
189 | <Compile Include="Formats\TextScanner.cs" /> |
|
187 | <Compile Include="Formats\TextScanner.cs" /> | |
@@ -193,7 +191,10 | |||||
193 | <Compile Include="Formats\Grammar.cs" /> |
|
191 | <Compile Include="Formats\Grammar.cs" /> | |
194 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> |
|
192 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> | |
195 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> |
|
193 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> | |
196 |
<Compile Include="Automaton\RegularExpressions\ |
|
194 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitorT.cs" /> | |
|
195 | <Compile Include="Automaton\AutomatonConst.cs" /> | |||
|
196 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> | |||
|
197 | <Compile Include="Components\LazyAndWeak.cs" /> | |||
197 | </ItemGroup> |
|
198 | </ItemGroup> | |
198 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
199 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | |
199 | <ItemGroup /> |
|
200 | <ItemGroup /> |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now