@@ -0,0 +1,8 | |||
|
1 | using System; | |
|
2 | ||
|
3 | namespace Implab.Automaton.RegularExpressions { | |
|
4 | public interface ITaggedDFABuilder<TTag> : IDFATableBuilder { | |
|
5 | void SetStateTag(int s, TTag[] tags); | |
|
6 | } | |
|
7 | } | |
|
8 |
@@ -0,0 +1,89 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | ||
|
5 | namespace Implab.Automaton.RegularExpressions { | |
|
6 | public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { | |
|
7 | ||
|
8 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); | |
|
9 | readonly IAlphabet<TInput> m_alphabet; | |
|
10 | ||
|
11 | public RegularDFA(IAlphabet<TInput> alphabet) { | |
|
12 | Safe.ArgumentNotNull(alphabet, "aplhabet"); | |
|
13 | ||
|
14 | m_alphabet = alphabet; | |
|
15 | } | |
|
16 | ||
|
17 | ||
|
18 | public IAlphabet<TInput> InputAlphabet { | |
|
19 | get { | |
|
20 | return m_alphabet; | |
|
21 | } | |
|
22 | } | |
|
23 | ||
|
24 | public void MarkFinalState(int s, TTag[] tags) { | |
|
25 | MarkFinalState(s); | |
|
26 | SetStateTag(s, tags); | |
|
27 | } | |
|
28 | ||
|
29 | public void SetStateTag(int s, TTag[] tags) { | |
|
30 | Safe.ArgumentNotNull(tags, "tags"); | |
|
31 | m_tags[s] = tags; | |
|
32 | } | |
|
33 | ||
|
34 | public TTag[] GetStateTag(int s) { | |
|
35 | TTag[] tags; | |
|
36 | return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; | |
|
37 | } | |
|
38 | ||
|
39 | public new DFAStateDescriptor<TTag>[] CreateTransitionTable() { | |
|
40 | var table = new DFAStateDescriptor<TTag>[StateCount]; | |
|
41 | ||
|
42 | foreach (var t in this) { | |
|
43 | if (table[t.s1].transitions == null) | |
|
44 | table[t.s1] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s1), GetStateTag(t.s1)); | |
|
45 | if (table[t.s2].transitions == null) | |
|
46 | table[t.s2] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s2), GetStateTag(t.s2)); | |
|
47 | table[t.s1].transitions[t.edge] = t.s2; | |
|
48 | } | |
|
49 | ||
|
50 | return table; | |
|
51 | } | |
|
52 | ||
|
53 | /// <summary> | |
|
54 | /// Optimize the specified alphabet. | |
|
55 | /// </summary> | |
|
56 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> | |
|
57 | public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { | |
|
58 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |
|
59 | ||
|
60 | var dfa = new RegularDFA<TInput, TTag>(alphabet); | |
|
61 | ||
|
62 | var states = new DummyAlphabet(StateCount); | |
|
63 | var alphaMap = new Dictionary<int,int>(); | |
|
64 | var stateMap = new Dictionary<int,int>(); | |
|
65 | ||
|
66 | Optimize(dfa, alphaMap, stateMap); | |
|
67 | ||
|
68 | // mark tags in the new DFA | |
|
69 | foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value )) | |
|
70 | dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray()); | |
|
71 | ||
|
72 | // make the alphabet for the new DFA | |
|
73 | foreach (var pair in alphaMap) | |
|
74 | alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value); | |
|
75 | ||
|
76 | return dfa; | |
|
77 | } | |
|
78 | ||
|
79 | protected override IEnumerable<HashSet<int>> GroupFinalStates() { | |
|
80 | var arrayComparer = new CustomEqualityComparer<TTag[]>( | |
|
81 | (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)), | |
|
82 | x => x.Sum(it => x.GetHashCode()) | |
|
83 | ); | |
|
84 | return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g)); | |
|
85 | } | |
|
86 | ||
|
87 | } | |
|
88 | } | |
|
89 |
@@ -0,0 +1,184 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Diagnostics; | |
|
5 | using System.Linq; | |
|
6 | ||
|
7 | namespace Implab.Automaton.RegularExpressions { | |
|
8 | /// <summary> | |
|
9 | /// Используется для построения ДКА по регулярному выражению, сначала обходит | |
|
10 | /// регулярное выражение и вычисляет followpos, затем используется метод | |
|
11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |
|
12 | /// </summary> | |
|
13 | public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { | |
|
14 | int m_idx; | |
|
15 | Token<TTag> m_root; | |
|
16 | HashSet<int> m_firstpos; | |
|
17 | HashSet<int> m_lastpos; | |
|
18 | ||
|
19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |
|
20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |
|
21 | readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); | |
|
22 | ||
|
23 | public Dictionary<int, HashSet<int>> FollowposMap { | |
|
24 | get { return m_followpos; } | |
|
25 | } | |
|
26 | ||
|
27 | public HashSet<int> Followpos(int pos) { | |
|
28 | HashSet<int> set; | |
|
29 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | |
|
30 | } | |
|
31 | ||
|
32 | bool Nullable(object n) { | |
|
33 | if (n is EmptyToken<TTag> || n is StarToken<TTag>) | |
|
34 | return true; | |
|
35 | var altToken = n as AltToken<TTag>; | |
|
36 | if (altToken != null) | |
|
37 | return Nullable(altToken.Left) || Nullable(altToken.Right); | |
|
38 | var catToken = n as CatToken<TTag>; | |
|
39 | if (catToken != null) | |
|
40 | return Nullable(catToken.Left) && Nullable(catToken.Right); | |
|
41 | return false; | |
|
42 | } | |
|
43 | ||
|
44 | ||
|
45 | public void Visit(AltToken<TTag> token) { | |
|
46 | if (m_root == null) | |
|
47 | m_root = token; | |
|
48 | var firtspos = new HashSet<int>(); | |
|
49 | var lastpos = new HashSet<int>(); | |
|
50 | ||
|
51 | token.Left.Accept(this); | |
|
52 | firtspos.UnionWith(m_firstpos); | |
|
53 | lastpos.UnionWith(m_lastpos); | |
|
54 | ||
|
55 | token.Right.Accept(this); | |
|
56 | firtspos.UnionWith(m_firstpos); | |
|
57 | lastpos.UnionWith(m_lastpos); | |
|
58 | ||
|
59 | m_firstpos = firtspos; | |
|
60 | m_lastpos = lastpos; | |
|
61 | } | |
|
62 | ||
|
63 | public void Visit(StarToken<TTag> token) { | |
|
64 | if (m_root == null) | |
|
65 | m_root = token; | |
|
66 | token.Token.Accept(this); | |
|
67 | ||
|
68 | foreach (var i in m_lastpos) | |
|
69 | Followpos(i).UnionWith(m_firstpos); | |
|
70 | } | |
|
71 | ||
|
72 | public void Visit(CatToken<TTag> token) { | |
|
73 | if (m_root == null) | |
|
74 | m_root = token; | |
|
75 | ||
|
76 | var firtspos = new HashSet<int>(); | |
|
77 | var lastpos = new HashSet<int>(); | |
|
78 | token.Left.Accept(this); | |
|
79 | firtspos.UnionWith(m_firstpos); | |
|
80 | var leftLastpos = m_lastpos; | |
|
81 | ||
|
82 | token.Right.Accept(this); | |
|
83 | lastpos.UnionWith(m_lastpos); | |
|
84 | var rightFirstpos = m_firstpos; | |
|
85 | ||
|
86 | if (Nullable(token.Left)) | |
|
87 | firtspos.UnionWith(rightFirstpos); | |
|
88 | ||
|
89 | if (Nullable(token.Right)) | |
|
90 | lastpos.UnionWith(leftLastpos); | |
|
91 | ||
|
92 | m_firstpos = firtspos; | |
|
93 | m_lastpos = lastpos; | |
|
94 | ||
|
95 | foreach (var i in leftLastpos) | |
|
96 | Followpos(i).UnionWith(rightFirstpos); | |
|
97 | ||
|
98 | } | |
|
99 | ||
|
100 | public void Visit(EmptyToken<TTag> token) { | |
|
101 | if (m_root == null) | |
|
102 | m_root = token; | |
|
103 | } | |
|
104 | ||
|
105 | public void Visit(SymbolToken<TTag> token) { | |
|
106 | if (m_root == null) | |
|
107 | m_root = token; | |
|
108 | m_idx++; | |
|
109 | m_indexes[m_idx] = token.Value; | |
|
110 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |
|
111 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
|
112 | } | |
|
113 | ||
|
114 | public void Visit(EndToken<TTag> token) { | |
|
115 | if (m_root == null) | |
|
116 | m_root = token; | |
|
117 | m_idx++; | |
|
118 | m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | |
|
119 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |
|
120 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
|
121 | Followpos(m_idx); | |
|
122 | m_ends.Add(m_idx, token.Tag); | |
|
123 | } | |
|
124 | ||
|
125 | public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { | |
|
126 | Safe.ArgumentNotNull(dfa,"dfa"); | |
|
127 | ||
|
128 | var states = new MapAlphabet<HashSet<int>>( | |
|
129 | false, | |
|
130 | new CustomEqualityComparer<HashSet<int>>( | |
|
131 | (x, y) => x.SetEquals(y), | |
|
132 | x => x.Sum(n => n.GetHashCode()) | |
|
133 | )); | |
|
134 | ||
|
135 | var initialState = states.DefineSymbol(m_firstpos); | |
|
136 | dfa.SetInitialState(initialState); | |
|
137 | ||
|
138 | var tags = GetStateTags(m_firstpos); | |
|
139 | if (tags != null && tags.Length > 0) | |
|
140 | dfa.MarkFinalState(initialState, tags); | |
|
141 | ||
|
142 | var inputMax = m_indexes.Values.Max(); | |
|
143 | var queue = new Queue<HashSet<int>>(); | |
|
144 | ||
|
145 | queue.Enqueue(m_firstpos); | |
|
146 | ||
|
147 | while (queue.Count > 0) { | |
|
148 | var state = queue.Dequeue(); | |
|
149 | var s1 = states.Translate(state); | |
|
150 | Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); | |
|
151 | ||
|
152 | for (int a = 0; a <= inputMax; a++) { | |
|
153 | var next = new HashSet<int>(); | |
|
154 | foreach (var p in state) { | |
|
155 | if (m_indexes[p] == a) { | |
|
156 | next.UnionWith(Followpos(p)); | |
|
157 | } | |
|
158 | } | |
|
159 | if (next.Count > 0) { | |
|
160 | int s2 = states.Translate(next); | |
|
161 | if (s2 == DFAConst.UNCLASSIFIED_INPUT) { | |
|
162 | s2 = states.DefineSymbol(next); | |
|
163 | ||
|
164 | tags = GetStateTags(next); | |
|
165 | if (tags != null && tags.Length > 0) { | |
|
166 | dfa.MarkFinalState(s2); | |
|
167 | dfa.SetStateTag(s2, tags); | |
|
168 | } | |
|
169 | ||
|
170 | queue.Enqueue(next); | |
|
171 | } | |
|
172 | dfa.Add(new AutomatonTransition(s1, s2, a)); | |
|
173 | } | |
|
174 | } | |
|
175 | } | |
|
176 | } | |
|
177 | ||
|
178 | TTag[] GetStateTags(IEnumerable<int> state) { | |
|
179 | Debug.Assert(state != null); | |
|
180 | return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); | |
|
181 | } | |
|
182 | ||
|
183 | } | |
|
184 | } |
@@ -1,18 +1,18 | |||
|
1 | 1 | namespace Implab.Automaton { |
|
2 |
public struct DFAStateDescript |
|
|
2 | public struct DFAStateDescriptor { | |
|
3 | 3 | public readonly bool final; |
|
4 | 4 | public readonly int[] transitions; |
|
5 | 5 | |
|
6 | 6 | |
|
7 |
public DFAStateDescript |
|
|
7 | public DFAStateDescriptor(int[] transitions, bool final) { | |
|
8 | 8 | this.transitions = transitions; |
|
9 | 9 | this.final = final; |
|
10 | 10 | } |
|
11 | 11 | |
|
12 |
public DFAStateDescript |
|
|
12 | public DFAStateDescriptor(int[] transitions) : this(transitions, false) { | |
|
13 | 13 | } |
|
14 | 14 | |
|
15 |
public DFAStateDescript |
|
|
15 | public DFAStateDescriptor(int size, bool final) { | |
|
16 | 16 | Safe.ArgumentInRange(size, 0, int.MaxValue, "size"); |
|
17 | 17 | |
|
18 | 18 | this.final = final; |
@@ -100,6 +100,20 namespace Implab.Automaton { | |||
|
100 | 100 | return GetEnumerator(); |
|
101 | 101 | } |
|
102 | 102 | |
|
103 | public DFAStateDescriptor[] CreateTransitionTable() { | |
|
104 | var table = new DFAStateDescriptor[StateCount]; | |
|
105 | ||
|
106 | foreach (var t in this) { | |
|
107 | if (table[t.s1].transitions == null) | |
|
108 | table[t.s1] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s1)); | |
|
109 | if (table[t.s2].transitions == null) | |
|
110 | table[t.s2] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s2)); | |
|
111 | table[t.s1].transitions[t.edge] = t.s2; | |
|
112 | } | |
|
113 | ||
|
114 | return table; | |
|
115 | } | |
|
116 | ||
|
103 | 117 | /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary> |
|
104 | 118 | /// <remarks> |
|
105 | 119 | /// В процессе построения минимального автомата требуется разделить множество состояний, |
@@ -71,8 +71,16 namespace Implab.Automaton { | |||
|
71 | 71 | return true; |
|
72 | 72 | } |
|
73 | 73 | |
|
74 | public IEnumerable<T> GetSymbols(int cls) { | |
|
75 | for (var i = 0; i < m_map.Length; i++) | |
|
76 | if (m_map[i] == cls) | |
|
77 | yield return GetSymbolByIndex(i); | |
|
78 | } | |
|
79 | ||
|
74 | 80 | public abstract int GetSymbolIndex(T symbol); |
|
75 | 81 | |
|
82 | public abstract T GetSymbolByIndex(int index); | |
|
83 | ||
|
76 | 84 | public abstract IEnumerable<T> InputSymbols { get; } |
|
77 | 85 | |
|
78 | 86 | /// <summary> |
@@ -38,7 +38,6 namespace Implab.Automaton { | |||
|
38 | 38 | Safe.ArgumentNotNull(symbols, "symbols"); |
|
39 | 39 | |
|
40 | 40 | m_nextCls = Math.Max(cls + 1, m_nextCls); |
|
41 | symbols = symbols.Distinct(); | |
|
42 | 41 | |
|
43 | 42 | foreach (var symbol in symbols) |
|
44 | 43 | m_map[symbol] = cls; |
@@ -68,6 +67,10 namespace Implab.Automaton { | |||
|
68 | 67 | return m_supportUnclassified || m_map.ContainsKey(symbol); |
|
69 | 68 | } |
|
70 | 69 | |
|
70 | ||
|
71 | public IEnumerable<T> GetSymbols(int cls) { | |
|
72 | return m_map.Where(p => p.Value == cls).Select(p => p.Key); | |
|
73 | } | |
|
71 | 74 | #endregion |
|
72 | 75 | } |
|
73 | 76 | } |
@@ -1,14 +1,14 | |||
|
1 | 1 | using System; |
|
2 | 2 | |
|
3 | 3 | namespace Implab.Automaton.RegularExpressions { |
|
4 |
public struct DFAStateDescriptor |
|
|
4 | public struct DFAStateDescriptor<T> { | |
|
5 | 5 | public readonly bool final; |
|
6 | 6 | |
|
7 | 7 | public readonly int[] transitions; |
|
8 | 8 | |
|
9 | 9 | public readonly T[] tags; |
|
10 | 10 | |
|
11 |
public DFAStateDescriptor |
|
|
11 | public DFAStateDescriptor(int size, bool final, T[] tags) { | |
|
12 | 12 | Safe.ArgumentAssert(size >= 0, "size"); |
|
13 | 13 | this.final = final; |
|
14 | 14 | this.tags = tags; |
@@ -66,23 +66,21 namespace Implab.Automaton.RegularExpres | |||
|
66 | 66 | return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); |
|
67 | 67 | } |
|
68 | 68 | |
|
69 |
protected |
|
|
70 | Safe.ArgumentNotNull(lang, "lang"); | |
|
71 | Safe.ArgumentNotNull(dfaAlphabet, "dfaAlphabet"); | |
|
69 | protected abstract IAlphabetBuilder<TSymbol> CreateAlphabet(); | |
|
72 | 70 | |
|
73 | var dfa = new RegularDFADefinition<TSymbol, TTag>(AlphabetBuilder); | |
|
71 | protected RegularDFA<TSymbol, TTag> BuildDFA(Token<TTag> regexp) { | |
|
74 | 72 | |
|
75 |
var |
|
|
73 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); | |
|
76 | 74 | |
|
77 | lang.Accept( builder ); | |
|
75 | var visitor = new RegularExpressionVisitor<TTag>(); | |
|
76 | regexp.Accept( visitor ); | |
|
78 | 77 | |
|
79 |
|
|
|
78 | visitor.BuildDFA(dfa); | |
|
80 | 79 | |
|
81 | 80 | if (dfa.IsFinalState(dfa.InitialState)) |
|
82 | 81 | throw new ApplicationException("The specified language contains empty token"); |
|
83 | 82 | |
|
84 |
dfa.Optimize( |
|
|
85 | ||
|
83 | return dfa.Optimize(CreateAlphabet()); | |
|
86 | 84 | } |
|
87 | 85 | |
|
88 | 86 | } |
@@ -3,6 +3,7 using System; | |||
|
3 | 3 | using System.Collections.Generic; |
|
4 | 4 | using System.IO; |
|
5 | 5 | using Implab.Components; |
|
6 | using Implab.Automaton.RegularExpressions; | |
|
6 | 7 | |
|
7 | 8 | namespace Implab.Automaton { |
|
8 | 9 | /// <summary> |
@@ -14,19 +15,23 namespace Implab.Automaton { | |||
|
14 | 15 | /// конца токена и допустимости текущего символа. |
|
15 | 16 | /// </remarks> |
|
16 | 17 | public abstract class Scanner<TTag> : Disposable { |
|
17 | struct ScannerConfig { | |
|
18 |
public DFAStateDescript |
|
|
19 |
public int[] alphabet |
|
|
20 | public int initialState; | |
|
18 | protected struct ScannerConfig { | |
|
19 | public readonly DFAStateDescriptor<TTag>[] states; | |
|
20 | public readonly int[] alphabet; | |
|
21 | public readonly int initialState; | |
|
22 | ||
|
23 | public ScannerConfig(DFAStateDescriptor<TTag>[] states, int[] alphabet, int initialState) { | |
|
24 | this.initialState = initialState; | |
|
25 | this.alphabet = alphabet; | |
|
26 | this.states = states; | |
|
27 | } | |
|
21 | 28 | } |
|
22 | 29 | |
|
23 | 30 | Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>(); |
|
24 | 31 | |
|
25 | DFAStateDescriptior<TTag>[] m_states; | |
|
26 | int[] m_alphabetMap; | |
|
27 | int m_initialState; | |
|
32 | ScannerConfig m_config; | |
|
28 | 33 | |
|
29 |
protected DFAStateDescript |
|
|
34 | protected DFAStateDescriptor<TTag> m_currentState; | |
|
30 | 35 | int m_previewCode; |
|
31 | 36 | |
|
32 | 37 | protected int m_tokenLen; |
@@ -41,15 +46,11 namespace Implab.Automaton { | |||
|
41 | 46 | int m_chunkSize = 1024; // 1k |
|
42 | 47 | int m_limit = 10 * 1024 * 1024; // 10Mb |
|
43 | 48 | |
|
44 | protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { | |
|
45 | Safe.ArgumentNotEmpty(states, "states"); | |
|
46 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |
|
49 | protected Scanner(ScannerConfig config) { | |
|
50 | Safe.ArgumentNotEmpty(config.states, "config.states"); | |
|
51 | Safe.ArgumentNotNull(config.alphabet, "config.alphabet"); | |
|
47 | 52 | |
|
48 |
m_ |
|
|
49 | m_alphabetMap = alphabet; | |
|
50 | m_initialState = initialState; | |
|
51 | ||
|
52 | Feed(new char[0]); | |
|
53 | m_config = config; | |
|
53 | 54 | } |
|
54 | 55 | |
|
55 | 56 | /// <summary> |
@@ -110,7 +111,7 namespace Implab.Automaton { | |||
|
110 | 111 | /// </summary> |
|
111 | 112 | protected TTag[] TokenTags { |
|
112 | 113 | get { |
|
113 | return m_currentState.tag; | |
|
114 | return m_currentState.tags; | |
|
114 | 115 | } |
|
115 | 116 | } |
|
116 | 117 | |
@@ -133,7 +134,7 namespace Implab.Automaton { | |||
|
133 | 134 | if (m_pointer >= m_bufferSize) |
|
134 | 135 | return false; |
|
135 | 136 | |
|
136 | m_currentState = m_states[m_initialState]; | |
|
137 | m_currentState = m_config.states[m_config.initialState]; | |
|
137 | 138 | m_tokenLen = 0; |
|
138 | 139 | m_tokenOffset = m_pointer; |
|
139 | 140 | int nextState; |
@@ -151,7 +152,7 namespace Implab.Automaton { | |||
|
151 | 152 | ) |
|
152 | 153 | ); |
|
153 | 154 | } |
|
154 | m_currentState = m_states[nextState]; | |
|
155 | m_currentState = m_config.states[nextState]; | |
|
155 | 156 | m_tokenLen++; |
|
156 | 157 | |
|
157 | 158 | } while (Shift()); |
@@ -172,7 +173,7 namespace Implab.Automaton { | |||
|
172 | 173 | return false; |
|
173 | 174 | } |
|
174 | 175 | |
|
175 |
m_previewCode = m_alphabet |
|
|
176 | m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; | |
|
176 | 177 | |
|
177 | 178 | return true; |
|
178 | 179 | } |
@@ -217,23 +218,14 namespace Implab.Automaton { | |||
|
217 | 218 | /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей |
|
218 | 219 | /// группировки. |
|
219 | 220 | /// </summary> |
|
220 |
/// <param name |
|
|
221 | /// <param name="alphabet">Таблица входных символов для нового ДКА</param> | |
|
222 | /// <param name = "initialState"></param> | |
|
223 | protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { | |
|
224 | Safe.ArgumentNotNull(states, "dfa"); | |
|
221 | /// <param name = "config"></param> | |
|
222 | protected void Switch(ScannerConfig config) { | |
|
223 | Safe.ArgumentNotNull(config.states, "config.states"); | |
|
225 | 224 | |
|
226 |
m_defs.Push( |
|
|
227 | states = m_states, | |
|
228 | alphabetMap = m_alphabetMap, | |
|
229 | initialState = m_initialState | |
|
230 | }); | |
|
225 | m_defs.Push(m_config); | |
|
226 | m_config = config; | |
|
231 | 227 | |
|
232 | m_states = states; | |
|
233 | m_alphabetMap = alphabet; | |
|
234 | m_initialState = initialState; | |
|
235 | ||
|
236 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; | |
|
228 | m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; | |
|
237 | 229 | } |
|
238 | 230 | |
|
239 | 231 | /// <summary> |
@@ -242,11 +234,9 namespace Implab.Automaton { | |||
|
242 | 234 | protected void Restore() { |
|
243 | 235 | if (m_defs.Count == 0) |
|
244 | 236 | throw new InvalidOperationException(); |
|
245 |
|
|
|
246 | m_states = prev.states; | |
|
247 | m_alphabetMap = prev.alphabetMap; | |
|
248 | m_initialState = prev.initialState; | |
|
249 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; | |
|
237 | m_config = m_defs.Pop(); | |
|
238 | ||
|
239 | m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; | |
|
250 | 240 | } |
|
251 | 241 | |
|
252 | 242 | protected override void Dispose(bool disposing) { |
@@ -13,6 +13,10 namespace Implab.Formats { | |||
|
13 | 13 | return (int)symbol; |
|
14 | 14 | } |
|
15 | 15 | |
|
16 | public override byte GetSymbolByIndex(int index) { | |
|
17 | return (byte)index; | |
|
18 | } | |
|
19 | ||
|
16 | 20 | public IEnumerable<byte> InputSymbols { |
|
17 | 21 | get { |
|
18 | 22 | return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>(); |
@@ -13,6 +13,10 namespace Implab.Formats { | |||
|
13 | 13 | return symbol; |
|
14 | 14 | } |
|
15 | 15 | |
|
16 | public override char GetSymbolByIndex(int index) { | |
|
17 | return (char)index; | |
|
18 | } | |
|
19 | ||
|
16 | 20 | public override IEnumerable<char> InputSymbols { |
|
17 | 21 | get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast<char>(); } |
|
18 | 22 | } |
@@ -1,6 +1,7 | |||
|
1 | 1 | using System.Linq; |
|
2 | 2 | using Implab.Automaton.RegularExpressions; |
|
3 | 3 | using System; |
|
4 | using Implab.Automaton; | |
|
4 | 5 | |
|
5 | 6 | namespace Implab.Formats.JSON { |
|
6 | 7 | class JSONGrammar : Grammar<char,JSONGrammar.TokenType> { |
@@ -35,8 +36,8 namespace Implab.Formats.JSON { | |||
|
35 | 36 | get { return _instance.Value; } |
|
36 | 37 | } |
|
37 | 38 | |
|
38 |
readonly Regular |
|
|
39 |
readonly Regular |
|
|
39 | readonly RegularDFA<char, TokenType> m_jsonDFA; | |
|
40 | readonly RegularDFA<char, TokenType> m_stringDFA; | |
|
40 | 41 | |
|
41 | 42 | public JSONGrammar() { |
|
42 | 43 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); |
@@ -87,21 +88,17 namespace Implab.Formats.JSON { | |||
|
87 | 88 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); |
|
88 | 89 | |
|
89 | 90 | |
|
90 | m_jsonDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet()); | |
|
91 | BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); | |
|
92 | ||
|
93 | ||
|
94 | m_stringDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet()); | |
|
95 | BuildDFA(jsonStringExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); | |
|
91 | m_jsonDFA = BuildDFA(jsonExpression); | |
|
92 | m_stringDFA = BuildDFA(jsonStringExpression); | |
|
96 | 93 | } |
|
97 | 94 | |
|
98 |
public Regular |
|
|
95 | public RegularDFA<char, TokenType> JsonDFA { | |
|
99 | 96 | get { |
|
100 | 97 | return m_jsonDFA; |
|
101 | 98 | } |
|
102 | 99 | } |
|
103 | 100 | |
|
104 |
public RegularDFA |
|
|
101 | public RegularDFA<char,TokenType> JsonStringDFA { | |
|
105 | 102 | get { |
|
106 | 103 | return m_stringDFA; |
|
107 | 104 | } |
@@ -111,5 +108,9 namespace Implab.Formats.JSON { | |||
|
111 | 108 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); |
|
112 | 109 | } |
|
113 | 110 | |
|
111 | protected override IAlphabetBuilder<char> CreateAlphabet() { | |
|
112 | return new CharAlphabet(); | |
|
113 | } | |
|
114 | ||
|
114 | 115 | } |
|
115 | 116 | } |
@@ -86,9 +86,9 namespace Implab.Formats.JSON { | |||
|
86 | 86 | return Token<object>.New(input.Select(t => _alphabet.Translate(t)).ToArray()); |
|
87 | 87 | } |
|
88 | 88 | |
|
89 |
static RegularDFA |
|
|
90 |
var builder = new Regular |
|
|
91 |
var dfa = new RegularDFA |
|
|
89 | static RegularDFA<JsonTokenType,object> CreateDFA(Token<object> expr) { | |
|
90 | var builder = new RegularExpressionVisitor<object>(); | |
|
91 | var dfa = new RegularDFA<JsonTokenType,object>(_alphabet); | |
|
92 | 92 | |
|
93 | 93 | expr.Accept(builder); |
|
94 | 94 |
@@ -170,7 +170,6 | |||
|
170 | 170 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> |
|
171 | 171 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> |
|
172 | 172 | <Compile Include="Automaton\AutomatonTransition.cs" /> |
|
173 | <Compile Include="Automaton\RegularExpressions\RegularDFABuilder.cs" /> | |
|
174 | 173 | <Compile Include="Formats\JSON\JSONElementContext.cs" /> |
|
175 | 174 | <Compile Include="Formats\JSON\JSONElementType.cs" /> |
|
176 | 175 | <Compile Include="Formats\JSON\JSONGrammar.cs" /> |
@@ -183,13 +182,14 | |||
|
183 | 182 | <Compile Include="Formats\JSON\StringTranslator.cs" /> |
|
184 | 183 | <Compile Include="Automaton\MapAlphabet.cs" /> |
|
185 | 184 | <Compile Include="Automaton\DummyAlphabet.cs" /> |
|
186 | <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" /> | |
|
187 | 185 | <Compile Include="Formats\CharAlphabet.cs" /> |
|
188 | 186 | <Compile Include="Formats\ByteAlphabet.cs" /> |
|
189 | <Compile Include="Formats\RegularCharDFADefinition.cs" /> | |
|
190 | 187 | <Compile Include="Automaton\IDFATable.cs" /> |
|
191 | 188 | <Compile Include="Automaton\IDFATableBuilder.cs" /> |
|
192 | 189 | <Compile Include="Automaton\DFATable.cs" /> |
|
190 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> | |
|
191 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> | |
|
192 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> | |
|
193 | 193 | <Compile Include="Automaton\RegularExpressions\DFAStateDescriptorT.cs" /> |
|
194 | 194 | </ItemGroup> |
|
195 | 195 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
1 | NO CONTENT: file was removed |
|
1 | NO CONTENT: file was removed |
|
1 | NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now