@@ -0,0 +1,8 | |||||
|
1 | using System; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton.RegularExpressions { | |||
|
4 | public interface ITaggedDFABuilder<TTag> : IDFATableBuilder { | |||
|
5 | void SetStateTag(int s, TTag[] tags); | |||
|
6 | } | |||
|
7 | } | |||
|
8 |
@@ -0,0 +1,89 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | ||||
|
5 | namespace Implab.Automaton.RegularExpressions { | |||
|
6 | public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { | |||
|
7 | ||||
|
8 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); | |||
|
9 | readonly IAlphabet<TInput> m_alphabet; | |||
|
10 | ||||
|
11 | public RegularDFA(IAlphabet<TInput> alphabet) { | |||
|
12 | Safe.ArgumentNotNull(alphabet, "aplhabet"); | |||
|
13 | ||||
|
14 | m_alphabet = alphabet; | |||
|
15 | } | |||
|
16 | ||||
|
17 | ||||
|
18 | public IAlphabet<TInput> InputAlphabet { | |||
|
19 | get { | |||
|
20 | return m_alphabet; | |||
|
21 | } | |||
|
22 | } | |||
|
23 | ||||
|
24 | public void MarkFinalState(int s, TTag[] tags) { | |||
|
25 | MarkFinalState(s); | |||
|
26 | SetStateTag(s, tags); | |||
|
27 | } | |||
|
28 | ||||
|
29 | public void SetStateTag(int s, TTag[] tags) { | |||
|
30 | Safe.ArgumentNotNull(tags, "tags"); | |||
|
31 | m_tags[s] = tags; | |||
|
32 | } | |||
|
33 | ||||
|
34 | public TTag[] GetStateTag(int s) { | |||
|
35 | TTag[] tags; | |||
|
36 | return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; | |||
|
37 | } | |||
|
38 | ||||
|
39 | public new DFAStateDescriptor<TTag>[] CreateTransitionTable() { | |||
|
40 | var table = new DFAStateDescriptor<TTag>[StateCount]; | |||
|
41 | ||||
|
42 | foreach (var t in this) { | |||
|
43 | if (table[t.s1].transitions == null) | |||
|
44 | table[t.s1] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s1), GetStateTag(t.s1)); | |||
|
45 | if (table[t.s2].transitions == null) | |||
|
46 | table[t.s2] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s2), GetStateTag(t.s2)); | |||
|
47 | table[t.s1].transitions[t.edge] = t.s2; | |||
|
48 | } | |||
|
49 | ||||
|
50 | return table; | |||
|
51 | } | |||
|
52 | ||||
|
53 | /// <summary> | |||
|
54 | /// Optimize the specified alphabet. | |||
|
55 | /// </summary> | |||
|
56 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> | |||
|
57 | public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { | |||
|
58 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |||
|
59 | ||||
|
60 | var dfa = new RegularDFA<TInput, TTag>(alphabet); | |||
|
61 | ||||
|
62 | var states = new DummyAlphabet(StateCount); | |||
|
63 | var alphaMap = new Dictionary<int,int>(); | |||
|
64 | var stateMap = new Dictionary<int,int>(); | |||
|
65 | ||||
|
66 | Optimize(dfa, alphaMap, stateMap); | |||
|
67 | ||||
|
68 | // mark tags in the new DFA | |||
|
69 | foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value )) | |||
|
70 | dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray()); | |||
|
71 | ||||
|
72 | // make the alphabet for the new DFA | |||
|
73 | foreach (var pair in alphaMap) | |||
|
74 | alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value); | |||
|
75 | ||||
|
76 | return dfa; | |||
|
77 | } | |||
|
78 | ||||
|
79 | protected override IEnumerable<HashSet<int>> GroupFinalStates() { | |||
|
80 | var arrayComparer = new CustomEqualityComparer<TTag[]>( | |||
|
81 | (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)), | |||
|
82 | x => x.Sum(it => x.GetHashCode()) | |||
|
83 | ); | |||
|
84 | return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g)); | |||
|
85 | } | |||
|
86 | ||||
|
87 | } | |||
|
88 | } | |||
|
89 |
@@ -0,0 +1,184 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Diagnostics; | |||
|
5 | using System.Linq; | |||
|
6 | ||||
|
7 | namespace Implab.Automaton.RegularExpressions { | |||
|
8 | /// <summary> | |||
|
9 | /// Используется для построения ДКА по регулярному выражению, сначала обходит | |||
|
10 | /// регулярное выражение и вычисляет followpos, затем используется метод | |||
|
11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |||
|
12 | /// </summary> | |||
|
13 | public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { | |||
|
14 | int m_idx; | |||
|
15 | Token<TTag> m_root; | |||
|
16 | HashSet<int> m_firstpos; | |||
|
17 | HashSet<int> m_lastpos; | |||
|
18 | ||||
|
19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |||
|
20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |||
|
21 | readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); | |||
|
22 | ||||
|
23 | public Dictionary<int, HashSet<int>> FollowposMap { | |||
|
24 | get { return m_followpos; } | |||
|
25 | } | |||
|
26 | ||||
|
27 | public HashSet<int> Followpos(int pos) { | |||
|
28 | HashSet<int> set; | |||
|
29 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | |||
|
30 | } | |||
|
31 | ||||
|
32 | bool Nullable(object n) { | |||
|
33 | if (n is EmptyToken<TTag> || n is StarToken<TTag>) | |||
|
34 | return true; | |||
|
35 | var altToken = n as AltToken<TTag>; | |||
|
36 | if (altToken != null) | |||
|
37 | return Nullable(altToken.Left) || Nullable(altToken.Right); | |||
|
38 | var catToken = n as CatToken<TTag>; | |||
|
39 | if (catToken != null) | |||
|
40 | return Nullable(catToken.Left) && Nullable(catToken.Right); | |||
|
41 | return false; | |||
|
42 | } | |||
|
43 | ||||
|
44 | ||||
|
45 | public void Visit(AltToken<TTag> token) { | |||
|
46 | if (m_root == null) | |||
|
47 | m_root = token; | |||
|
48 | var firtspos = new HashSet<int>(); | |||
|
49 | var lastpos = new HashSet<int>(); | |||
|
50 | ||||
|
51 | token.Left.Accept(this); | |||
|
52 | firtspos.UnionWith(m_firstpos); | |||
|
53 | lastpos.UnionWith(m_lastpos); | |||
|
54 | ||||
|
55 | token.Right.Accept(this); | |||
|
56 | firtspos.UnionWith(m_firstpos); | |||
|
57 | lastpos.UnionWith(m_lastpos); | |||
|
58 | ||||
|
59 | m_firstpos = firtspos; | |||
|
60 | m_lastpos = lastpos; | |||
|
61 | } | |||
|
62 | ||||
|
63 | public void Visit(StarToken<TTag> token) { | |||
|
64 | if (m_root == null) | |||
|
65 | m_root = token; | |||
|
66 | token.Token.Accept(this); | |||
|
67 | ||||
|
68 | foreach (var i in m_lastpos) | |||
|
69 | Followpos(i).UnionWith(m_firstpos); | |||
|
70 | } | |||
|
71 | ||||
|
72 | public void Visit(CatToken<TTag> token) { | |||
|
73 | if (m_root == null) | |||
|
74 | m_root = token; | |||
|
75 | ||||
|
76 | var firtspos = new HashSet<int>(); | |||
|
77 | var lastpos = new HashSet<int>(); | |||
|
78 | token.Left.Accept(this); | |||
|
79 | firtspos.UnionWith(m_firstpos); | |||
|
80 | var leftLastpos = m_lastpos; | |||
|
81 | ||||
|
82 | token.Right.Accept(this); | |||
|
83 | lastpos.UnionWith(m_lastpos); | |||
|
84 | var rightFirstpos = m_firstpos; | |||
|
85 | ||||
|
86 | if (Nullable(token.Left)) | |||
|
87 | firtspos.UnionWith(rightFirstpos); | |||
|
88 | ||||
|
89 | if (Nullable(token.Right)) | |||
|
90 | lastpos.UnionWith(leftLastpos); | |||
|
91 | ||||
|
92 | m_firstpos = firtspos; | |||
|
93 | m_lastpos = lastpos; | |||
|
94 | ||||
|
95 | foreach (var i in leftLastpos) | |||
|
96 | Followpos(i).UnionWith(rightFirstpos); | |||
|
97 | ||||
|
98 | } | |||
|
99 | ||||
|
100 | public void Visit(EmptyToken<TTag> token) { | |||
|
101 | if (m_root == null) | |||
|
102 | m_root = token; | |||
|
103 | } | |||
|
104 | ||||
|
105 | public void Visit(SymbolToken<TTag> token) { | |||
|
106 | if (m_root == null) | |||
|
107 | m_root = token; | |||
|
108 | m_idx++; | |||
|
109 | m_indexes[m_idx] = token.Value; | |||
|
110 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |||
|
111 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |||
|
112 | } | |||
|
113 | ||||
|
114 | public void Visit(EndToken<TTag> token) { | |||
|
115 | if (m_root == null) | |||
|
116 | m_root = token; | |||
|
117 | m_idx++; | |||
|
118 | m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | |||
|
119 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |||
|
120 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |||
|
121 | Followpos(m_idx); | |||
|
122 | m_ends.Add(m_idx, token.Tag); | |||
|
123 | } | |||
|
124 | ||||
|
125 | public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { | |||
|
126 | Safe.ArgumentNotNull(dfa,"dfa"); | |||
|
127 | ||||
|
128 | var states = new MapAlphabet<HashSet<int>>( | |||
|
129 | false, | |||
|
130 | new CustomEqualityComparer<HashSet<int>>( | |||
|
131 | (x, y) => x.SetEquals(y), | |||
|
132 | x => x.Sum(n => n.GetHashCode()) | |||
|
133 | )); | |||
|
134 | ||||
|
135 | var initialState = states.DefineSymbol(m_firstpos); | |||
|
136 | dfa.SetInitialState(initialState); | |||
|
137 | ||||
|
138 | var tags = GetStateTags(m_firstpos); | |||
|
139 | if (tags != null && tags.Length > 0) | |||
|
140 | dfa.MarkFinalState(initialState, tags); | |||
|
141 | ||||
|
142 | var inputMax = m_indexes.Values.Max(); | |||
|
143 | var queue = new Queue<HashSet<int>>(); | |||
|
144 | ||||
|
145 | queue.Enqueue(m_firstpos); | |||
|
146 | ||||
|
147 | while (queue.Count > 0) { | |||
|
148 | var state = queue.Dequeue(); | |||
|
149 | var s1 = states.Translate(state); | |||
|
150 | Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); | |||
|
151 | ||||
|
152 | for (int a = 0; a <= inputMax; a++) { | |||
|
153 | var next = new HashSet<int>(); | |||
|
154 | foreach (var p in state) { | |||
|
155 | if (m_indexes[p] == a) { | |||
|
156 | next.UnionWith(Followpos(p)); | |||
|
157 | } | |||
|
158 | } | |||
|
159 | if (next.Count > 0) { | |||
|
160 | int s2 = states.Translate(next); | |||
|
161 | if (s2 == DFAConst.UNCLASSIFIED_INPUT) { | |||
|
162 | s2 = states.DefineSymbol(next); | |||
|
163 | ||||
|
164 | tags = GetStateTags(next); | |||
|
165 | if (tags != null && tags.Length > 0) { | |||
|
166 | dfa.MarkFinalState(s2); | |||
|
167 | dfa.SetStateTag(s2, tags); | |||
|
168 | } | |||
|
169 | ||||
|
170 | queue.Enqueue(next); | |||
|
171 | } | |||
|
172 | dfa.Add(new AutomatonTransition(s1, s2, a)); | |||
|
173 | } | |||
|
174 | } | |||
|
175 | } | |||
|
176 | } | |||
|
177 | ||||
|
178 | TTag[] GetStateTags(IEnumerable<int> state) { | |||
|
179 | Debug.Assert(state != null); | |||
|
180 | return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); | |||
|
181 | } | |||
|
182 | ||||
|
183 | } | |||
|
184 | } |
@@ -1,18 +1,18 | |||||
1 | namespace Implab.Automaton { |
|
1 | namespace Implab.Automaton { | |
2 |
public struct DFAStateDescript |
|
2 | public struct DFAStateDescriptor { | |
3 | public readonly bool final; |
|
3 | public readonly bool final; | |
4 | public readonly int[] transitions; |
|
4 | public readonly int[] transitions; | |
5 |
|
5 | |||
6 |
|
6 | |||
7 |
public DFAStateDescript |
|
7 | public DFAStateDescriptor(int[] transitions, bool final) { | |
8 | this.transitions = transitions; |
|
8 | this.transitions = transitions; | |
9 | this.final = final; |
|
9 | this.final = final; | |
10 | } |
|
10 | } | |
11 |
|
11 | |||
12 |
public DFAStateDescript |
|
12 | public DFAStateDescriptor(int[] transitions) : this(transitions, false) { | |
13 | } |
|
13 | } | |
14 |
|
14 | |||
15 |
public DFAStateDescript |
|
15 | public DFAStateDescriptor(int size, bool final) { | |
16 | Safe.ArgumentInRange(size, 0, int.MaxValue, "size"); |
|
16 | Safe.ArgumentInRange(size, 0, int.MaxValue, "size"); | |
17 |
|
17 | |||
18 | this.final = final; |
|
18 | this.final = final; |
@@ -100,6 +100,20 namespace Implab.Automaton { | |||||
100 | return GetEnumerator(); |
|
100 | return GetEnumerator(); | |
101 | } |
|
101 | } | |
102 |
|
102 | |||
|
103 | public DFAStateDescriptor[] CreateTransitionTable() { | |||
|
104 | var table = new DFAStateDescriptor[StateCount]; | |||
|
105 | ||||
|
106 | foreach (var t in this) { | |||
|
107 | if (table[t.s1].transitions == null) | |||
|
108 | table[t.s1] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s1)); | |||
|
109 | if (table[t.s2].transitions == null) | |||
|
110 | table[t.s2] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s2)); | |||
|
111 | table[t.s1].transitions[t.edge] = t.s2; | |||
|
112 | } | |||
|
113 | ||||
|
114 | return table; | |||
|
115 | } | |||
|
116 | ||||
103 | /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary> |
|
117 | /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary> | |
104 | /// <remarks> |
|
118 | /// <remarks> | |
105 | /// В процессе построения минимального автомата требуется разделить множество состояний, |
|
119 | /// В процессе построения минимального автомата требуется разделить множество состояний, |
@@ -71,8 +71,16 namespace Implab.Automaton { | |||||
71 | return true; |
|
71 | return true; | |
72 | } |
|
72 | } | |
73 |
|
73 | |||
|
74 | public IEnumerable<T> GetSymbols(int cls) { | |||
|
75 | for (var i = 0; i < m_map.Length; i++) | |||
|
76 | if (m_map[i] == cls) | |||
|
77 | yield return GetSymbolByIndex(i); | |||
|
78 | } | |||
|
79 | ||||
74 | public abstract int GetSymbolIndex(T symbol); |
|
80 | public abstract int GetSymbolIndex(T symbol); | |
75 |
|
81 | |||
|
82 | public abstract T GetSymbolByIndex(int index); | |||
|
83 | ||||
76 | public abstract IEnumerable<T> InputSymbols { get; } |
|
84 | public abstract IEnumerable<T> InputSymbols { get; } | |
77 |
|
85 | |||
78 | /// <summary> |
|
86 | /// <summary> |
@@ -38,7 +38,6 namespace Implab.Automaton { | |||||
38 | Safe.ArgumentNotNull(symbols, "symbols"); |
|
38 | Safe.ArgumentNotNull(symbols, "symbols"); | |
39 |
|
39 | |||
40 | m_nextCls = Math.Max(cls + 1, m_nextCls); |
|
40 | m_nextCls = Math.Max(cls + 1, m_nextCls); | |
41 | symbols = symbols.Distinct(); |
|
|||
42 |
|
41 | |||
43 | foreach (var symbol in symbols) |
|
42 | foreach (var symbol in symbols) | |
44 | m_map[symbol] = cls; |
|
43 | m_map[symbol] = cls; | |
@@ -68,6 +67,10 namespace Implab.Automaton { | |||||
68 | return m_supportUnclassified || m_map.ContainsKey(symbol); |
|
67 | return m_supportUnclassified || m_map.ContainsKey(symbol); | |
69 | } |
|
68 | } | |
70 |
|
69 | |||
|
70 | ||||
|
71 | public IEnumerable<T> GetSymbols(int cls) { | |||
|
72 | return m_map.Where(p => p.Value == cls).Select(p => p.Key); | |||
|
73 | } | |||
71 | #endregion |
|
74 | #endregion | |
72 | } |
|
75 | } | |
73 | } |
|
76 | } |
@@ -1,14 +1,14 | |||||
1 | using System; |
|
1 | using System; | |
2 |
|
2 | |||
3 | namespace Implab.Automaton.RegularExpressions { |
|
3 | namespace Implab.Automaton.RegularExpressions { | |
4 |
public struct DFAStateDescriptor |
|
4 | public struct DFAStateDescriptor<T> { | |
5 | public readonly bool final; |
|
5 | public readonly bool final; | |
6 |
|
6 | |||
7 | public readonly int[] transitions; |
|
7 | public readonly int[] transitions; | |
8 |
|
8 | |||
9 | public readonly T[] tags; |
|
9 | public readonly T[] tags; | |
10 |
|
10 | |||
11 |
public DFAStateDescriptor |
|
11 | public DFAStateDescriptor(int size, bool final, T[] tags) { | |
12 | Safe.ArgumentAssert(size >= 0, "size"); |
|
12 | Safe.ArgumentAssert(size >= 0, "size"); | |
13 | this.final = final; |
|
13 | this.final = final; | |
14 | this.tags = tags; |
|
14 | this.tags = tags; |
@@ -66,23 +66,21 namespace Implab.Automaton.RegularExpres | |||||
66 | return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); |
|
66 | return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); | |
67 | } |
|
67 | } | |
68 |
|
68 | |||
69 |
protected |
|
69 | protected abstract IAlphabetBuilder<TSymbol> CreateAlphabet(); | |
70 | Safe.ArgumentNotNull(lang, "lang"); |
|
|||
71 | Safe.ArgumentNotNull(dfaAlphabet, "dfaAlphabet"); |
|
|||
72 |
|
70 | |||
73 | var dfa = new RegularDFADefinition<TSymbol, TTag>(AlphabetBuilder); |
|
71 | protected RegularDFA<TSymbol, TTag> BuildDFA(Token<TTag> regexp) { | |
74 |
|
72 | |||
75 |
var |
|
73 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); | |
76 |
|
74 | |||
77 | lang.Accept( builder ); |
|
75 | var visitor = new RegularExpressionVisitor<TTag>(); | |
|
76 | regexp.Accept( visitor ); | |||
78 |
|
77 | |||
79 |
|
|
78 | visitor.BuildDFA(dfa); | |
80 |
|
79 | |||
81 | if (dfa.IsFinalState(dfa.InitialState)) |
|
80 | if (dfa.IsFinalState(dfa.InitialState)) | |
82 | throw new ApplicationException("The specified language contains empty token"); |
|
81 | throw new ApplicationException("The specified language contains empty token"); | |
83 |
|
82 | |||
84 |
dfa.Optimize( |
|
83 | return dfa.Optimize(CreateAlphabet()); | |
85 |
|
||||
86 | } |
|
84 | } | |
87 |
|
85 | |||
88 | } |
|
86 | } |
@@ -3,6 +3,7 using System; | |||||
3 | using System.Collections.Generic; |
|
3 | using System.Collections.Generic; | |
4 | using System.IO; |
|
4 | using System.IO; | |
5 | using Implab.Components; |
|
5 | using Implab.Components; | |
|
6 | using Implab.Automaton.RegularExpressions; | |||
6 |
|
7 | |||
7 | namespace Implab.Automaton { |
|
8 | namespace Implab.Automaton { | |
8 | /// <summary> |
|
9 | /// <summary> | |
@@ -14,19 +15,23 namespace Implab.Automaton { | |||||
14 | /// конца токена и допустимости текущего символа. |
|
15 | /// конца токена и допустимости текущего символа. | |
15 | /// </remarks> |
|
16 | /// </remarks> | |
16 | public abstract class Scanner<TTag> : Disposable { |
|
17 | public abstract class Scanner<TTag> : Disposable { | |
17 | struct ScannerConfig { |
|
18 | protected struct ScannerConfig { | |
18 |
public DFAStateDescript |
|
19 | public readonly DFAStateDescriptor<TTag>[] states; | |
19 |
public int[] alphabet |
|
20 | public readonly int[] alphabet; | |
20 | public int initialState; |
|
21 | public readonly int initialState; | |
|
22 | ||||
|
23 | public ScannerConfig(DFAStateDescriptor<TTag>[] states, int[] alphabet, int initialState) { | |||
|
24 | this.initialState = initialState; | |||
|
25 | this.alphabet = alphabet; | |||
|
26 | this.states = states; | |||
|
27 | } | |||
21 | } |
|
28 | } | |
22 |
|
29 | |||
23 | Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>(); |
|
30 | Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>(); | |
24 |
|
31 | |||
25 | DFAStateDescriptior<TTag>[] m_states; |
|
32 | ScannerConfig m_config; | |
26 | int[] m_alphabetMap; |
|
|||
27 | int m_initialState; |
|
|||
28 |
|
33 | |||
29 |
protected DFAStateDescript |
|
34 | protected DFAStateDescriptor<TTag> m_currentState; | |
30 | int m_previewCode; |
|
35 | int m_previewCode; | |
31 |
|
36 | |||
32 | protected int m_tokenLen; |
|
37 | protected int m_tokenLen; | |
@@ -41,15 +46,11 namespace Implab.Automaton { | |||||
41 | int m_chunkSize = 1024; // 1k |
|
46 | int m_chunkSize = 1024; // 1k | |
42 | int m_limit = 10 * 1024 * 1024; // 10Mb |
|
47 | int m_limit = 10 * 1024 * 1024; // 10Mb | |
43 |
|
48 | |||
44 | protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { |
|
49 | protected Scanner(ScannerConfig config) { | |
45 | Safe.ArgumentNotEmpty(states, "states"); |
|
50 | Safe.ArgumentNotEmpty(config.states, "config.states"); | |
46 | Safe.ArgumentNotNull(alphabet, "alphabet"); |
|
51 | Safe.ArgumentNotNull(config.alphabet, "config.alphabet"); | |
47 |
|
52 | |||
48 |
m_ |
|
53 | m_config = config; | |
49 | m_alphabetMap = alphabet; |
|
|||
50 | m_initialState = initialState; |
|
|||
51 |
|
||||
52 | Feed(new char[0]); |
|
|||
53 | } |
|
54 | } | |
54 |
|
55 | |||
55 | /// <summary> |
|
56 | /// <summary> | |
@@ -110,7 +111,7 namespace Implab.Automaton { | |||||
110 | /// </summary> |
|
111 | /// </summary> | |
111 | protected TTag[] TokenTags { |
|
112 | protected TTag[] TokenTags { | |
112 | get { |
|
113 | get { | |
113 | return m_currentState.tag; |
|
114 | return m_currentState.tags; | |
114 | } |
|
115 | } | |
115 | } |
|
116 | } | |
116 |
|
117 | |||
@@ -133,7 +134,7 namespace Implab.Automaton { | |||||
133 | if (m_pointer >= m_bufferSize) |
|
134 | if (m_pointer >= m_bufferSize) | |
134 | return false; |
|
135 | return false; | |
135 |
|
136 | |||
136 | m_currentState = m_states[m_initialState]; |
|
137 | m_currentState = m_config.states[m_config.initialState]; | |
137 | m_tokenLen = 0; |
|
138 | m_tokenLen = 0; | |
138 | m_tokenOffset = m_pointer; |
|
139 | m_tokenOffset = m_pointer; | |
139 | int nextState; |
|
140 | int nextState; | |
@@ -151,7 +152,7 namespace Implab.Automaton { | |||||
151 | ) |
|
152 | ) | |
152 | ); |
|
153 | ); | |
153 | } |
|
154 | } | |
154 | m_currentState = m_states[nextState]; |
|
155 | m_currentState = m_config.states[nextState]; | |
155 | m_tokenLen++; |
|
156 | m_tokenLen++; | |
156 |
|
157 | |||
157 | } while (Shift()); |
|
158 | } while (Shift()); | |
@@ -172,7 +173,7 namespace Implab.Automaton { | |||||
172 | return false; |
|
173 | return false; | |
173 | } |
|
174 | } | |
174 |
|
175 | |||
175 |
m_previewCode = m_alphabet |
|
176 | m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; | |
176 |
|
177 | |||
177 | return true; |
|
178 | return true; | |
178 | } |
|
179 | } | |
@@ -217,23 +218,14 namespace Implab.Automaton { | |||||
217 | /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей |
|
218 | /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей | |
218 | /// группировки. |
|
219 | /// группировки. | |
219 | /// </summary> |
|
220 | /// </summary> | |
220 |
/// <param name |
|
221 | /// <param name = "config"></param> | |
221 | /// <param name="alphabet">Таблица входных символов для нового ДКА</param> |
|
222 | protected void Switch(ScannerConfig config) { | |
222 | /// <param name = "initialState"></param> |
|
223 | Safe.ArgumentNotNull(config.states, "config.states"); | |
223 | protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { |
|
|||
224 | Safe.ArgumentNotNull(states, "dfa"); |
|
|||
225 |
|
224 | |||
226 |
m_defs.Push( |
|
225 | m_defs.Push(m_config); | |
227 | states = m_states, |
|
226 | m_config = config; | |
228 | alphabetMap = m_alphabetMap, |
|
|||
229 | initialState = m_initialState |
|
|||
230 | }); |
|
|||
231 |
|
227 | |||
232 | m_states = states; |
|
228 | m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; | |
233 | m_alphabetMap = alphabet; |
|
|||
234 | m_initialState = initialState; |
|
|||
235 |
|
||||
236 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; |
|
|||
237 | } |
|
229 | } | |
238 |
|
230 | |||
239 | /// <summary> |
|
231 | /// <summary> | |
@@ -242,11 +234,9 namespace Implab.Automaton { | |||||
242 | protected void Restore() { |
|
234 | protected void Restore() { | |
243 | if (m_defs.Count == 0) |
|
235 | if (m_defs.Count == 0) | |
244 | throw new InvalidOperationException(); |
|
236 | throw new InvalidOperationException(); | |
245 |
|
|
237 | m_config = m_defs.Pop(); | |
246 | m_states = prev.states; |
|
238 | ||
247 | m_alphabetMap = prev.alphabetMap; |
|
239 | m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; | |
248 | m_initialState = prev.initialState; |
|
|||
249 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; |
|
|||
250 | } |
|
240 | } | |
251 |
|
241 | |||
252 | protected override void Dispose(bool disposing) { |
|
242 | protected override void Dispose(bool disposing) { |
@@ -13,6 +13,10 namespace Implab.Formats { | |||||
13 | return (int)symbol; |
|
13 | return (int)symbol; | |
14 | } |
|
14 | } | |
15 |
|
15 | |||
|
16 | public override byte GetSymbolByIndex(int index) { | |||
|
17 | return (byte)index; | |||
|
18 | } | |||
|
19 | ||||
16 | public IEnumerable<byte> InputSymbols { |
|
20 | public IEnumerable<byte> InputSymbols { | |
17 | get { |
|
21 | get { | |
18 | return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>(); |
|
22 | return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>(); |
@@ -13,6 +13,10 namespace Implab.Formats { | |||||
13 | return symbol; |
|
13 | return symbol; | |
14 | } |
|
14 | } | |
15 |
|
15 | |||
|
16 | public override char GetSymbolByIndex(int index) { | |||
|
17 | return (char)index; | |||
|
18 | } | |||
|
19 | ||||
16 | public override IEnumerable<char> InputSymbols { |
|
20 | public override IEnumerable<char> InputSymbols { | |
17 | get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast<char>(); } |
|
21 | get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast<char>(); } | |
18 | } |
|
22 | } |
@@ -1,6 +1,7 | |||||
1 | using System.Linq; |
|
1 | using System.Linq; | |
2 | using Implab.Automaton.RegularExpressions; |
|
2 | using Implab.Automaton.RegularExpressions; | |
3 | using System; |
|
3 | using System; | |
|
4 | using Implab.Automaton; | |||
4 |
|
5 | |||
5 | namespace Implab.Formats.JSON { |
|
6 | namespace Implab.Formats.JSON { | |
6 | class JSONGrammar : Grammar<char,JSONGrammar.TokenType> { |
|
7 | class JSONGrammar : Grammar<char,JSONGrammar.TokenType> { | |
@@ -35,8 +36,8 namespace Implab.Formats.JSON { | |||||
35 | get { return _instance.Value; } |
|
36 | get { return _instance.Value; } | |
36 | } |
|
37 | } | |
37 |
|
38 | |||
38 |
readonly Regular |
|
39 | readonly RegularDFA<char, TokenType> m_jsonDFA; | |
39 |
readonly Regular |
|
40 | readonly RegularDFA<char, TokenType> m_stringDFA; | |
40 |
|
41 | |||
41 | public JSONGrammar() { |
|
42 | public JSONGrammar() { | |
42 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); |
|
43 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); | |
@@ -87,21 +88,17 namespace Implab.Formats.JSON { | |||||
87 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); |
|
88 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); | |
88 |
|
89 | |||
89 |
|
90 | |||
90 | m_jsonDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet()); |
|
91 | m_jsonDFA = BuildDFA(jsonExpression); | |
91 | BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); |
|
92 | m_stringDFA = BuildDFA(jsonStringExpression); | |
92 |
|
||||
93 |
|
||||
94 | m_stringDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet()); |
|
|||
95 | BuildDFA(jsonStringExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); |
|
|||
96 | } |
|
93 | } | |
97 |
|
94 | |||
98 |
public Regular |
|
95 | public RegularDFA<char, TokenType> JsonDFA { | |
99 | get { |
|
96 | get { | |
100 | return m_jsonDFA; |
|
97 | return m_jsonDFA; | |
101 | } |
|
98 | } | |
102 | } |
|
99 | } | |
103 |
|
100 | |||
104 |
public RegularDFA |
|
101 | public RegularDFA<char,TokenType> JsonStringDFA { | |
105 | get { |
|
102 | get { | |
106 | return m_stringDFA; |
|
103 | return m_stringDFA; | |
107 | } |
|
104 | } | |
@@ -111,5 +108,9 namespace Implab.Formats.JSON { | |||||
111 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); |
|
108 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); | |
112 | } |
|
109 | } | |
113 |
|
110 | |||
|
111 | protected override IAlphabetBuilder<char> CreateAlphabet() { | |||
|
112 | return new CharAlphabet(); | |||
|
113 | } | |||
|
114 | ||||
114 | } |
|
115 | } | |
115 | } |
|
116 | } |
@@ -86,9 +86,9 namespace Implab.Formats.JSON { | |||||
86 | return Token<object>.New(input.Select(t => _alphabet.Translate(t)).ToArray()); |
|
86 | return Token<object>.New(input.Select(t => _alphabet.Translate(t)).ToArray()); | |
87 | } |
|
87 | } | |
88 |
|
88 | |||
89 |
static RegularDFA |
|
89 | static RegularDFA<JsonTokenType,object> CreateDFA(Token<object> expr) { | |
90 |
var builder = new Regular |
|
90 | var builder = new RegularExpressionVisitor<object>(); | |
91 |
var dfa = new RegularDFA |
|
91 | var dfa = new RegularDFA<JsonTokenType,object>(_alphabet); | |
92 |
|
92 | |||
93 | expr.Accept(builder); |
|
93 | expr.Accept(builder); | |
94 |
|
94 |
@@ -170,7 +170,6 | |||||
170 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> |
|
170 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> | |
171 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> |
|
171 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> | |
172 | <Compile Include="Automaton\AutomatonTransition.cs" /> |
|
172 | <Compile Include="Automaton\AutomatonTransition.cs" /> | |
173 | <Compile Include="Automaton\RegularExpressions\RegularDFABuilder.cs" /> |
|
|||
174 | <Compile Include="Formats\JSON\JSONElementContext.cs" /> |
|
173 | <Compile Include="Formats\JSON\JSONElementContext.cs" /> | |
175 | <Compile Include="Formats\JSON\JSONElementType.cs" /> |
|
174 | <Compile Include="Formats\JSON\JSONElementType.cs" /> | |
176 | <Compile Include="Formats\JSON\JSONGrammar.cs" /> |
|
175 | <Compile Include="Formats\JSON\JSONGrammar.cs" /> | |
@@ -183,13 +182,14 | |||||
183 | <Compile Include="Formats\JSON\StringTranslator.cs" /> |
|
182 | <Compile Include="Formats\JSON\StringTranslator.cs" /> | |
184 | <Compile Include="Automaton\MapAlphabet.cs" /> |
|
183 | <Compile Include="Automaton\MapAlphabet.cs" /> | |
185 | <Compile Include="Automaton\DummyAlphabet.cs" /> |
|
184 | <Compile Include="Automaton\DummyAlphabet.cs" /> | |
186 | <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" /> |
|
|||
187 | <Compile Include="Formats\CharAlphabet.cs" /> |
|
185 | <Compile Include="Formats\CharAlphabet.cs" /> | |
188 | <Compile Include="Formats\ByteAlphabet.cs" /> |
|
186 | <Compile Include="Formats\ByteAlphabet.cs" /> | |
189 | <Compile Include="Formats\RegularCharDFADefinition.cs" /> |
|
|||
190 | <Compile Include="Automaton\IDFATable.cs" /> |
|
187 | <Compile Include="Automaton\IDFATable.cs" /> | |
191 | <Compile Include="Automaton\IDFATableBuilder.cs" /> |
|
188 | <Compile Include="Automaton\IDFATableBuilder.cs" /> | |
192 | <Compile Include="Automaton\DFATable.cs" /> |
|
189 | <Compile Include="Automaton\DFATable.cs" /> | |
|
190 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> | |||
|
191 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> | |||
|
192 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> | |||
193 | <Compile Include="Automaton\RegularExpressions\DFAStateDescriptorT.cs" /> |
|
193 | <Compile Include="Automaton\RegularExpressions\DFAStateDescriptorT.cs" /> | |
194 | </ItemGroup> |
|
194 | </ItemGroup> | |
195 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
195 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now