##// END OF EJS Templates
DFA refactoring
cin -
r169:54270c2f29f2 ref20160224
parent child
Show More
@@ -0,0 +1,280
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Linq;
5
6 namespace Implab.Automaton {
7 public class DFATable : IDFATableBuilder {
8 DFAStateDescriptior[] m_dfaTable;
9
10 int m_stateCount;
11 int m_symbolCount;
12 int m_initialState;
13
14 readonly HashSet<int> m_finalStates = new HashSet<int>();
15 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
16
17 void AssertNotReadOnly() {
18 if (m_dfaTable != null)
19 throw new InvalidOperationException("The object is readonly");
20 }
21
22
23 #region IDFADefinition implementation
24
25 public DFAStateDescriptior[] GetTransitionTable() {
26 if (m_dfaTable == null) {
27 if (m_stateCount <= 0)
28 throw new InvalidOperationException("Invalid automaton definition: states count = {0}", m_stateCount);
29 if (m_symbolCount <= 0)
30 throw new InvalidOperationException("Invalid automaton definition: symbols count = {0}", m_symbolCount);
31
32 m_dfaTable = ConstructTransitionTable();
33 }
34 return m_dfaTable;
35 }
36
37 public bool IsFinalState(int s) {
38 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
39
40 return m_dfaTable != null ? m_dfaTable[s].final : m_finalStates.Contains(s);
41 }
42
43 public IEnumerable<int> FinalStates {
44 get {
45 return m_finalStates;
46 }
47 }
48
49 public int StateCount {
50 get { return m_stateCount; }
51 }
52
53 public int AlphabetSize {
54 get { return m_symbolCount; }
55 }
56
57 public int InitialState {
58 get { return m_initialState; }
59 }
60
61 #endregion
62
63 protected virtual DFAStateDescriptior[] ConstructTransitionTable() {
64 var dfaTable = new DFAStateDescriptior[m_stateCount];
65
66
67 foreach (var t in m_transitions) {
68 if (dfaTable[t.s1].transitions == null)
69 dfaTable[t.s1] = new DFAStateDescriptior(m_symbolCount, m_finalStates.Contains(t.s1));
70
71 dfaTable[t.s1].transitions[t.edge] = t.s2;
72 }
73
74 foreach (var s in m_finalStates)
75 if (!dfaTable[s].final)
76 m_dfaTable[s] = new DFAStateDescriptior(m_symbolCount, true);
77
78 }
79
80 public void SetInitialState(int s) {
81 Safe.ArgumentAssert(s >= 0, "s");
82 m_initialState = s;
83 }
84
85 public void MarkFinalState(int state) {
86 AssertNotReadOnly();
87 m_finalStates.Add(state);
88 }
89
90 public void Add(AutomatonTransition item) {
91 AssertNotReadOnly();
92 Safe.ArgumentAssert(item.s1 >= 0, "item");
93 Safe.ArgumentAssert(item.s2 >= 0, "item");
94 Safe.ArgumentAssert(item.edge >= 0, "item");
95
96 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
97 m_symbolCount = Math.Max(m_symbolCount, item.edge);
98
99 m_transitions.Add(item);
100 }
101
102 public void Clear() {
103 AssertNotReadOnly();
104
105 m_stateCount = 0;
106 m_symbolCount = 0;
107 m_finalStates.Clear();
108 m_transitions.Clear();
109 }
110
111 public bool Contains(AutomatonTransition item) {
112 return m_transitions.Contains(item);
113 }
114
115 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
116 m_transitions.CopyTo(array, arrayIndex);
117 }
118
119 public bool Remove(AutomatonTransition item) {
120 AssertNotReadOnly();
121 m_transitions.Remove(item);
122 }
123
124 public int Count {
125 get {
126 return m_transitions.Count;
127 }
128 }
129
130 public bool IsReadOnly {
131 get {
132 return m_dfaTable != null;
133 }
134 }
135
136 public IEnumerator<AutomatonTransition> GetEnumerator() {
137 return m_transitions.GetEnumerator();
138 }
139
140 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
141 return GetEnumerator();
142 }
143
144 /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary>
145 /// <remarks>
146 /// В процессе построения минимального автомата требуется разделить множество состояний,
147 /// на два подмножества - конечные состояния и все остальные, после чего эти подмножества
148 /// будут резделены на более мелкие. Иногда требуется гарантировать различия конечных сосотяний,
149 /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний.
150 /// </remarks>
151 /// <returns>The final states.</returns>
152 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
153 return new HashSet<int>[] { m_finalStates };
154 }
155
156 protected void Optimize<TInput, TState>(
157 IDFATableBuilder optimalDFA,
158 IAlphabet<TInput> inputAlphabet,
159 IAlphabetBuilder<TInput> optimalInputAlphabet,
160 IAlphabet<TState> stateAlphabet,
161 IAlphabetBuilder<TState> optimalStateAlphabet
162 ) {
163 Safe.ArgumentNotNull(optimalDFA, "dfa");
164 Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet");
165 Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet");
166 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
167 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
168
169 if (inputAlphabet.Count != m_symbolCount)
170 throw new InvalidOperationException("The input symbols aphabet mismatch");
171 if (stateAlphabet.Count != m_stateCount)
172 throw new InvalidOperationException("The states alphabet mismatch");
173
174 var setComparer = new CustomEqualityComparer<HashSet<int>>(
175 (x, y) => x.SetEquals(y),
176 s => s.Sum(x => x.GetHashCode())
177 );
178
179 var optimalStates = new HashSet<HashSet<int>>(setComparer);
180 var queue = new HashSet<HashSet<int>>(setComparer);
181
182 // получаем конечные состояния, сгруппированные по маркерам
183 optimalStates.UnionWith(
184 GroupFinalStates()
185 );
186
187 var state = new HashSet<int>(
188 Enumerable
189 .Range(0, m_stateCount - 1)
190 .Where(i => !m_finalStates.Contains(i))
191 );
192
193 optimalStates.Add(state);
194 queue.Add(state);
195
196 var rmap = m_transitions
197 .GroupBy(t => t.s2)
198 .ToLookup(
199 g => g.Key, // s2
200 g => g.ToLookup(t => t.edge, t => t.s1)
201 );
202
203 while (queue.Count > 0) {
204 var stateA = queue.First();
205 queue.Remove(stateA);
206
207 for (int c = 0; c < m_symbolCount; c++) {
208 var stateX = new HashSet<int>();
209 foreach(var a in stateA)
210 stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a'
211
212 foreach (var stateY in optimalStates.ToArray()) {
213 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
214 var stateR1 = new HashSet<int>(stateY);
215 var stateR2 = new HashSet<int>(stateY);
216
217 stateR1.IntersectWith(stateX);
218 stateR2.ExceptWith(stateX);
219
220 optimalStates.Remove(stateY);
221 optimalStates.Add(stateR1);
222 optimalStates.Add(stateR2);
223
224 if (queue.Contains(stateY)) {
225 queue.Remove(stateY);
226 queue.Add(stateR1);
227 queue.Add(stateR2);
228 } else {
229 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
230 }
231 }
232 }
233 }
234 }
235
236 // карта получения оптимального состояния по соотвествующему ему простому состоянию
237 var statesMap = stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates);
238
239 // получаем минимальный алфавит
240 // входные символы не различимы, если Move(s,a1) == Move(s,a2)
241 var optimalAlphabet = m_transitions
242 .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge);
243
244 var alphabetMap = inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet);
245
246 // построение автомата
247 optimalDFA.SetInitialState(statesMap[m_initialState]);
248
249 foreach (var sf in m_finalStates.GroupBy(s => statesMap[s]))
250 optimalDFA.MarkFinalState(sf.Key);
251
252 foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct())
253 optimalDFA.Add(t);
254
255 }
256
257 protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
258 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
259 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
260
261 var inputMap = inputAlphabet.CreateReverseMap();
262 var stateMap = stateAlphabet.CreateReverseMap();
263
264 for (int i = 0; i < inputMap.Length; i++)
265 Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i]));
266
267
268 foreach(var t in m_transitions)
269 Console.WriteLine(
270 "[{0}] -{{{1}}}-> [{2}]{3}",
271 stateMap[t.s1],
272 String.Join(",", inputMap[t.edge]),
273 stateMap[t.s2],
274 m_finalStates.Contains(t.s2) ? "$" : ""
275 );
276
277 }
278
279 }
280 }
@@ -0,0 +1,9
1 using System;
2
3 namespace Implab.Automaton.RegularExpressions {
4 public interface IDFATable2<TTag> : IDFATable {
5 void MarkFinalState(int state, TTag[] tags);
6
7 }
8 }
9
@@ -11,5 +11,16
11
11
12 public DFAStateDescriptior(int[] transitions) : this(transitions, false) {
12 public DFAStateDescriptior(int[] transitions) : this(transitions, false) {
13 }
13 }
14
15 public DFAStateDescriptior(int size, bool final) {
16 Safe.ArgumentInRange(size, 0, int.MaxValue, "size");
17
18 this.final = final;
19
20 transitions = new int[size];
21
22 for (int i = 0; i < size; i++)
23 transitions[i] = DFAConst.UNREACHABLE_STATE;
24 }
14 }
25 }
15 }
26 }
@@ -1,7 +1,7
1 using System;
1 using System;
2
2
3 namespace Implab.Automaton.RegularExpressions {
3 namespace Implab.Automaton.RegularExpressions {
4 public class RegularDFADefinition<TInput, TTag> : DFATransitionTable<TTag>, IDFATransitionTable<TTag> {
4 public class RegularDFADefinition<TInput, TTag> : DFATable {
5
5
6 readonly IAlphabet<TInput> m_alphabet;
6 readonly IAlphabet<TInput> m_alphabet;
7
7
@@ -18,7 +18,7 namespace Implab.Automaton.RegularExpres
18 }
18 }
19 }
19 }
20
20
21 protected override DFAStateDescriptior<TTag>[] ConstructTransitionTable() {
21 protected override DFAStateDescriptior[] ConstructTransitionTable() {
22 if (InputAlphabet.Count != m_alphabet.Count)
22 if (InputAlphabet.Count != m_alphabet.Count)
23 throw new InvalidOperationException("The alphabet doesn't match the transition table");
23 throw new InvalidOperationException("The alphabet doesn't match the transition table");
24
24
@@ -183,13 +183,14
183 <Compile Include="Formats\JSON\StringTranslator.cs" />
183 <Compile Include="Formats\JSON\StringTranslator.cs" />
184 <Compile Include="Automaton\MapAlphabet.cs" />
184 <Compile Include="Automaton\MapAlphabet.cs" />
185 <Compile Include="Automaton\DummyAlphabet.cs" />
185 <Compile Include="Automaton\DummyAlphabet.cs" />
186 <Compile Include="Automaton\DFATransitionTable.cs" />
187 <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" />
186 <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" />
188 <Compile Include="Formats\CharAlphabet.cs" />
187 <Compile Include="Formats\CharAlphabet.cs" />
189 <Compile Include="Formats\ByteAlphabet.cs" />
188 <Compile Include="Formats\ByteAlphabet.cs" />
190 <Compile Include="Formats\RegularCharDFADefinition.cs" />
189 <Compile Include="Formats\RegularCharDFADefinition.cs" />
191 <Compile Include="Automaton\IDFATable.cs" />
190 <Compile Include="Automaton\IDFATable.cs" />
192 <Compile Include="Automaton\IDFATableBuilder.cs" />
191 <Compile Include="Automaton\IDFATableBuilder.cs" />
192 <Compile Include="Automaton\DFATable.cs" />
193 <Compile Include="Automaton\RegularExpressions\IDFATable2.cs" />
193 </ItemGroup>
194 </ItemGroup>
194 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
195 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
195 <ItemGroup />
196 <ItemGroup />
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now