@@ -0,0 +1,251 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | ||
|
6 | namespace Implab.Automaton { | |
|
7 | public class DFATransitionTable<TTag> : IDFATransitionTableBuilder<TTag> { | |
|
8 | DFAStateDescriptior<TTag>[] m_dfaTable; | |
|
9 | ||
|
10 | int m_stateCount; | |
|
11 | int m_symbolCount; | |
|
12 | int m_initialState; | |
|
13 | ||
|
14 | readonly Dictionary<int, TTag[]> m_finalStates = new Dictionary<int, TTag[]>(); | |
|
15 | readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>(); | |
|
16 | ||
|
17 | ||
|
18 | #region IDFADefinition implementation | |
|
19 | ||
|
20 | public DFAStateDescriptior<TTag>[] GetTransitionTable() { | |
|
21 | if (m_dfaTable == null) { | |
|
22 | if (m_stateCount <= 0) | |
|
23 | throw new InvalidOperationException("Invalid automaton definition: states count = {0}", m_stateCount); | |
|
24 | if (m_symbolCount <= 0) | |
|
25 | throw new InvalidOperationException("Invalid automaton definition: symbols count = {0}", m_symbolCount); | |
|
26 | ||
|
27 | m_dfaTable = ConstructTransitionTable(); | |
|
28 | } | |
|
29 | return m_dfaTable; | |
|
30 | } | |
|
31 | ||
|
32 | public bool IsFinalState(int s) { | |
|
33 | Safe.ArgumentInRange(s, 0, m_stateCount, "s"); | |
|
34 | ||
|
35 | return m_finalStates.ContainsKey(s); | |
|
36 | } | |
|
37 | ||
|
38 | public IEnumerable<KeyValuePair<int,TTag[]>> FinalStates { | |
|
39 | get { | |
|
40 | return m_finalStates; | |
|
41 | } | |
|
42 | } | |
|
43 | ||
|
44 | public int StateCount { | |
|
45 | get { return m_stateCount; } | |
|
46 | } | |
|
47 | ||
|
48 | public int AlphabetSize { | |
|
49 | get { return m_symbolCount; } | |
|
50 | } | |
|
51 | ||
|
52 | public int InitialState { | |
|
53 | get { return m_initialState; } | |
|
54 | } | |
|
55 | ||
|
56 | #endregion | |
|
57 | ||
|
58 | protected virtual DFAStateDescriptior<TTag>[] ConstructTransitionTable() { | |
|
59 | var dfaTable = new DFAStateDescriptior<TTag>[m_stateCount]; | |
|
60 | ||
|
61 | foreach (var pair in m_finalStates) { | |
|
62 | var idx = pair.Key; | |
|
63 | ||
|
64 | dfaTable[idx].final = true; | |
|
65 | dfaTable[idx].tag = pair.Value; | |
|
66 | } | |
|
67 | ||
|
68 | foreach (var t in m_transitions) { | |
|
69 | if (dfaTable[t.s1].transitions == null) { | |
|
70 | dfaTable[t.s1].transitions = new int[m_symbolCount]; | |
|
71 | for (int i = 0; i < dfaTable[t.s1].transitions.Length; i++) | |
|
72 | dfaTable[t.s1].transitions[i] = DFAConst.UNREACHABLE_STATE; | |
|
73 | } | |
|
74 | ||
|
75 | dfaTable[t.s1].transitions[t.edge] = t.s2; | |
|
76 | } | |
|
77 | } | |
|
78 | ||
|
79 | #region IDFADefinitionBuilder | |
|
80 | ||
|
81 | public void DefineTransition(int s1, int s2, int symbol) { | |
|
82 | if (m_dfaTable != null) | |
|
83 | throw new InvalidOperationException("The transition table is already built"); | |
|
84 | ||
|
85 | Safe.ArgumentAssert(s1 > 0, "s1"); | |
|
86 | Safe.ArgumentAssert(s2 > 0, "s2"); | |
|
87 | Safe.ArgumentAssert(symbol >= 0, "symbol"); | |
|
88 | ||
|
89 | m_stateCount = Math.Max(Math.Max(m_stateCount, s1 + 1), s2 + 1); | |
|
90 | m_symbolCount = Math.Max(m_symbolCount, symbol + 1); | |
|
91 | ||
|
92 | m_transitions.Add(new AutomatonTransition(s1, s2, symbol)); | |
|
93 | } | |
|
94 | ||
|
95 | public void MarkFinalState(int state, params TTag[] tags) { | |
|
96 | if (m_dfaTable != null) | |
|
97 | throw new InvalidOperationException("The transition table is already built"); | |
|
98 | ||
|
99 | m_finalStates[state] = tags; | |
|
100 | } | |
|
101 | ||
|
102 | public void SetInitialState(int s) { | |
|
103 | Safe.ArgumentAssert(s >= 0, "s"); | |
|
104 | m_initialState = s; | |
|
105 | } | |
|
106 | ||
|
107 | ||
|
108 | #endregion | |
|
109 | ||
|
110 | protected void Optimize<TInput, TState>( | |
|
111 | IDFATransitionTableBuilder<TTag> optimalDFA, | |
|
112 | IAlphabet<TInput> inputAlphabet, | |
|
113 | IAlphabetBuilder<TInput> optimalInputAlphabet, | |
|
114 | IAlphabet<TState> stateAlphabet, | |
|
115 | IAlphabetBuilder<TState> optimalStateAlphabet | |
|
116 | ) { | |
|
117 | Safe.ArgumentNotNull(optimalDFA, "dfa"); | |
|
118 | Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet"); | |
|
119 | Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet"); | |
|
120 | Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); | |
|
121 | Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); | |
|
122 | ||
|
123 | if (inputAlphabet.Count != m_symbolCount) | |
|
124 | throw new InvalidOperationException("The input symbols aphabet mismatch"); | |
|
125 | if (stateAlphabet.Count != m_stateCount) | |
|
126 | throw new InvalidOperationException("The states alphabet mismatch"); | |
|
127 | ||
|
128 | var setComparer = new CustomEqualityComparer<HashSet<int>>( | |
|
129 | (x, y) => x.SetEquals(y), | |
|
130 | s => s.Sum(x => x.GetHashCode()) | |
|
131 | ); | |
|
132 | ||
|
133 | var arrayComparer = new CustomEqualityComparer<TTag[]>( | |
|
134 | (x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)), | |
|
135 | a => a.Sum(x => x.GetHashCode()) | |
|
136 | ); | |
|
137 | ||
|
138 | var optimalStates = new HashSet<HashSet<int>>(setComparer); | |
|
139 | var queue = new HashSet<HashSet<int>>(setComparer); | |
|
140 | ||
|
141 | // получаем конечные состояния, сгруппированные по маркерам | |
|
142 | optimalStates.UnionWith( | |
|
143 | m_finalStates | |
|
144 | .GroupBy(pair => pair.Value, arrayComparer) | |
|
145 | .Select( | |
|
146 | g => new HashSet<int>( | |
|
147 | g.Select( pair => pair.Key) | |
|
148 | ) | |
|
149 | ) | |
|
150 | ); | |
|
151 | ||
|
152 | var state = new HashSet<int>( | |
|
153 | Enumerable | |
|
154 | .Range(0, m_stateCount - 1) | |
|
155 | .Where(i => !m_finalStates.ContainsKey(i)) | |
|
156 | ); | |
|
157 | optimalStates.Add(state); | |
|
158 | queue.Add(state); | |
|
159 | ||
|
160 | var rmap = m_transitions | |
|
161 | .GroupBy(t => t.s2) | |
|
162 | .ToLookup( | |
|
163 | g => g.Key, // s2 | |
|
164 | g => g.ToLookup(t => t.edge, t => t.s1) | |
|
165 | ); | |
|
166 | ||
|
167 | while (queue.Count > 0) { | |
|
168 | var stateA = queue.First(); | |
|
169 | queue.Remove(stateA); | |
|
170 | ||
|
171 | for (int c = 0; c < m_symbolCount; c++) { | |
|
172 | var stateX = new HashSet<int>(); | |
|
173 | foreach(var a in stateA) | |
|
174 | stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a' | |
|
175 | ||
|
176 | foreach (var stateY in optimalStates.ToArray()) { | |
|
177 | if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { | |
|
178 | var stateR1 = new HashSet<int>(stateY); | |
|
179 | var stateR2 = new HashSet<int>(stateY); | |
|
180 | ||
|
181 | stateR1.IntersectWith(stateX); | |
|
182 | stateR2.ExceptWith(stateX); | |
|
183 | ||
|
184 | optimalStates.Remove(stateY); | |
|
185 | optimalStates.Add(stateR1); | |
|
186 | optimalStates.Add(stateR2); | |
|
187 | ||
|
188 | if (queue.Contains(stateY)) { | |
|
189 | queue.Remove(stateY); | |
|
190 | queue.Add(stateR1); | |
|
191 | queue.Add(stateR2); | |
|
192 | } else { | |
|
193 | queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); | |
|
194 | } | |
|
195 | } | |
|
196 | } | |
|
197 | } | |
|
198 | } | |
|
199 | ||
|
200 | // карта получения оптимального состояния по соотвествующему ему простому состоянию | |
|
201 | var statesMap = stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates); | |
|
202 | ||
|
203 | // получаем минимальный алфавит | |
|
204 | // входные символы не различимы, если Move(s,a1) == Move(s,a2) | |
|
205 | var optimalAlphabet = m_transitions | |
|
206 | .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge); | |
|
207 | ||
|
208 | var alphabetMap = inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet); | |
|
209 | ||
|
210 | var optimalTags = m_finalStates | |
|
211 | .GroupBy(pair => statesMap[pair.Key]) | |
|
212 | .ToDictionary( | |
|
213 | g => g.Key, | |
|
214 | g => g.SelectMany(pair => pair.Value).ToArray() | |
|
215 | ); | |
|
216 | ||
|
217 | // построение автомата | |
|
218 | optimalDFA.SetInitialState(statesMap[m_initialState]); | |
|
219 | ||
|
220 | foreach (var pair in optimalTags) | |
|
221 | optimalDFA.MarkFinalState(pair.Key, pair.Value); | |
|
222 | ||
|
223 | foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct()) | |
|
224 | optimalDFA.DefineTransition(t.s1, t.s2, t.edge); | |
|
225 | ||
|
226 | } | |
|
227 | ||
|
228 | protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { | |
|
229 | Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); | |
|
230 | Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); | |
|
231 | ||
|
232 | var inputMap = inputAlphabet.CreateReverseMap(); | |
|
233 | var stateMap = stateAlphabet.CreateReverseMap(); | |
|
234 | ||
|
235 | for (int i = 0; i < inputMap.Length; i++) | |
|
236 | Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i])); | |
|
237 | ||
|
238 | ||
|
239 | foreach(var t in m_transitions) | |
|
240 | Console.WriteLine( | |
|
241 | "[{0}] -{{{1}}}-> [{2}]{3}", | |
|
242 | stateMap[t.s1], | |
|
243 | String.Join(",", inputMap[t.edge]), | |
|
244 | stateMap[t.s2], | |
|
245 | m_finalStates.ContainsKey(t.s2) ? "$" : "" | |
|
246 | ); | |
|
247 | ||
|
248 | } | |
|
249 | ||
|
250 | } | |
|
251 | } |
@@ -0,0 +1,59 | |||
|
1 | using System.Collections.Generic; | |
|
2 | ||
|
3 | ||
|
4 | namespace Implab.Automaton { | |
|
5 | /// <summary> | |
|
6 | /// Полностью описывает DFA автомат, его поведение, состояние и входные символы. | |
|
7 | /// </summary> | |
|
8 | /// <example> | |
|
9 | /// class MyAutomaton { | |
|
10 | /// int m_current; | |
|
11 | /// readonly DFAStateDescriptor<string>[] m_automaton; | |
|
12 | /// readonly IAlphabet<MyCommands> m_commands; | |
|
13 | /// | |
|
14 | /// public MyAutomaton(IDFADefinition<MyCommands,MyStates,string> definition) { | |
|
15 | /// m_current = definition.StateAlphabet.Translate(MyStates.Initial); | |
|
16 | /// m_automaton = definition.GetTransitionTable(); | |
|
17 | /// m_commands = definition.InputAlphabet; | |
|
18 | /// } | |
|
19 | /// | |
|
20 | /// // defined a method which will move the automaton to the next state | |
|
21 | /// public void Move(MyCommands cmd) { | |
|
22 | /// // use transition map to determine the next state | |
|
23 | /// var next = m_automaton[m_current].transitions[m_commands.Translate(cmd)]; | |
|
24 | /// | |
|
25 | /// // validate that we aren't in the unreachable state | |
|
26 | /// if (next == DFAConst.UNREACHABLE_STATE) | |
|
27 | /// throw new InvalidOperationException("The specified command is invalid"); | |
|
28 | /// | |
|
29 | /// // if everything is ok | |
|
30 | /// m_current = next; | |
|
31 | /// } | |
|
32 | /// } | |
|
33 | /// </example> | |
|
34 | public interface IDFATransitionTable<TTag> { | |
|
35 | /// <summary> | |
|
36 | /// Таблица переходов состояний автомата | |
|
37 | /// </summary> | |
|
38 | /// <returns>The transition table.</returns> | |
|
39 | DFAStateDescriptior<TTag>[] GetTransitionTable(); | |
|
40 | ||
|
41 | int StateCount { | |
|
42 | get; | |
|
43 | } | |
|
44 | ||
|
45 | int AlphabetSize { | |
|
46 | get; | |
|
47 | } | |
|
48 | ||
|
49 | int InitialState { | |
|
50 | get; | |
|
51 | } | |
|
52 | ||
|
53 | bool IsFinalState(int s); | |
|
54 | ||
|
55 | IEnumerable<KeyValuePair<int,TTag[]>> FinalStates { | |
|
56 | get; | |
|
57 | } | |
|
58 | } | |
|
59 | } |
@@ -0,0 +1,25 | |||
|
1 | using System; | |
|
2 | ||
|
3 | namespace Implab.Automaton { | |
|
4 | public interface IDFATransitionTableBuilder<TTag> : IDFATransitionTable<TTag> { | |
|
5 | /// <summary> | |
|
6 | /// Marks the state as final and assings tags. | |
|
7 | /// </summary> | |
|
8 | /// <param name="state">State.</param> | |
|
9 | /// <param name="tags">Tags.</param> | |
|
10 | void MarkFinalState(int state, params TTag[] tags); | |
|
11 | ||
|
12 | /// <summary> | |
|
13 | /// Defines the transition from <paramref name="s1"/> to | |
|
14 | /// <paramref name="s2"/> with input <paramref name="symbol"/>. | |
|
15 | /// </summary> | |
|
16 | /// <param name="s1">S1.</param> | |
|
17 | /// <param name="s2">S2.</param> | |
|
18 | /// <param name="symbol">Symbol.</param> | |
|
19 | void DefineTransition(int s1, int s2, int symbol); | |
|
20 | ||
|
21 | void SetInitialState(int s); | |
|
22 | ||
|
23 | } | |
|
24 | } | |
|
25 |
@@ -0,0 +1,46 | |||
|
1 | using System; | |
|
2 | ||
|
3 | namespace Implab.Automaton.RegularExpressions { | |
|
4 | public class RegularDFADefinition<TInput, TTag> : DFATransitionTable<TTag>, IDFATransitionTable<TTag> { | |
|
5 | ||
|
6 | readonly IAlphabet<TInput> m_alphabet; | |
|
7 | readonly int m_initialState; | |
|
8 | ||
|
9 | public RegularDFADefinition(IAlphabet<TInput> alphabet, int initialState) { | |
|
10 | Safe.ArgumentNotNull(alphabet, "aplhabet"); | |
|
11 | ||
|
12 | m_alphabet = alphabet; | |
|
13 | m_initialState = initialState; | |
|
14 | } | |
|
15 | ||
|
16 | ||
|
17 | public IAlphabet<TInput> InputAlphabet { | |
|
18 | get { | |
|
19 | return m_alphabet; | |
|
20 | } | |
|
21 | } | |
|
22 | ||
|
23 | protected override DFAStateDescriptior<TTag>[] ConstructTransitionTable() { | |
|
24 | if (InputAlphabet.Count != m_alphabet.Count) | |
|
25 | throw new InvalidOperationException("The alphabet doesn't match the transition table"); | |
|
26 | ||
|
27 | return base.ConstructTransitionTable(); | |
|
28 | } | |
|
29 | ||
|
30 | /// <summary> | |
|
31 | /// Optimize the specified alphabet. | |
|
32 | /// </summary> | |
|
33 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> | |
|
34 | public RegularDFADefinition<TInput, TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { | |
|
35 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |
|
36 | ||
|
37 | var optimalDFA = new RegularDFADefinition<TInput,TTag>(alphabet, m_initialState); | |
|
38 | ||
|
39 | Optimize(optimalDFA, InputAlphabet, alphabet, new DummyAlphabet(StateCount), new MapAlphabet<int>()); | |
|
40 | ||
|
41 | } | |
|
42 | ||
|
43 | ||
|
44 | } | |
|
45 | } | |
|
46 |
@@ -0,0 +1,25 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | ||
|
5 | namespace Implab.Automaton { | |
|
6 | public class ByteAlphabet : IndexedAlphabetBase<byte> { | |
|
7 | public ByteAlphabet() : base(byte.MaxValue + 1){ | |
|
8 | } | |
|
9 | ||
|
10 | #region implemented abstract members of IndexedAlphabetBase | |
|
11 | ||
|
12 | public override int GetSymbolIndex(byte symbol) { | |
|
13 | return (int)symbol; | |
|
14 | } | |
|
15 | ||
|
16 | public IEnumerable<byte> InputSymbols { | |
|
17 | get { | |
|
18 | return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>(); | |
|
19 | } | |
|
20 | } | |
|
21 | ||
|
22 | #endregion | |
|
23 | } | |
|
24 | } | |
|
25 |
@@ -0,0 +1,23 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
7 | ||
|
8 | namespace Implab.Automaton { | |
|
9 | public class CharAlphabet: IndexedAlphabetBase<char> { | |
|
10 | ||
|
11 | public CharAlphabet() | |
|
12 | : base(char.MaxValue + 1) { | |
|
13 | } | |
|
14 | ||
|
15 | public override int GetSymbolIndex(char symbol) { | |
|
16 | return symbol; | |
|
17 | } | |
|
18 | ||
|
19 | public override IEnumerable<char> InputSymbols { | |
|
20 | get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast<char>(); } | |
|
21 | } | |
|
22 | } | |
|
23 | } |
@@ -1,10 +1,4 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
3 | using System.Linq; | |
|
4 | using System.Text; | |
|
5 | using System.Threading.Tasks; | |
|
6 | ||
|
7 | namespace Implab.Automaton { | |
|
1 | namespace Implab.Automaton { | |
|
8 | 2 | public struct DFAStateDescriptior<TTag> { |
|
9 | 3 | public bool final; |
|
10 | 4 | public TTag[] tag; |
@@ -3,8 +3,16 using System.Collections.Generic; | |||
|
3 | 3 | using System.Linq; |
|
4 | 4 | |
|
5 | 5 | namespace Implab.Automaton { |
|
6 | /// <summary> | |
|
7 | /// Dummy alphabet consists of integer numbers which are identical to their classes. | |
|
8 | /// </summary> | |
|
6 | 9 | public class DummyAlphabet : IAlphabet<int> { |
|
7 | 10 | readonly int m_size; |
|
11 | ||
|
12 | /// <summary> | |
|
13 | /// Creates a new dummy alphabet with given size. | |
|
14 | /// </summary> | |
|
15 | /// <param name="size">The size of the alphabet, must be greater then zero.</param> | |
|
8 | 16 | public DummyAlphabet(int size) { |
|
9 | 17 | Safe.ArgumentAssert(size > 0); |
|
10 | 18 | m_size = 0; |
@@ -21,6 +29,8 namespace Implab.Automaton { | |||
|
21 | 29 | Safe.ArgumentNotNull(classes, "classes"); |
|
22 | 30 | var map = new int[m_size]; |
|
23 | 31 | foreach (var cls in classes) { |
|
32 | if (cls.Contains(DFAConst.UNCLASSIFIED_INPUT)) | |
|
33 | continue; | |
|
24 | 34 | var newid = newAlphabet.DefineClass(cls); |
|
25 | 35 | foreach (var id in cls) |
|
26 | 36 | map[id] = newid; |
@@ -12,46 +12,49 namespace Implab.Automaton { | |||
|
12 | 12 | /// <typeparam name="T">Тип перечислений</typeparam> |
|
13 | 13 | public class EnumAlphabet<T> : IndexedAlphabetBase<T> where T : struct, IConvertible { |
|
14 | 14 | [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")] |
|
15 | static readonly T[] _symbols; | |
|
16 | static readonly EnumAlphabet<T> _fullAlphabet; | |
|
15 | static readonly Lazy<T[]> _symbols = new Lazy<T[]>(GetSymbols); | |
|
16 | ||
|
17 | [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")] | |
|
18 | static readonly Lazy<EnumAlphabet<T>> _fullAlphabet = new Lazy<EnumAlphabet<T>>(CreateEnumAlphabet); | |
|
19 | ||
|
20 | static EnumAlphabet<T> CreateEnumAlphabet() { | |
|
21 | var symbols = _symbols.Value; | |
|
17 | 22 | |
|
18 | [SuppressMessage("Microsoft.Design", "CA1065:DoNotRaiseExceptionsInUnexpectedLocations")] | |
|
19 | static EnumAlphabet() { | |
|
23 | if ( | |
|
24 | symbols[symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= symbols.Length | |
|
25 | || symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0 | |
|
26 | ) | |
|
27 | throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered"); | |
|
28 | ||
|
29 | return new EnumAlphabet<T>(symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray()); | |
|
30 | } | |
|
31 | ||
|
32 | static T[] GetSymbols() { | |
|
20 | 33 | if (!typeof(T).IsEnum) |
|
21 | 34 | throw new InvalidOperationException("Invalid generic parameter, enumeration is required"); |
|
22 | 35 | |
|
23 | 36 | if (Enum.GetUnderlyingType(typeof(T)) != typeof(Int32)) |
|
24 | 37 | throw new InvalidOperationException("Only enums based on Int32 are supported"); |
|
25 | 38 | |
|
26 |
|
|
|
39 | return ((T[])Enum.GetValues(typeof(T))) | |
|
27 | 40 | .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture)) |
|
28 | 41 | .ToArray(); |
|
29 | ||
|
30 | if ( | |
|
31 | _symbols[_symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= _symbols.Length | |
|
32 | || _symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0 | |
|
33 | ) | |
|
34 | throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered"); | |
|
35 | ||
|
36 | _fullAlphabet = new EnumAlphabet<T>(_symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray()); | |
|
37 | 42 | } |
|
38 | 43 | |
|
39 | ||
|
40 | ||
|
41 | 44 | public static EnumAlphabet<T> FullAlphabet { |
|
42 | 45 | get { |
|
43 | return _fullAlphabet; | |
|
46 | return _fullAlphabet.Value; | |
|
44 | 47 | } |
|
45 | 48 | } |
|
46 | 49 | |
|
47 | 50 | |
|
48 | 51 | public EnumAlphabet() |
|
49 | : base(_symbols.Length) { | |
|
52 | : base(_symbols.Value.Length) { | |
|
50 | 53 | } |
|
51 | 54 | |
|
52 | 55 | public EnumAlphabet(int[] map) |
|
53 | 56 | : base(map) { |
|
54 | Debug.Assert(map.Length == _symbols.Length); | |
|
57 | Debug.Assert(map.Length == _symbols.Value.Length); | |
|
55 | 58 | } |
|
56 | 59 | |
|
57 | 60 | |
@@ -60,7 +63,7 namespace Implab.Automaton { | |||
|
60 | 63 | } |
|
61 | 64 | |
|
62 | 65 | public override IEnumerable<T> InputSymbols { |
|
63 | get { return _symbols; } | |
|
66 | get { return _symbols.Value; } | |
|
64 | 67 | } |
|
65 | 68 | |
|
66 | 69 | } |
@@ -17,9 +17,6 namespace Implab.Automaton { | |||
|
17 | 17 | /// <param name="symbols">Множестов исходных символов</param> |
|
18 | 18 | /// <returns>Идентификатор символа алфавита.</returns> |
|
19 | 19 | int DefineClass(IEnumerable<TSymbol> symbols); |
|
20 | ||
|
21 | ||
|
22 | ||
|
23 | 20 | } |
|
24 | 21 | } |
|
25 | 22 |
@@ -9,8 +9,6 namespace Implab.Automaton { | |||
|
9 | 9 | /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index. |
|
10 | 10 | /// </summary> |
|
11 | 11 | public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> { |
|
12 | public const int UNCLASSIFIED = 0; | |
|
13 | ||
|
14 | 12 | int m_nextId = 1; |
|
15 | 13 | readonly int[] m_map; |
|
16 | 14 | |
@@ -31,7 +29,7 namespace Implab.Automaton { | |||
|
31 | 29 | |
|
32 | 30 | public int DefineSymbol(T symbol) { |
|
33 | 31 | var index = GetSymbolIndex(symbol); |
|
34 | if (m_map[index] == UNCLASSIFIED) | |
|
32 | if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT) | |
|
35 | 33 | m_map[index] = m_nextId++; |
|
36 | 34 | return m_map[index]; |
|
37 | 35 | } |
@@ -42,7 +40,7 namespace Implab.Automaton { | |||
|
42 | 40 | |
|
43 | 41 | foreach (var symbol in symbols) { |
|
44 | 42 | var index = GetSymbolIndex(symbol); |
|
45 | if (m_map[index] == UNCLASSIFIED) | |
|
43 | if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT) | |
|
46 | 44 | m_map[GetSymbolIndex(symbol)] = m_nextId; |
|
47 | 45 | else |
|
48 | 46 | throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol)); |
@@ -52,10 +50,10 namespace Implab.Automaton { | |||
|
52 | 50 | |
|
53 | 51 | public List<T>[] CreateReverseMap() { |
|
54 | 52 | return |
|
55 |
Enumerable.Range( |
|
|
53 | Enumerable.Range(0, Count) | |
|
56 | 54 | .Select( |
|
57 | 55 | i => InputSymbols |
|
58 |
|
|
|
56 | .Where(x => i != DFAConst.UNCLASSIFIED_INPUT && m_map[GetSymbolIndex(x)] == i) | |
|
59 | 57 | .ToList() |
|
60 | 58 | ) |
|
61 | 59 | .ToArray(); |
@@ -70,7 +68,7 namespace Implab.Automaton { | |||
|
70 | 68 | |
|
71 | 69 | foreach (var scl in classes) { |
|
72 | 70 | // skip if the supper class contains the unclassified element |
|
73 | if (scl.Contains(UNCLASSIFIED)) | |
|
71 | if (scl.Contains(DFAConst.UNCLASSIFIED_INPUT)) | |
|
74 | 72 | continue; |
|
75 | 73 | var range = new List<T>(); |
|
76 | 74 | foreach (var cl in scl) { |
@@ -5,11 +5,14 using System.Linq; | |||
|
5 | 5 | namespace Implab.Automaton { |
|
6 | 6 | public class MapAlphabet<T> : IAlphabetBuilder<T> { |
|
7 | 7 | readonly Dictionary<T,int> m_map; |
|
8 | int m_nextCls; | |
|
8 | int m_nextCls = 1; | |
|
9 | ||
|
10 | public MapAlphabet() { | |
|
11 | m_map = new Dictionary<T, int>(); | |
|
12 | } | |
|
9 | 13 | |
|
10 | 14 | public MapAlphabet(IEqualityComparer<T> comparer) { |
|
11 | 15 | m_map = new Dictionary<T, int>(comparer); |
|
12 | m_nextCls = 1; | |
|
13 | 16 | } |
|
14 | 17 | |
|
15 | 18 | #region IAlphabetBuilder implementation |
@@ -71,6 +74,9 namespace Implab.Automaton { | |||
|
71 | 74 | var map = new int[rmap.Length]; |
|
72 | 75 | |
|
73 | 76 | foreach (var cls in classes) { |
|
77 | if (cls.Contains(DFAConst.UNCLASSIFIED_INPUT)) | |
|
78 | continue; | |
|
79 | ||
|
74 | 80 | var symbols = new List<T>(); |
|
75 | 81 | foreach (var id in cls) { |
|
76 | 82 | if (id < 0 || id >= rmap.Length) |
@@ -71,14 +71,16 namespace Implab.Automaton.RegularExpres | |||
|
71 | 71 | protected CDFADefinition BuildDFA(Token<TTag> lang) { |
|
72 | 72 | Safe.ArgumentNotNull(lang, "lang"); |
|
73 | 73 | |
|
74 |
var dfa = new |
|
|
74 | var dfa = new RegularDFADefinition<TSymbol, TTag>(Alphabet, 0); | |
|
75 | ||
|
76 | var table = new DFATransitionTable<TTag>(); | |
|
75 | 77 | |
|
76 | 78 | var builder = new RegularDFABuilder<TTag>(); |
|
77 | 79 | |
|
78 | 80 | lang.Accept( builder ); |
|
79 | 81 | |
|
80 |
builder.BuildDFA( |
|
|
81 |
if ( |
|
|
82 | var initialState = builder.BuildDFA(table); | |
|
83 | if (table.IsFinalState(initialState)) | |
|
82 | 84 | throw new ApplicationException("The specified language contains empty token"); |
|
83 | 85 | |
|
84 | 86 | return dfa.Optimize(); |
@@ -122,7 +122,7 namespace Implab.Automaton.RegularExpres | |||
|
122 | 122 | m_ends.Add(m_idx, token.Tag); |
|
123 | 123 | } |
|
124 | 124 | |
|
125 |
public void BuildDFA(IDFA |
|
|
125 | public void BuildDFA(IDFATransitionTableBuilder<TTag> dfa) { | |
|
126 | 126 | Safe.ArgumentNotNull(dfa,"dfa"); |
|
127 | 127 | |
|
128 | 128 | var states = new MapAlphabet<HashSet<int>>(new CustomEqualityComparer<HashSet<int>>( |
@@ -131,6 +131,7 namespace Implab.Automaton.RegularExpres | |||
|
131 | 131 | )); |
|
132 | 132 | |
|
133 | 133 | var initialState = states.DefineSymbol(m_firstpos); |
|
134 | dfa.SetInitialState(initialState); | |
|
134 | 135 | |
|
135 | 136 | var tags = GetStateTags(m_firstpos); |
|
136 | 137 | if (tags != null && tags.Length > 0) |
@@ -17,12 +17,14 namespace Implab.Automaton { | |||
|
17 | 17 | struct ScannerConfig { |
|
18 | 18 | public DFAStateDescriptior<TTag>[] states; |
|
19 | 19 | public int[] alphabetMap; |
|
20 | public int initialState; | |
|
20 | 21 | } |
|
21 | 22 | |
|
22 | 23 | Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>(); |
|
23 | 24 | |
|
24 | 25 | DFAStateDescriptior<TTag>[] m_states; |
|
25 | 26 | int[] m_alphabetMap; |
|
27 | int m_initialState; | |
|
26 | 28 | |
|
27 | 29 | protected DFAStateDescriptior<TTag> m_currentState; |
|
28 | 30 | int m_previewCode; |
@@ -39,12 +41,13 namespace Implab.Automaton { | |||
|
39 | 41 | int m_chunkSize = 1024; // 1k |
|
40 | 42 | int m_limit = 10 * 1024 * 1024; // 10Mb |
|
41 | 43 | |
|
42 | protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet) { | |
|
44 | protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { | |
|
43 | 45 | Safe.ArgumentNotEmpty(states, "states"); |
|
44 | 46 | Safe.ArgumentNotNull(alphabet, "alphabet"); |
|
45 | 47 | |
|
46 | 48 | m_states = states; |
|
47 | 49 | m_alphabetMap = alphabet; |
|
50 | m_initialState = initialState; | |
|
48 | 51 | |
|
49 | 52 | Feed(new char[0]); |
|
50 | 53 | } |
@@ -130,7 +133,7 namespace Implab.Automaton { | |||
|
130 | 133 | if (m_pointer >= m_bufferSize) |
|
131 | 134 | return false; |
|
132 | 135 | |
|
133 |
m_currentState = m_states[ |
|
|
136 | m_currentState = m_states[m_initialState]; | |
|
134 | 137 | m_tokenLen = 0; |
|
135 | 138 | m_tokenOffset = m_pointer; |
|
136 | 139 | int nextState; |
@@ -217,16 +220,18 namespace Implab.Automaton { | |||
|
217 | 220 | /// </summary> |
|
218 | 221 | /// <param name="states">Таблица состояний нового ДКА</param> |
|
219 | 222 | /// <param name="alphabet">Таблица входных символов для нового ДКА</param> |
|
220 | protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet) { | |
|
223 | protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { | |
|
221 | 224 | Safe.ArgumentNotNull(states, "dfa"); |
|
222 | 225 | |
|
223 | 226 | m_defs.Push(new ScannerConfig { |
|
224 | 227 | states = m_states, |
|
225 | alphabetMap = m_alphabetMap | |
|
228 | alphabetMap = m_alphabetMap, | |
|
229 | initialState = m_initialState | |
|
226 | 230 | }); |
|
227 | 231 | |
|
228 | 232 | m_states = states; |
|
229 | 233 | m_alphabetMap = alphabet; |
|
234 | m_initialState = initialState; | |
|
230 | 235 | |
|
231 | 236 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; |
|
232 | 237 | } |
@@ -240,6 +245,7 namespace Implab.Automaton { | |||
|
240 | 245 | var prev = m_defs.Pop(); |
|
241 | 246 | m_states = prev.states; |
|
242 | 247 | m_alphabetMap = prev.alphabetMap; |
|
248 | m_initialState = prev.initialState; | |
|
243 | 249 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; |
|
244 | 250 | } |
|
245 | 251 |
@@ -8,6 +8,9 | |||
|
8 | 8 | <RootNamespace>Implab</RootNamespace> |
|
9 | 9 | <AssemblyName>Implab</AssemblyName> |
|
10 | 10 | <TargetFrameworkVersion>v4.5</TargetFrameworkVersion> |
|
11 | <ReleaseVersion>0.2</ReleaseVersion> | |
|
12 | <ProductVersion>8.0.30703</ProductVersion> | |
|
13 | <SchemaVersion>2.0</SchemaVersion> | |
|
11 | 14 | </PropertyGroup> |
|
12 | 15 | <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> |
|
13 | 16 | <DebugSymbols>true</DebugSymbols> |
@@ -148,20 +151,14 | |||
|
148 | 151 | <Compile Include="Components\ExecutionState.cs" /> |
|
149 | 152 | <Compile Include="Components\RunnableComponent.cs" /> |
|
150 | 153 | <Compile Include="Components\IFactory.cs" /> |
|
151 | <Compile Include="Automaton\CDFADefinition.cs" /> | |
|
152 | 154 | <Compile Include="Automaton\DFAStateDescriptor.cs" /> |
|
153 | 155 | <Compile Include="Automaton\DFAutomaton.cs" /> |
|
154 | <Compile Include="Automaton\EDFADefinition.cs" /> | |
|
155 | 156 | <Compile Include="Automaton\EnumAlphabet.cs" /> |
|
156 | 157 | <Compile Include="Automaton\IAlphabet.cs" /> |
|
157 | <Compile Include="Automaton\IDFADefinition.cs" /> | |
|
158 | 158 | <Compile Include="Automaton\ParserException.cs" /> |
|
159 | 159 | <Compile Include="Automaton\Scanner.cs" /> |
|
160 | <Compile Include="Automaton\DFADefinition.cs" /> | |
|
161 | 160 | <Compile Include="Automaton\IndexedAlphabetBase.cs" /> |
|
162 | <Compile Include="Automaton\CharAlphabet.cs" /> | |
|
163 | 161 | <Compile Include="Automaton\IAlphabetBuilder.cs" /> |
|
164 | <Compile Include="Automaton\IDFADefinitionBuilder.cs" /> | |
|
165 | 162 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> |
|
166 | 163 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> |
|
167 | 164 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> |
@@ -174,7 +171,6 | |||
|
174 | 171 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> |
|
175 | 172 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> |
|
176 | 173 | <Compile Include="Automaton\AutomatonTransition.cs" /> |
|
177 | <Compile Include="Automaton\ByteAlphabet.cs" /> | |
|
178 | 174 | <Compile Include="Automaton\RegularExpressions\RegularDFABuilder.cs" /> |
|
179 | 175 | <Compile Include="Formats\JSON\JSONElementContext.cs" /> |
|
180 | 176 | <Compile Include="Formats\JSON\JSONElementType.cs" /> |
@@ -188,6 +184,12 | |||
|
188 | 184 | <Compile Include="Formats\JSON\StringTranslator.cs" /> |
|
189 | 185 | <Compile Include="Automaton\MapAlphabet.cs" /> |
|
190 | 186 | <Compile Include="Automaton\DummyAlphabet.cs" /> |
|
187 | <Compile Include="Automaton\DFATransitionTable.cs" /> | |
|
188 | <Compile Include="Automaton\IDFATransitionTableBuilder.cs" /> | |
|
189 | <Compile Include="Automaton\IDFATransitionTable.cs" /> | |
|
190 | <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" /> | |
|
191 | <Compile Include="Formats\CharAlphabet.cs" /> | |
|
192 | <Compile Include="Formats\ByteAlphabet.cs" /> | |
|
191 | 193 | </ItemGroup> |
|
192 | 194 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
193 | 195 | <ItemGroup /> |
|
1 | NO CONTENT: file was removed |
|
1 | NO CONTENT: file was removed |
|
1 | NO CONTENT: file was removed |
|
1 | NO CONTENT: file was removed |
|
1 | NO CONTENT: file was removed |
|
1 | NO CONTENT: file was removed |
|
1 | NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now