##// END OF EJS Templates
Almost complete DFA refactoring
cin -
r164:ec35731ae299 ref20160224
parent child
Show More
@@ -0,0 +1,251
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Linq;
5
6 namespace Implab.Automaton {
7 public class DFATransitionTable<TTag> : IDFATransitionTableBuilder<TTag> {
8 DFAStateDescriptior<TTag>[] m_dfaTable;
9
10 int m_stateCount;
11 int m_symbolCount;
12 int m_initialState;
13
14 readonly Dictionary<int, TTag[]> m_finalStates = new Dictionary<int, TTag[]>();
15 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
16
17
18 #region IDFADefinition implementation
19
20 public DFAStateDescriptior<TTag>[] GetTransitionTable() {
21 if (m_dfaTable == null) {
22 if (m_stateCount <= 0)
23 throw new InvalidOperationException("Invalid automaton definition: states count = {0}", m_stateCount);
24 if (m_symbolCount <= 0)
25 throw new InvalidOperationException("Invalid automaton definition: symbols count = {0}", m_symbolCount);
26
27 m_dfaTable = ConstructTransitionTable();
28 }
29 return m_dfaTable;
30 }
31
32 public bool IsFinalState(int s) {
33 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
34
35 return m_finalStates.ContainsKey(s);
36 }
37
38 public IEnumerable<KeyValuePair<int,TTag[]>> FinalStates {
39 get {
40 return m_finalStates;
41 }
42 }
43
44 public int StateCount {
45 get { return m_stateCount; }
46 }
47
48 public int AlphabetSize {
49 get { return m_symbolCount; }
50 }
51
52 public int InitialState {
53 get { return m_initialState; }
54 }
55
56 #endregion
57
58 protected virtual DFAStateDescriptior<TTag>[] ConstructTransitionTable() {
59 var dfaTable = new DFAStateDescriptior<TTag>[m_stateCount];
60
61 foreach (var pair in m_finalStates) {
62 var idx = pair.Key;
63
64 dfaTable[idx].final = true;
65 dfaTable[idx].tag = pair.Value;
66 }
67
68 foreach (var t in m_transitions) {
69 if (dfaTable[t.s1].transitions == null) {
70 dfaTable[t.s1].transitions = new int[m_symbolCount];
71 for (int i = 0; i < dfaTable[t.s1].transitions.Length; i++)
72 dfaTable[t.s1].transitions[i] = DFAConst.UNREACHABLE_STATE;
73 }
74
75 dfaTable[t.s1].transitions[t.edge] = t.s2;
76 }
77 }
78
79 #region IDFADefinitionBuilder
80
81 public void DefineTransition(int s1, int s2, int symbol) {
82 if (m_dfaTable != null)
83 throw new InvalidOperationException("The transition table is already built");
84
85 Safe.ArgumentAssert(s1 > 0, "s1");
86 Safe.ArgumentAssert(s2 > 0, "s2");
87 Safe.ArgumentAssert(symbol >= 0, "symbol");
88
89 m_stateCount = Math.Max(Math.Max(m_stateCount, s1 + 1), s2 + 1);
90 m_symbolCount = Math.Max(m_symbolCount, symbol + 1);
91
92 m_transitions.Add(new AutomatonTransition(s1, s2, symbol));
93 }
94
95 public void MarkFinalState(int state, params TTag[] tags) {
96 if (m_dfaTable != null)
97 throw new InvalidOperationException("The transition table is already built");
98
99 m_finalStates[state] = tags;
100 }
101
102 public void SetInitialState(int s) {
103 Safe.ArgumentAssert(s >= 0, "s");
104 m_initialState = s;
105 }
106
107
108 #endregion
109
110 protected void Optimize<TInput, TState>(
111 IDFATransitionTableBuilder<TTag> optimalDFA,
112 IAlphabet<TInput> inputAlphabet,
113 IAlphabetBuilder<TInput> optimalInputAlphabet,
114 IAlphabet<TState> stateAlphabet,
115 IAlphabetBuilder<TState> optimalStateAlphabet
116 ) {
117 Safe.ArgumentNotNull(optimalDFA, "dfa");
118 Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet");
119 Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet");
120 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
121 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
122
123 if (inputAlphabet.Count != m_symbolCount)
124 throw new InvalidOperationException("The input symbols aphabet mismatch");
125 if (stateAlphabet.Count != m_stateCount)
126 throw new InvalidOperationException("The states alphabet mismatch");
127
128 var setComparer = new CustomEqualityComparer<HashSet<int>>(
129 (x, y) => x.SetEquals(y),
130 s => s.Sum(x => x.GetHashCode())
131 );
132
133 var arrayComparer = new CustomEqualityComparer<TTag[]>(
134 (x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)),
135 a => a.Sum(x => x.GetHashCode())
136 );
137
138 var optimalStates = new HashSet<HashSet<int>>(setComparer);
139 var queue = new HashSet<HashSet<int>>(setComparer);
140
141 // получаем конечные состояния, сгруппированные по маркерам
142 optimalStates.UnionWith(
143 m_finalStates
144 .GroupBy(pair => pair.Value, arrayComparer)
145 .Select(
146 g => new HashSet<int>(
147 g.Select( pair => pair.Key)
148 )
149 )
150 );
151
152 var state = new HashSet<int>(
153 Enumerable
154 .Range(0, m_stateCount - 1)
155 .Where(i => !m_finalStates.ContainsKey(i))
156 );
157 optimalStates.Add(state);
158 queue.Add(state);
159
160 var rmap = m_transitions
161 .GroupBy(t => t.s2)
162 .ToLookup(
163 g => g.Key, // s2
164 g => g.ToLookup(t => t.edge, t => t.s1)
165 );
166
167 while (queue.Count > 0) {
168 var stateA = queue.First();
169 queue.Remove(stateA);
170
171 for (int c = 0; c < m_symbolCount; c++) {
172 var stateX = new HashSet<int>();
173 foreach(var a in stateA)
174 stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a'
175
176 foreach (var stateY in optimalStates.ToArray()) {
177 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
178 var stateR1 = new HashSet<int>(stateY);
179 var stateR2 = new HashSet<int>(stateY);
180
181 stateR1.IntersectWith(stateX);
182 stateR2.ExceptWith(stateX);
183
184 optimalStates.Remove(stateY);
185 optimalStates.Add(stateR1);
186 optimalStates.Add(stateR2);
187
188 if (queue.Contains(stateY)) {
189 queue.Remove(stateY);
190 queue.Add(stateR1);
191 queue.Add(stateR2);
192 } else {
193 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
194 }
195 }
196 }
197 }
198 }
199
200 // карта получения оптимального состояния по соотвествующему ему простому состоянию
201 var statesMap = stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates);
202
203 // получаем минимальный алфавит
204 // входные символы не различимы, если Move(s,a1) == Move(s,a2)
205 var optimalAlphabet = m_transitions
206 .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge);
207
208 var alphabetMap = inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet);
209
210 var optimalTags = m_finalStates
211 .GroupBy(pair => statesMap[pair.Key])
212 .ToDictionary(
213 g => g.Key,
214 g => g.SelectMany(pair => pair.Value).ToArray()
215 );
216
217 // построение автомата
218 optimalDFA.SetInitialState(statesMap[m_initialState]);
219
220 foreach (var pair in optimalTags)
221 optimalDFA.MarkFinalState(pair.Key, pair.Value);
222
223 foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct())
224 optimalDFA.DefineTransition(t.s1, t.s2, t.edge);
225
226 }
227
228 protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
229 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
230 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
231
232 var inputMap = inputAlphabet.CreateReverseMap();
233 var stateMap = stateAlphabet.CreateReverseMap();
234
235 for (int i = 0; i < inputMap.Length; i++)
236 Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i]));
237
238
239 foreach(var t in m_transitions)
240 Console.WriteLine(
241 "[{0}] -{{{1}}}-> [{2}]{3}",
242 stateMap[t.s1],
243 String.Join(",", inputMap[t.edge]),
244 stateMap[t.s2],
245 m_finalStates.ContainsKey(t.s2) ? "$" : ""
246 );
247
248 }
249
250 }
251 }
@@ -0,0 +1,59
1 using System.Collections.Generic;
2
3
4 namespace Implab.Automaton {
5 /// <summary>
6 /// Полностью описывает DFA автомат, его поведение, состояние и входные символы.
7 /// </summary>
8 /// <example>
9 /// class MyAutomaton {
10 /// int m_current;
11 /// readonly DFAStateDescriptor<string>[] m_automaton;
12 /// readonly IAlphabet<MyCommands> m_commands;
13 ///
14 /// public MyAutomaton(IDFADefinition&lt;MyCommands,MyStates,string&gt; definition) {
15 /// m_current = definition.StateAlphabet.Translate(MyStates.Initial);
16 /// m_automaton = definition.GetTransitionTable();
17 /// m_commands = definition.InputAlphabet;
18 /// }
19 ///
20 /// // defined a method which will move the automaton to the next state
21 /// public void Move(MyCommands cmd) {
22 /// // use transition map to determine the next state
23 /// var next = m_automaton[m_current].transitions[m_commands.Translate(cmd)];
24 ///
25 /// // validate that we aren't in the unreachable state
26 /// if (next == DFAConst.UNREACHABLE_STATE)
27 /// throw new InvalidOperationException("The specified command is invalid");
28 ///
29 /// // if everything is ok
30 /// m_current = next;
31 /// }
32 /// }
33 /// </example>
34 public interface IDFATransitionTable<TTag> {
35 /// <summary>
36 /// Таблица переходов состояний автомата
37 /// </summary>
38 /// <returns>The transition table.</returns>
39 DFAStateDescriptior<TTag>[] GetTransitionTable();
40
41 int StateCount {
42 get;
43 }
44
45 int AlphabetSize {
46 get;
47 }
48
49 int InitialState {
50 get;
51 }
52
53 bool IsFinalState(int s);
54
55 IEnumerable<KeyValuePair<int,TTag[]>> FinalStates {
56 get;
57 }
58 }
59 }
@@ -0,0 +1,25
1 using System;
2
3 namespace Implab.Automaton {
4 public interface IDFATransitionTableBuilder<TTag> : IDFATransitionTable<TTag> {
5 /// <summary>
6 /// Marks the state as final and assings tags.
7 /// </summary>
8 /// <param name="state">State.</param>
9 /// <param name="tags">Tags.</param>
10 void MarkFinalState(int state, params TTag[] tags);
11
12 /// <summary>
13 /// Defines the transition from <paramref name="s1"/> to
14 /// <paramref name="s2"/> with input <paramref name="symbol"/>.
15 /// </summary>
16 /// <param name="s1">S1.</param>
17 /// <param name="s2">S2.</param>
18 /// <param name="symbol">Symbol.</param>
19 void DefineTransition(int s1, int s2, int symbol);
20
21 void SetInitialState(int s);
22
23 }
24 }
25
@@ -0,0 +1,46
1 using System;
2
3 namespace Implab.Automaton.RegularExpressions {
4 public class RegularDFADefinition<TInput, TTag> : DFATransitionTable<TTag>, IDFATransitionTable<TTag> {
5
6 readonly IAlphabet<TInput> m_alphabet;
7 readonly int m_initialState;
8
9 public RegularDFADefinition(IAlphabet<TInput> alphabet, int initialState) {
10 Safe.ArgumentNotNull(alphabet, "aplhabet");
11
12 m_alphabet = alphabet;
13 m_initialState = initialState;
14 }
15
16
17 public IAlphabet<TInput> InputAlphabet {
18 get {
19 return m_alphabet;
20 }
21 }
22
23 protected override DFAStateDescriptior<TTag>[] ConstructTransitionTable() {
24 if (InputAlphabet.Count != m_alphabet.Count)
25 throw new InvalidOperationException("The alphabet doesn't match the transition table");
26
27 return base.ConstructTransitionTable();
28 }
29
30 /// <summary>
31 /// Optimize the specified alphabet.
32 /// </summary>
33 /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param>
34 public RegularDFADefinition<TInput, TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
35 Safe.ArgumentNotNull(alphabet, "alphabet");
36
37 var optimalDFA = new RegularDFADefinition<TInput,TTag>(alphabet, m_initialState);
38
39 Optimize(optimalDFA, InputAlphabet, alphabet, new DummyAlphabet(StateCount), new MapAlphabet<int>());
40
41 }
42
43
44 }
45 }
46
@@ -0,0 +1,25
1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4
5 namespace Implab.Automaton {
6 public class ByteAlphabet : IndexedAlphabetBase<byte> {
7 public ByteAlphabet() : base(byte.MaxValue + 1){
8 }
9
10 #region implemented abstract members of IndexedAlphabetBase
11
12 public override int GetSymbolIndex(byte symbol) {
13 return (int)symbol;
14 }
15
16 public IEnumerable<byte> InputSymbols {
17 get {
18 return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>();
19 }
20 }
21
22 #endregion
23 }
24 }
25
@@ -0,0 +1,23
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Linq;
5 using System.Text;
6 using System.Threading.Tasks;
7
8 namespace Implab.Automaton {
9 public class CharAlphabet: IndexedAlphabetBase<char> {
10
11 public CharAlphabet()
12 : base(char.MaxValue + 1) {
13 }
14
15 public override int GetSymbolIndex(char symbol) {
16 return symbol;
17 }
18
19 public override IEnumerable<char> InputSymbols {
20 get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast<char>(); }
21 }
22 }
23 }
@@ -1,10 +1,4
1 using System;
1 namespace Implab.Automaton {
2 using System.Collections.Generic;
3 using System.Linq;
4 using System.Text;
5 using System.Threading.Tasks;
6
7 namespace Implab.Automaton {
8 public struct DFAStateDescriptior<TTag> {
2 public struct DFAStateDescriptior<TTag> {
9 public bool final;
3 public bool final;
10 public TTag[] tag;
4 public TTag[] tag;
@@ -3,8 +3,16 using System.Collections.Generic;
3 using System.Linq;
3 using System.Linq;
4
4
5 namespace Implab.Automaton {
5 namespace Implab.Automaton {
6 /// <summary>
7 /// Dummy alphabet consists of integer numbers which are identical to their classes.
8 /// </summary>
6 public class DummyAlphabet : IAlphabet<int> {
9 public class DummyAlphabet : IAlphabet<int> {
7 readonly int m_size;
10 readonly int m_size;
11
12 /// <summary>
13 /// Creates a new dummy alphabet with given size.
14 /// </summary>
15 /// <param name="size">The size of the alphabet, must be greater then zero.</param>
8 public DummyAlphabet(int size) {
16 public DummyAlphabet(int size) {
9 Safe.ArgumentAssert(size > 0);
17 Safe.ArgumentAssert(size > 0);
10 m_size = 0;
18 m_size = 0;
@@ -21,6 +29,8 namespace Implab.Automaton {
21 Safe.ArgumentNotNull(classes, "classes");
29 Safe.ArgumentNotNull(classes, "classes");
22 var map = new int[m_size];
30 var map = new int[m_size];
23 foreach (var cls in classes) {
31 foreach (var cls in classes) {
32 if (cls.Contains(DFAConst.UNCLASSIFIED_INPUT))
33 continue;
24 var newid = newAlphabet.DefineClass(cls);
34 var newid = newAlphabet.DefineClass(cls);
25 foreach (var id in cls)
35 foreach (var id in cls)
26 map[id] = newid;
36 map[id] = newid;
@@ -12,46 +12,49 namespace Implab.Automaton {
12 /// <typeparam name="T">Тип перечислений</typeparam>
12 /// <typeparam name="T">Тип перечислений</typeparam>
13 public class EnumAlphabet<T> : IndexedAlphabetBase<T> where T : struct, IConvertible {
13 public class EnumAlphabet<T> : IndexedAlphabetBase<T> where T : struct, IConvertible {
14 [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")]
14 [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")]
15 static readonly T[] _symbols;
15 static readonly Lazy<T[]> _symbols = new Lazy<T[]>(GetSymbols);
16 static readonly EnumAlphabet<T> _fullAlphabet;
16
17 [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")]
18 static readonly Lazy<EnumAlphabet<T>> _fullAlphabet = new Lazy<EnumAlphabet<T>>(CreateEnumAlphabet);
19
20 static EnumAlphabet<T> CreateEnumAlphabet() {
21 var symbols = _symbols.Value;
17
22
18 [SuppressMessage("Microsoft.Design", "CA1065:DoNotRaiseExceptionsInUnexpectedLocations")]
23 if (
19 static EnumAlphabet() {
24 symbols[symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= symbols.Length
25 || symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0
26 )
27 throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered");
28
29 return new EnumAlphabet<T>(symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray());
30 }
31
32 static T[] GetSymbols() {
20 if (!typeof(T).IsEnum)
33 if (!typeof(T).IsEnum)
21 throw new InvalidOperationException("Invalid generic parameter, enumeration is required");
34 throw new InvalidOperationException("Invalid generic parameter, enumeration is required");
22
35
23 if (Enum.GetUnderlyingType(typeof(T)) != typeof(Int32))
36 if (Enum.GetUnderlyingType(typeof(T)) != typeof(Int32))
24 throw new InvalidOperationException("Only enums based on Int32 are supported");
37 throw new InvalidOperationException("Only enums based on Int32 are supported");
25
38
26 _symbols = ((T[])Enum.GetValues(typeof(T)))
39 return ((T[])Enum.GetValues(typeof(T)))
27 .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture))
40 .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture))
28 .ToArray();
41 .ToArray();
29
30 if (
31 _symbols[_symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= _symbols.Length
32 || _symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0
33 )
34 throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered");
35
36 _fullAlphabet = new EnumAlphabet<T>(_symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray());
37 }
42 }
38
43
39
40
41 public static EnumAlphabet<T> FullAlphabet {
44 public static EnumAlphabet<T> FullAlphabet {
42 get {
45 get {
43 return _fullAlphabet;
46 return _fullAlphabet.Value;
44 }
47 }
45 }
48 }
46
49
47
50
48 public EnumAlphabet()
51 public EnumAlphabet()
49 : base(_symbols.Length) {
52 : base(_symbols.Value.Length) {
50 }
53 }
51
54
52 public EnumAlphabet(int[] map)
55 public EnumAlphabet(int[] map)
53 : base(map) {
56 : base(map) {
54 Debug.Assert(map.Length == _symbols.Length);
57 Debug.Assert(map.Length == _symbols.Value.Length);
55 }
58 }
56
59
57
60
@@ -60,7 +63,7 namespace Implab.Automaton {
60 }
63 }
61
64
62 public override IEnumerable<T> InputSymbols {
65 public override IEnumerable<T> InputSymbols {
63 get { return _symbols; }
66 get { return _symbols.Value; }
64 }
67 }
65
68
66 }
69 }
@@ -17,9 +17,6 namespace Implab.Automaton {
17 /// <param name="symbols">Множестов исходных символов</param>
17 /// <param name="symbols">Множестов исходных символов</param>
18 /// <returns>Идентификатор символа алфавита.</returns>
18 /// <returns>Идентификатор символа алфавита.</returns>
19 int DefineClass(IEnumerable<TSymbol> symbols);
19 int DefineClass(IEnumerable<TSymbol> symbols);
20
21
22
23 }
20 }
24 }
21 }
25
22
@@ -9,8 +9,6 namespace Implab.Automaton {
9 /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
9 /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
10 /// </summary>
10 /// </summary>
11 public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
11 public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
12 public const int UNCLASSIFIED = 0;
13
14 int m_nextId = 1;
12 int m_nextId = 1;
15 readonly int[] m_map;
13 readonly int[] m_map;
16
14
@@ -31,7 +29,7 namespace Implab.Automaton {
31
29
32 public int DefineSymbol(T symbol) {
30 public int DefineSymbol(T symbol) {
33 var index = GetSymbolIndex(symbol);
31 var index = GetSymbolIndex(symbol);
34 if (m_map[index] == UNCLASSIFIED)
32 if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
35 m_map[index] = m_nextId++;
33 m_map[index] = m_nextId++;
36 return m_map[index];
34 return m_map[index];
37 }
35 }
@@ -42,7 +40,7 namespace Implab.Automaton {
42
40
43 foreach (var symbol in symbols) {
41 foreach (var symbol in symbols) {
44 var index = GetSymbolIndex(symbol);
42 var index = GetSymbolIndex(symbol);
45 if (m_map[index] == UNCLASSIFIED)
43 if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
46 m_map[GetSymbolIndex(symbol)] = m_nextId;
44 m_map[GetSymbolIndex(symbol)] = m_nextId;
47 else
45 else
48 throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
46 throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
@@ -52,10 +50,10 namespace Implab.Automaton {
52
50
53 public List<T>[] CreateReverseMap() {
51 public List<T>[] CreateReverseMap() {
54 return
52 return
55 Enumerable.Range(UNCLASSIFIED, Count)
53 Enumerable.Range(0, Count)
56 .Select(
54 .Select(
57 i => InputSymbols
55 i => InputSymbols
58 .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i)
56 .Where(x => i != DFAConst.UNCLASSIFIED_INPUT && m_map[GetSymbolIndex(x)] == i)
59 .ToList()
57 .ToList()
60 )
58 )
61 .ToArray();
59 .ToArray();
@@ -70,7 +68,7 namespace Implab.Automaton {
70
68
71 foreach (var scl in classes) {
69 foreach (var scl in classes) {
72 // skip if the supper class contains the unclassified element
70 // skip if the supper class contains the unclassified element
73 if (scl.Contains(UNCLASSIFIED))
71 if (scl.Contains(DFAConst.UNCLASSIFIED_INPUT))
74 continue;
72 continue;
75 var range = new List<T>();
73 var range = new List<T>();
76 foreach (var cl in scl) {
74 foreach (var cl in scl) {
@@ -5,11 +5,14 using System.Linq;
5 namespace Implab.Automaton {
5 namespace Implab.Automaton {
6 public class MapAlphabet<T> : IAlphabetBuilder<T> {
6 public class MapAlphabet<T> : IAlphabetBuilder<T> {
7 readonly Dictionary<T,int> m_map;
7 readonly Dictionary<T,int> m_map;
8 int m_nextCls;
8 int m_nextCls = 1;
9
10 public MapAlphabet() {
11 m_map = new Dictionary<T, int>();
12 }
9
13
10 public MapAlphabet(IEqualityComparer<T> comparer) {
14 public MapAlphabet(IEqualityComparer<T> comparer) {
11 m_map = new Dictionary<T, int>(comparer);
15 m_map = new Dictionary<T, int>(comparer);
12 m_nextCls = 1;
13 }
16 }
14
17
15 #region IAlphabetBuilder implementation
18 #region IAlphabetBuilder implementation
@@ -71,6 +74,9 namespace Implab.Automaton {
71 var map = new int[rmap.Length];
74 var map = new int[rmap.Length];
72
75
73 foreach (var cls in classes) {
76 foreach (var cls in classes) {
77 if (cls.Contains(DFAConst.UNCLASSIFIED_INPUT))
78 continue;
79
74 var symbols = new List<T>();
80 var symbols = new List<T>();
75 foreach (var id in cls) {
81 foreach (var id in cls) {
76 if (id < 0 || id >= rmap.Length)
82 if (id < 0 || id >= rmap.Length)
@@ -71,14 +71,16 namespace Implab.Automaton.RegularExpres
71 protected CDFADefinition BuildDFA(Token<TTag> lang) {
71 protected CDFADefinition BuildDFA(Token<TTag> lang) {
72 Safe.ArgumentNotNull(lang, "lang");
72 Safe.ArgumentNotNull(lang, "lang");
73
73
74 var dfa = new CDFADefinition(Alphabet);
74 var dfa = new RegularDFADefinition<TSymbol, TTag>(Alphabet, 0);
75
75
76 var table = new DFATransitionTable<TTag>();
77
76 var builder = new RegularDFABuilder<TTag>();
78 var builder = new RegularDFABuilder<TTag>();
77
79
78 lang.Accept( builder );
80 lang.Accept( builder );
79
81
80 builder.BuildDFA(dfa);
82 var initialState = builder.BuildDFA(table);
81 if (dfa.InitialStateIsFinal)
83 if (table.IsFinalState(initialState))
82 throw new ApplicationException("The specified language contains empty token");
84 throw new ApplicationException("The specified language contains empty token");
83
85
84 return dfa.Optimize();
86 return dfa.Optimize();
@@ -122,7 +122,7 namespace Implab.Automaton.RegularExpres
122 m_ends.Add(m_idx, token.Tag);
122 m_ends.Add(m_idx, token.Tag);
123 }
123 }
124
124
125 public void BuildDFA(IDFADefinitionBuilder<TTag> dfa) {
125 public void BuildDFA(IDFATransitionTableBuilder<TTag> dfa) {
126 Safe.ArgumentNotNull(dfa,"dfa");
126 Safe.ArgumentNotNull(dfa,"dfa");
127
127
128 var states = new MapAlphabet<HashSet<int>>(new CustomEqualityComparer<HashSet<int>>(
128 var states = new MapAlphabet<HashSet<int>>(new CustomEqualityComparer<HashSet<int>>(
@@ -131,7 +131,8 namespace Implab.Automaton.RegularExpres
131 ));
131 ));
132
132
133 var initialState = states.DefineSymbol(m_firstpos);
133 var initialState = states.DefineSymbol(m_firstpos);
134
134 dfa.SetInitialState(initialState);
135
135 var tags = GetStateTags(m_firstpos);
136 var tags = GetStateTags(m_firstpos);
136 if (tags != null && tags.Length > 0)
137 if (tags != null && tags.Length > 0)
137 dfa.MarkFinalState(initialState, tags);
138 dfa.MarkFinalState(initialState, tags);
@@ -17,12 +17,14 namespace Implab.Automaton {
17 struct ScannerConfig {
17 struct ScannerConfig {
18 public DFAStateDescriptior<TTag>[] states;
18 public DFAStateDescriptior<TTag>[] states;
19 public int[] alphabetMap;
19 public int[] alphabetMap;
20 public int initialState;
20 }
21 }
21
22
22 Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();
23 Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();
23
24
24 DFAStateDescriptior<TTag>[] m_states;
25 DFAStateDescriptior<TTag>[] m_states;
25 int[] m_alphabetMap;
26 int[] m_alphabetMap;
27 int m_initialState;
26
28
27 protected DFAStateDescriptior<TTag> m_currentState;
29 protected DFAStateDescriptior<TTag> m_currentState;
28 int m_previewCode;
30 int m_previewCode;
@@ -39,12 +41,13 namespace Implab.Automaton {
39 int m_chunkSize = 1024; // 1k
41 int m_chunkSize = 1024; // 1k
40 int m_limit = 10 * 1024 * 1024; // 10Mb
42 int m_limit = 10 * 1024 * 1024; // 10Mb
41
43
42 protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet) {
44 protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) {
43 Safe.ArgumentNotEmpty(states, "states");
45 Safe.ArgumentNotEmpty(states, "states");
44 Safe.ArgumentNotNull(alphabet, "alphabet");
46 Safe.ArgumentNotNull(alphabet, "alphabet");
45
47
46 m_states = states;
48 m_states = states;
47 m_alphabetMap = alphabet;
49 m_alphabetMap = alphabet;
50 m_initialState = initialState;
48
51
49 Feed(new char[0]);
52 Feed(new char[0]);
50 }
53 }
@@ -130,7 +133,7 namespace Implab.Automaton {
130 if (m_pointer >= m_bufferSize)
133 if (m_pointer >= m_bufferSize)
131 return false;
134 return false;
132
135
133 m_currentState = m_states[DFADefinition.INITIAL_STATE];
136 m_currentState = m_states[m_initialState];
134 m_tokenLen = 0;
137 m_tokenLen = 0;
135 m_tokenOffset = m_pointer;
138 m_tokenOffset = m_pointer;
136 int nextState;
139 int nextState;
@@ -217,16 +220,18 namespace Implab.Automaton {
217 /// </summary>
220 /// </summary>
218 /// <param name="states">Таблица состояний нового ДКА</param>
221 /// <param name="states">Таблица состояний нового ДКА</param>
219 /// <param name="alphabet">Таблица входных символов для нового ДКА</param>
222 /// <param name="alphabet">Таблица входных символов для нового ДКА</param>
220 protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet) {
223 protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) {
221 Safe.ArgumentNotNull(states, "dfa");
224 Safe.ArgumentNotNull(states, "dfa");
222
225
223 m_defs.Push(new ScannerConfig {
226 m_defs.Push(new ScannerConfig {
224 states = m_states,
227 states = m_states,
225 alphabetMap = m_alphabetMap
228 alphabetMap = m_alphabetMap,
229 initialState = m_initialState
226 });
230 });
227
231
228 m_states = states;
232 m_states = states;
229 m_alphabetMap = alphabet;
233 m_alphabetMap = alphabet;
234 m_initialState = initialState;
230
235
231 m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
236 m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
232 }
237 }
@@ -240,6 +245,7 namespace Implab.Automaton {
240 var prev = m_defs.Pop();
245 var prev = m_defs.Pop();
241 m_states = prev.states;
246 m_states = prev.states;
242 m_alphabetMap = prev.alphabetMap;
247 m_alphabetMap = prev.alphabetMap;
248 m_initialState = prev.initialState;
243 m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
249 m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
244 }
250 }
245
251
@@ -8,6 +8,9
8 <RootNamespace>Implab</RootNamespace>
8 <RootNamespace>Implab</RootNamespace>
9 <AssemblyName>Implab</AssemblyName>
9 <AssemblyName>Implab</AssemblyName>
10 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
10 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
11 <ReleaseVersion>0.2</ReleaseVersion>
12 <ProductVersion>8.0.30703</ProductVersion>
13 <SchemaVersion>2.0</SchemaVersion>
11 </PropertyGroup>
14 </PropertyGroup>
12 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
15 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
13 <DebugSymbols>true</DebugSymbols>
16 <DebugSymbols>true</DebugSymbols>
@@ -148,20 +151,14
148 <Compile Include="Components\ExecutionState.cs" />
151 <Compile Include="Components\ExecutionState.cs" />
149 <Compile Include="Components\RunnableComponent.cs" />
152 <Compile Include="Components\RunnableComponent.cs" />
150 <Compile Include="Components\IFactory.cs" />
153 <Compile Include="Components\IFactory.cs" />
151 <Compile Include="Automaton\CDFADefinition.cs" />
152 <Compile Include="Automaton\DFAStateDescriptor.cs" />
154 <Compile Include="Automaton\DFAStateDescriptor.cs" />
153 <Compile Include="Automaton\DFAutomaton.cs" />
155 <Compile Include="Automaton\DFAutomaton.cs" />
154 <Compile Include="Automaton\EDFADefinition.cs" />
155 <Compile Include="Automaton\EnumAlphabet.cs" />
156 <Compile Include="Automaton\EnumAlphabet.cs" />
156 <Compile Include="Automaton\IAlphabet.cs" />
157 <Compile Include="Automaton\IAlphabet.cs" />
157 <Compile Include="Automaton\IDFADefinition.cs" />
158 <Compile Include="Automaton\ParserException.cs" />
158 <Compile Include="Automaton\ParserException.cs" />
159 <Compile Include="Automaton\Scanner.cs" />
159 <Compile Include="Automaton\Scanner.cs" />
160 <Compile Include="Automaton\DFADefinition.cs" />
161 <Compile Include="Automaton\IndexedAlphabetBase.cs" />
160 <Compile Include="Automaton\IndexedAlphabetBase.cs" />
162 <Compile Include="Automaton\CharAlphabet.cs" />
163 <Compile Include="Automaton\IAlphabetBuilder.cs" />
161 <Compile Include="Automaton\IAlphabetBuilder.cs" />
164 <Compile Include="Automaton\IDFADefinitionBuilder.cs" />
165 <Compile Include="Automaton\RegularExpressions\AltToken.cs" />
162 <Compile Include="Automaton\RegularExpressions\AltToken.cs" />
166 <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" />
163 <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" />
167 <Compile Include="Automaton\RegularExpressions\CatToken.cs" />
164 <Compile Include="Automaton\RegularExpressions\CatToken.cs" />
@@ -174,7 +171,6
174 <Compile Include="Automaton\RegularExpressions\Token.cs" />
171 <Compile Include="Automaton\RegularExpressions\Token.cs" />
175 <Compile Include="Automaton\RegularExpressions\IVisitor.cs" />
172 <Compile Include="Automaton\RegularExpressions\IVisitor.cs" />
176 <Compile Include="Automaton\AutomatonTransition.cs" />
173 <Compile Include="Automaton\AutomatonTransition.cs" />
177 <Compile Include="Automaton\ByteAlphabet.cs" />
178 <Compile Include="Automaton\RegularExpressions\RegularDFABuilder.cs" />
174 <Compile Include="Automaton\RegularExpressions\RegularDFABuilder.cs" />
179 <Compile Include="Formats\JSON\JSONElementContext.cs" />
175 <Compile Include="Formats\JSON\JSONElementContext.cs" />
180 <Compile Include="Formats\JSON\JSONElementType.cs" />
176 <Compile Include="Formats\JSON\JSONElementType.cs" />
@@ -188,6 +184,12
188 <Compile Include="Formats\JSON\StringTranslator.cs" />
184 <Compile Include="Formats\JSON\StringTranslator.cs" />
189 <Compile Include="Automaton\MapAlphabet.cs" />
185 <Compile Include="Automaton\MapAlphabet.cs" />
190 <Compile Include="Automaton\DummyAlphabet.cs" />
186 <Compile Include="Automaton\DummyAlphabet.cs" />
187 <Compile Include="Automaton\DFATransitionTable.cs" />
188 <Compile Include="Automaton\IDFATransitionTableBuilder.cs" />
189 <Compile Include="Automaton\IDFATransitionTable.cs" />
190 <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" />
191 <Compile Include="Formats\CharAlphabet.cs" />
192 <Compile Include="Formats\ByteAlphabet.cs" />
191 </ItemGroup>
193 </ItemGroup>
192 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
194 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
193 <ItemGroup />
195 <ItemGroup />
1 NO CONTENT: file was removed
NO CONTENT: file was removed
1 NO CONTENT: file was removed
NO CONTENT: file was removed
1 NO CONTENT: file was removed
NO CONTENT: file was removed
1 NO CONTENT: file was removed
NO CONTENT: file was removed
1 NO CONTENT: file was removed
NO CONTENT: file was removed
1 NO CONTENT: file was removed
NO CONTENT: file was removed
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now