@@ -0,0 +1,33 | |||||
|
1 | using System; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton { | |||
|
4 | struct AutomatonTransition : IEquatable<AutomatonTransition> { | |||
|
5 | public readonly int s1; | |||
|
6 | public readonly int s2; | |||
|
7 | public readonly int edge; | |||
|
8 | ||||
|
9 | public AutomatonTransition(int s1, int s2, int edge) { | |||
|
10 | this.s1 = s1; | |||
|
11 | this.s2 = s2; | |||
|
12 | this.edge = edge; | |||
|
13 | } | |||
|
14 | ||||
|
15 | ||||
|
16 | #region IEquatable implementation | |||
|
17 | public bool Equals(AutomatonTransition other) { | |||
|
18 | return other.s1 == s1 && other.s2 == s2 && other.edge == edge ; | |||
|
19 | } | |||
|
20 | #endregion | |||
|
21 | ||||
|
22 | public override bool Equals(object obj) { | |||
|
23 | if (obj is AutomatonTransition) | |||
|
24 | return Equals((AutomatonTransition)obj); | |||
|
25 | return base.Equals(obj); | |||
|
26 | } | |||
|
27 | ||||
|
28 | public override int GetHashCode() { | |||
|
29 | return s1 + s2 + edge; | |||
|
30 | } | |||
|
31 | } | |||
|
32 | } | |||
|
33 |
@@ -0,0 +1,23 | |||||
|
1 | using System; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton { | |||
|
4 | public class ByteAlphabet : IndexedAlphabetBase<byte> { | |||
|
5 | public ByteAlphabet() : base(byte.MaxValue + 1){ | |||
|
6 | } | |||
|
7 | ||||
|
8 | #region implemented abstract members of IndexedAlphabetBase | |||
|
9 | ||||
|
10 | public override int GetSymbolIndex(byte symbol) { | |||
|
11 | return (int)symbol; | |||
|
12 | } | |||
|
13 | ||||
|
14 | public override System.Collections.Generic.IEnumerable<byte> InputSymbols { | |||
|
15 | get { | |||
|
16 | throw new NotImplementedException(); | |||
|
17 | } | |||
|
18 | } | |||
|
19 | ||||
|
20 | #endregion | |||
|
21 | } | |||
|
22 | } | |||
|
23 |
@@ -0,0 +1,22 | |||||
|
1 | namespace Implab.Automaton { | |||
|
2 | public class CDFADefinition : DFADefinition { | |||
|
3 | readonly CharAlphabet m_alphabet; | |||
|
4 | ||||
|
5 | public CharAlphabet Alphabet { | |||
|
6 | get { return m_alphabet; } | |||
|
7 | } | |||
|
8 | ||||
|
9 | public CDFADefinition(CharAlphabet alphabet): base(alphabet.Count) { | |||
|
10 | m_alphabet = alphabet; | |||
|
11 | } | |||
|
12 | ||||
|
13 | public CDFADefinition Optimize() { | |||
|
14 | ||||
|
15 | return (CDFADefinition)Optimize(alphabet => new CDFADefinition((CharAlphabet)alphabet), m_alphabet, new CharAlphabet()); | |||
|
16 | } | |||
|
17 | ||||
|
18 | public void PrintDFA() { | |||
|
19 | PrintDFA(m_alphabet); | |||
|
20 | } | |||
|
21 | } | |||
|
22 | } |
@@ -0,0 +1,23 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Automaton { | |||
|
9 | public class CharAlphabet: IndexedAlphabetBase<char> { | |||
|
10 | ||||
|
11 | public CharAlphabet() | |||
|
12 | : base(char.MaxValue + 1) { | |||
|
13 | } | |||
|
14 | ||||
|
15 | public override int GetSymbolIndex(char symbol) { | |||
|
16 | return symbol; | |||
|
17 | } | |||
|
18 | ||||
|
19 | public override IEnumerable<char> InputSymbols { | |||
|
20 | get { return Enumerable.Range(char.MinValue, char.MaxValue).Select(x => (char)x); } | |||
|
21 | } | |||
|
22 | } | |||
|
23 | } |
@@ -0,0 +1,10 | |||||
|
1 | using System; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton { | |||
|
4 | public static class DFAConst { | |||
|
5 | public const int UNREACHABLE_STATE = -1; | |||
|
6 | ||||
|
7 | public const int UNCLASSIFIED_INPUT = 0; | |||
|
8 | } | |||
|
9 | } | |||
|
10 |
@@ -0,0 +1,213 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | ||||
|
6 | namespace Implab.Automaton { | |||
|
7 | public class DFADefinition<TInput, TState, TTag> : IDFADefinitionBuilder<TTag>, IDFADefinition<TInput, TState, TTag> { | |||
|
8 | DFAStateDescriptior<TTag>[] m_dfaTable; | |||
|
9 | readonly IAlphabet<TInput> m_inputAlphabet; | |||
|
10 | readonly IAlphabet<TState> m_stateAlphabet; | |||
|
11 | ||||
|
12 | readonly Dictionary<int, TTag[]> m_finalStates = new Dictionary<int, TTag[]>(); | |||
|
13 | readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>(); | |||
|
14 | ||||
|
15 | public DFADefinition(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { | |||
|
16 | Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); | |||
|
17 | Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); | |||
|
18 | ||||
|
19 | m_inputAlphabet = inputAlphabet; | |||
|
20 | m_stateAlphabet = stateAlphabet; | |||
|
21 | } | |||
|
22 | ||||
|
23 | public void DefineTransition(int s1, int s2, int symbol) { | |||
|
24 | Safe.ArgumentInRange(s1, 0, m_stateAlphabet.Count-1, "s1"); | |||
|
25 | Safe.ArgumentInRange(s2, 0, m_stateAlphabet.Count-1, "s2"); | |||
|
26 | Safe.ArgumentInRange(symbol, 0, m_inputAlphabet.Count-1, "symbol"); | |||
|
27 | ||||
|
28 | m_transitions.Add(new AutomatonTransition(s1, s2, symbol)); | |||
|
29 | } | |||
|
30 | ||||
|
31 | ||||
|
32 | #region IDFADefinition implementation | |||
|
33 | ||||
|
34 | public DFAStateDescriptior<TTag>[] GetTransitionTable() { | |||
|
35 | if (m_dfaTable == null) { | |||
|
36 | m_dfaTable = new DFAStateDescriptior<TTag>[m_stateAlphabet.Count]; | |||
|
37 | ||||
|
38 | foreach (var pair in m_finalStates) { | |||
|
39 | var idx = pair.Key; | |||
|
40 | ||||
|
41 | m_dfaTable[idx].final = true; | |||
|
42 | m_dfaTable[idx].tag = m_dfaTable[idx].tag != null ? | |||
|
43 | m_dfaTable[idx].tag.Concat(pair.Value).Distinct().ToArray() : | |||
|
44 | pair.Value; | |||
|
45 | } | |||
|
46 | ||||
|
47 | foreach (var t in m_transitions) { | |||
|
48 | if (m_dfaTable[t.s1].transitions == null) { | |||
|
49 | m_dfaTable[t.s1].transitions = new int[m_inputAlphabet.Count]; | |||
|
50 | for (int i = 0; i < m_dfaTable[t.s1].transitions.Length; i++) | |||
|
51 | m_dfaTable[t.s1].transitions[i] = DFAConst.UNREACHABLE_STATE; | |||
|
52 | } | |||
|
53 | ||||
|
54 | m_dfaTable[t.s1].transitions[t.edge] = t.s2; | |||
|
55 | } | |||
|
56 | } | |||
|
57 | return m_dfaTable; | |||
|
58 | } | |||
|
59 | ||||
|
60 | public IAlphabet<TInput> InputAlphabet { | |||
|
61 | get { | |||
|
62 | return m_inputAlphabet; | |||
|
63 | } | |||
|
64 | } | |||
|
65 | ||||
|
66 | public IAlphabet<TState> StateAlphabet { | |||
|
67 | get { | |||
|
68 | return m_stateAlphabet; | |||
|
69 | } | |||
|
70 | } | |||
|
71 | ||||
|
72 | #endregion | |||
|
73 | ||||
|
74 | #region IDFADefinitionBuilder | |||
|
75 | ||||
|
76 | public void DefineTransition(int s1, int s2, int symbol) { | |||
|
77 | Safe.ArgumentInRange(s1, 0, m_stateAlphabet.Count - 1, "s1"); | |||
|
78 | Safe.ArgumentInRange(s2, 0, m_stateAlphabet.Count - 1, "s2"); | |||
|
79 | Safe.ArgumentInRange(symbol, 0, m_inputAlphabet.Count - 1, "symbol"); | |||
|
80 | ||||
|
81 | m_transitions.Add(new AutomatonTransition(s1, s2, symbol)); | |||
|
82 | } | |||
|
83 | ||||
|
84 | public void MarkFinalState(int state, params TTag[] tags) { | |||
|
85 | m_finalStates[state] = tags; | |||
|
86 | } | |||
|
87 | ||||
|
88 | ||||
|
89 | #endregion | |||
|
90 | ||||
|
91 | protected void Optimize(IDFADefinitionBuilder<TTag> optimalDFA,IAlphabetBuilder<TInput> optimalInputAlphabet, IAlphabetBuilder<TState> optimalStateAlphabet) { | |||
|
92 | Safe.ArgumentNotNull(optimalDFA, "dfa"); | |||
|
93 | Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet"); | |||
|
94 | Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet"); | |||
|
95 | ||||
|
96 | var setComparer = new CustomEqualityComparer<HashSet<int>>( | |||
|
97 | (x, y) => x.SetEquals(y), | |||
|
98 | s => s.Sum(x => x.GetHashCode()) | |||
|
99 | ); | |||
|
100 | ||||
|
101 | var arrayComparer = new CustomEqualityComparer<TTag[]>( | |||
|
102 | (x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)), | |||
|
103 | a => a.Sum(x => x.GetHashCode()) | |||
|
104 | ); | |||
|
105 | ||||
|
106 | var optimalStates = new HashSet<HashSet<int>>(setComparer); | |||
|
107 | var queue = new HashSet<HashSet<int>>(setComparer); | |||
|
108 | ||||
|
109 | // получаем конечные состояния, сгруппированные по маркерам | |||
|
110 | optimalStates.UnionWith( | |||
|
111 | m_finalStates | |||
|
112 | .GroupBy(pair => pair.Value, arrayComparer) | |||
|
113 | .Select( | |||
|
114 | g => new HashSet<int>( | |||
|
115 | g.Select( pair => pair.Key) | |||
|
116 | ) | |||
|
117 | ) | |||
|
118 | ); | |||
|
119 | ||||
|
120 | var state = new HashSet<int>( | |||
|
121 | Enumerable | |||
|
122 | .Range(0, m_stateAlphabet.Count - 1) | |||
|
123 | .Where(i => !m_finalStates.ContainsKey(i)) | |||
|
124 | ); | |||
|
125 | optimalStates.Add(state); | |||
|
126 | queue.Add(state); | |||
|
127 | ||||
|
128 | var rmap = m_transitions | |||
|
129 | .GroupBy(t => t.s2) | |||
|
130 | .ToLookup( | |||
|
131 | g => g.Key, // s2 | |||
|
132 | g => g.ToLookup(t => t.edge, t => t.s1) | |||
|
133 | ); | |||
|
134 | ||||
|
135 | while (queue.Count > 0) { | |||
|
136 | var stateA = queue.First(); | |||
|
137 | queue.Remove(stateA); | |||
|
138 | ||||
|
139 | for (int c = 0; c < m_inputAlphabet.Count; c++) { | |||
|
140 | var stateX = new HashSet<int>(); | |||
|
141 | foreach(var a in stateA) | |||
|
142 | stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a' | |||
|
143 | ||||
|
144 | foreach (var stateY in optimalStates.ToArray()) { | |||
|
145 | if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { | |||
|
146 | var stateR1 = new HashSet<int>(stateY); | |||
|
147 | var stateR2 = new HashSet<int>(stateY); | |||
|
148 | ||||
|
149 | stateR1.IntersectWith(stateX); | |||
|
150 | stateR2.ExceptWith(stateX); | |||
|
151 | ||||
|
152 | optimalStates.Remove(stateY); | |||
|
153 | optimalStates.Add(stateR1); | |||
|
154 | optimalStates.Add(stateR2); | |||
|
155 | ||||
|
156 | if (queue.Contains(stateY)) { | |||
|
157 | queue.Remove(stateY); | |||
|
158 | queue.Add(stateR1); | |||
|
159 | queue.Add(stateR2); | |||
|
160 | } else { | |||
|
161 | queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); | |||
|
162 | } | |||
|
163 | } | |||
|
164 | } | |||
|
165 | } | |||
|
166 | } | |||
|
167 | ||||
|
168 | // карта получения оптимального состояния по соотвествующему ему простому состоянию | |||
|
169 | var statesMap = m_stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates); | |||
|
170 | ||||
|
171 | // получаем минимальный алфавит | |||
|
172 | // входные символы не различимы, если Move(s,a1) == Move(s,a2) | |||
|
173 | var optimalAlphabet = m_transitions | |||
|
174 | .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge); | |||
|
175 | ||||
|
176 | var alphabetMap = m_inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet); | |||
|
177 | ||||
|
178 | var optimalTags = m_finalStates | |||
|
179 | .GroupBy(pair => statesMap[pair.Key]) | |||
|
180 | .ToDictionary( | |||
|
181 | g => g.Key, | |||
|
182 | g => g.SelectMany(pair => pair.Value).ToArray() | |||
|
183 | ); | |||
|
184 | ||||
|
185 | // построение автомата | |||
|
186 | foreach (var pair in optimalTags) | |||
|
187 | optimalDFA.MarkFinalState(pair.Key, pair.Value); | |||
|
188 | ||||
|
189 | foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct()) | |||
|
190 | optimalDFA.DefineTransition(t.s1, t.s2, t.edge); | |||
|
191 | } | |||
|
192 | ||||
|
193 | public void PrintDFA() { | |||
|
194 | ||||
|
195 | var inputMap = InputAlphabet.CreateReverseMap(); | |||
|
196 | var stateMap = StateAlphabet.CreateReverseMap(); | |||
|
197 | ||||
|
198 | for (int i = 0; i < inputMap.Length; i++) { | |||
|
199 | Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i])); | |||
|
200 | } | |||
|
201 | ||||
|
202 | foreach(var t in m_transitions) | |||
|
203 | Console.WriteLine( | |||
|
204 | "S{0} -{1}-> S{2}{3}", | |||
|
205 | stateMap[t.s1], | |||
|
206 | String.Join(",", inputMap[t.edge]), | |||
|
207 | stateMap[t.s2], | |||
|
208 | m_finalStates.ContainsKey(t.s2) ? "$" : "" | |||
|
209 | ); | |||
|
210 | ||||
|
211 | } | |||
|
212 | } | |||
|
213 | } |
@@ -0,0 +1,13 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | using System.Threading.Tasks; | |||
|
6 | ||||
|
7 | namespace Implab.Automaton { | |||
|
8 | public struct DFAStateDescriptior<TTag> { | |||
|
9 | public bool final; | |||
|
10 | public TTag[] tag; | |||
|
11 | public int[] transitions; | |||
|
12 | } | |||
|
13 | } |
@@ -0,0 +1,61 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Diagnostics; | |||
|
5 | using System.Linq; | |||
|
6 | using System.Text; | |||
|
7 | using System.Threading.Tasks; | |||
|
8 | ||||
|
9 | namespace Implab.Automaton { | |||
|
10 | public abstract class DFAutomaton<T> { | |||
|
11 | protected struct ContextFrame { | |||
|
12 | public DFAStateDescriptior[] states; | |||
|
13 | public int current; | |||
|
14 | public T info; | |||
|
15 | } | |||
|
16 | ||||
|
17 | public const int INITIAL_STATE = DFADefinition.INITIAL_STATE; | |||
|
18 | public const int UNREACHEBLE_STATE = DFADefinition.UNREACHEBLE_STATE; | |||
|
19 | ||||
|
20 | protected ContextFrame m_context; | |||
|
21 | Stack<ContextFrame> m_contextStack = new Stack<ContextFrame>(); | |||
|
22 | ||||
|
23 | protected int Level { | |||
|
24 | get { return m_contextStack.Count; } | |||
|
25 | } | |||
|
26 | ||||
|
27 | protected DFAutomaton(DFAStateDescriptior[] states, int startState, T info) { | |||
|
28 | Safe.ArgumentNotNull(states, "states"); | |||
|
29 | Safe.ArgumentInRange(startState, 0, states.Length - 1, "startState"); | |||
|
30 | ||||
|
31 | m_context.states = states; | |||
|
32 | m_context.current = startState; | |||
|
33 | m_context.info = info; | |||
|
34 | } | |||
|
35 | ||||
|
36 | protected void Switch(DFAStateDescriptior[] states, int current, T info) { | |||
|
37 | Debug.Assert(states != null); | |||
|
38 | Debug.Assert(current >= 0 && current < states.Length); | |||
|
39 | m_contextStack.Push(m_context); | |||
|
40 | m_context.states = states; | |||
|
41 | m_context.current = current; | |||
|
42 | m_context.info = info; | |||
|
43 | } | |||
|
44 | ||||
|
45 | protected void Restore() { | |||
|
46 | Debug.Assert(m_contextStack.Count > 0); | |||
|
47 | ||||
|
48 | m_context = m_contextStack.Pop(); | |||
|
49 | } | |||
|
50 | ||||
|
51 | protected void Move(int input) { | |||
|
52 | Debug.Assert(input > 0 && input < m_context.states[m_context.current].transitions.Length); | |||
|
53 | m_context.current = m_context.states[m_context.current].transitions[input]; | |||
|
54 | } | |||
|
55 | ||||
|
56 | protected bool CanMove(int input) { | |||
|
57 | Debug.Assert(input > 0 && input < m_context.states[m_context.current].transitions.Length); | |||
|
58 | return m_context.states[m_context.current].transitions[input] != UNREACHEBLE_STATE; | |||
|
59 | } | |||
|
60 | } | |||
|
61 | } |
@@ -0,0 +1,29 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | ||||
|
4 | namespace Implab.Parsing { | |||
|
5 | public class EDFADefinition<T> : DFADefinition where T : struct, IConvertible { | |||
|
6 | readonly EnumAlphabet<T> m_alphabet; | |||
|
7 | ||||
|
8 | public EnumAlphabet<T> Alphabet { | |||
|
9 | get { return m_alphabet; } | |||
|
10 | } | |||
|
11 | ||||
|
12 | public EDFADefinition(EnumAlphabet<T> alphabet) : base(alphabet.Count) { | |||
|
13 | m_alphabet = alphabet; | |||
|
14 | } | |||
|
15 | ||||
|
16 | public void DefineTransition(int s1, int s2, T input) { | |||
|
17 | DefineTransition(s1, s2, m_alphabet.Translate(input)); | |||
|
18 | } | |||
|
19 | ||||
|
20 | public EDFADefinition<T> Optimize() { | |||
|
21 | ||||
|
22 | return (EDFADefinition<T>)Optimize(alphabet => new EDFADefinition<T>((EnumAlphabet<T>)alphabet), m_alphabet, new EnumAlphabet<T>()); | |||
|
23 | } | |||
|
24 | ||||
|
25 | public void PrintDFA() { | |||
|
26 | PrintDFA(m_alphabet); | |||
|
27 | } | |||
|
28 | } | |||
|
29 | } |
@@ -0,0 +1,67 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Diagnostics; | |||
|
4 | using System.Globalization; | |||
|
5 | using System.Linq; | |||
|
6 | using System.Diagnostics.CodeAnalysis; | |||
|
7 | ||||
|
8 | namespace Implab.Automaton { | |||
|
9 | /// <summary> | |||
|
10 | /// Алфавит символами которого являются элементы перечислений. | |||
|
11 | /// </summary> | |||
|
12 | /// <typeparam name="T">Тип перечислений</typeparam> | |||
|
13 | public class EnumAlphabet<T> : IndexedAlphabetBase<T> where T : struct, IConvertible { | |||
|
14 | [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")] | |||
|
15 | static readonly T[] _symbols; | |||
|
16 | static readonly EnumAlphabet<T> _fullAlphabet; | |||
|
17 | ||||
|
18 | [SuppressMessage("Microsoft.Design", "CA1065:DoNotRaiseExceptionsInUnexpectedLocations")] | |||
|
19 | static EnumAlphabet() { | |||
|
20 | if (!typeof(T).IsEnum) | |||
|
21 | throw new InvalidOperationException("Invalid generic parameter, enumeration is required"); | |||
|
22 | ||||
|
23 | if (Enum.GetUnderlyingType(typeof(T)) != typeof(Int32)) | |||
|
24 | throw new InvalidOperationException("Only enums based on Int32 are supported"); | |||
|
25 | ||||
|
26 | _symbols = ((T[])Enum.GetValues(typeof(T))) | |||
|
27 | .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture)) | |||
|
28 | .ToArray(); | |||
|
29 | ||||
|
30 | if ( | |||
|
31 | _symbols[_symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= _symbols.Length | |||
|
32 | || _symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0 | |||
|
33 | ) | |||
|
34 | throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered"); | |||
|
35 | ||||
|
36 | _fullAlphabet = new EnumAlphabet<T>(_symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray()); | |||
|
37 | } | |||
|
38 | ||||
|
39 | ||||
|
40 | ||||
|
41 | public static EnumAlphabet<T> FullAlphabet { | |||
|
42 | get { | |||
|
43 | return _fullAlphabet; | |||
|
44 | } | |||
|
45 | } | |||
|
46 | ||||
|
47 | ||||
|
48 | public EnumAlphabet() | |||
|
49 | : base(_symbols.Length) { | |||
|
50 | } | |||
|
51 | ||||
|
52 | public EnumAlphabet(int[] map) | |||
|
53 | : base(map) { | |||
|
54 | Debug.Assert(map.Length == _symbols.Length); | |||
|
55 | } | |||
|
56 | ||||
|
57 | ||||
|
58 | public override int GetSymbolIndex(T symbol) { | |||
|
59 | return symbol.ToInt32(CultureInfo.InvariantCulture); | |||
|
60 | } | |||
|
61 | ||||
|
62 | public override IEnumerable<T> InputSymbols { | |||
|
63 | get { return _symbols; } | |||
|
64 | } | |||
|
65 | ||||
|
66 | } | |||
|
67 | } |
@@ -0,0 +1,48 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | using System.Threading.Tasks; | |||
|
6 | ||||
|
7 | namespace Implab.Automaton { | |||
|
8 | /// <summary> | |||
|
9 | /// Алфавит. Множество символов, которые разбиты на классы, при этом классы имеют непрерывную нумерацию, | |||
|
10 | /// что позволяет использовать их в качестве индексов массивов. | |||
|
11 | /// </summary> | |||
|
12 | /// <remarks> | |||
|
13 | /// <para>Алфавит является сюрьективным отображением множества символов в множество индексов, это позволяет сократить размер таблицы переходов автомата | |||
|
14 | /// для входных символов, которые для него не различимы.</para> | |||
|
15 | /// </remarks> | |||
|
16 | /// <typeparam name="TSymbol">Тип символов.</typeparam> | |||
|
17 | public interface IAlphabet<TSymbol> { | |||
|
18 | /// <summary> | |||
|
19 | /// Количество классов символов в алфавите. | |||
|
20 | /// </summary> | |||
|
21 | int Count { get; } | |||
|
22 | ||||
|
23 | /// <summary> | |||
|
24 | /// Создает карту обратного сопоставления класса символов алфавита и сопоставленным | |||
|
25 | /// ему исходным символам. | |||
|
26 | /// </summary> | |||
|
27 | /// <returns></returns> | |||
|
28 | List<TSymbol>[] CreateReverseMap(); | |||
|
29 | ||||
|
30 | /// <summary> | |||
|
31 | /// Создает новый алфавит на основе текущего, горппируя его сиволы в более | |||
|
32 | /// крупные непересекающиеся классы символов. | |||
|
33 | /// </summary> | |||
|
34 | /// <param name="newAlphabet">Новый, пустой алфавит, в котором быдут определены классы.</param> | |||
|
35 | /// <param name="classes">Множество классов символов текущего алфавита.</param> | |||
|
36 | /// <returns>Карта для перехода классов текущего | |||
|
37 | /// алфавита к классам нового.</returns> | |||
|
38 | /// <remarks>Ползволяет укрупнить алфавит, объединив классы в текущем алфавите. Используется при оптимизации автомата.</remarks> | |||
|
39 | int[] Reclassify(IAlphabetBuilder<TSymbol> newAlphabet, IEnumerable<IEnumerable<int>> classes); | |||
|
40 | ||||
|
41 | /// <summary> | |||
|
42 | /// Преобразует входной символ в индекс символа из алфавита. | |||
|
43 | /// </summary> | |||
|
44 | /// <param name="symobl">Исходный символ</param> | |||
|
45 | /// <returns>Индекс в алфавите</returns> | |||
|
46 | int Translate(TSymbol symobl); | |||
|
47 | } | |||
|
48 | } |
@@ -0,0 +1,25 | |||||
|
1 | ||||
|
2 | using System.Collections.Generic; | |||
|
3 | ||||
|
4 | namespace Implab.Automaton { | |||
|
5 | public interface IAlphabetBuilder<TSymbol> : IAlphabet<TSymbol> { | |||
|
6 | /// <summary> | |||
|
7 | /// Добавляет новый символ в алфавит, если символ уже был добавлен, то | |||
|
8 | /// возвращается ранее сопоставленный с символом класс. | |||
|
9 | /// </summary> | |||
|
10 | /// <param name="symbol">Символ для добавления.</param> | |||
|
11 | /// <returns>Индекс класса, который попоставлен с символом.</returns> | |||
|
12 | int DefineSymbol(TSymbol symbol); | |||
|
13 | /// <summary> | |||
|
14 | /// Доабвляем класс символов. Множеству указанных исходных символов | |||
|
15 | /// будет сопоставлен символ в алфавите. | |||
|
16 | /// </summary> | |||
|
17 | /// <param name="symbols">Множестов исходных символов</param> | |||
|
18 | /// <returns>Идентификатор символа алфавита.</returns> | |||
|
19 | int DefineClass(IEnumerable<TSymbol> symbols); | |||
|
20 | ||||
|
21 | ||||
|
22 | ||||
|
23 | } | |||
|
24 | } | |||
|
25 |
@@ -0,0 +1,56 | |||||
|
1 | ||||
|
2 | namespace Implab.Automaton { | |||
|
3 | /// <summary> | |||
|
4 | /// Полностью описывает DFA автомат, его поведение, состояние и входные символы. | |||
|
5 | /// </summary> | |||
|
6 | /// <example> | |||
|
7 | /// class MyAutomaton { | |||
|
8 | /// int m_current; | |||
|
9 | /// readonly DFAStateDescriptor<string>[] m_automaton; | |||
|
10 | /// readonly IAlphabet<MyCommands> m_commands; | |||
|
11 | /// | |||
|
12 | /// public MyAutomaton(IDFADefinition<MyCommands,MyStates,string> definition) { | |||
|
13 | /// m_current = definition.StateAlphabet.Translate(MyStates.Initial); | |||
|
14 | /// m_automaton = definition.GetTransitionTable(); | |||
|
15 | /// m_commands = definition.InputAlphabet; | |||
|
16 | /// } | |||
|
17 | /// | |||
|
18 | /// // defined a method which will move the automaton to the next state | |||
|
19 | /// public void Move(MyCommands cmd) { | |||
|
20 | /// // use transition map to determine the next state | |||
|
21 | /// var next = m_automaton[m_current].transitions[m_commands.Translate(cmd)]; | |||
|
22 | /// | |||
|
23 | /// // validate that we aren't in the unreachable state | |||
|
24 | /// if (next == DFAConst.UNREACHABLE_STATE) | |||
|
25 | /// throw new InvalidOperationException("The specified command is invalid"); | |||
|
26 | /// | |||
|
27 | /// // if everything is ok | |||
|
28 | /// m_current = next; | |||
|
29 | /// } | |||
|
30 | /// } | |||
|
31 | /// </example> | |||
|
32 | public interface IDFADefinition<TInput, TState, TTag> { | |||
|
33 | /// <summary> | |||
|
34 | /// Алфавит входных символов | |||
|
35 | /// </summary> | |||
|
36 | /// <value>The input alphabet.</value> | |||
|
37 | IAlphabet<TInput> InputAlphabet { | |||
|
38 | get; | |||
|
39 | } | |||
|
40 | ||||
|
41 | /// <summary> | |||
|
42 | /// Алфавит состояний автомата | |||
|
43 | /// </summary> | |||
|
44 | /// <value>The state alphabet.</value> | |||
|
45 | IAlphabet<TState> StateAlphabet { | |||
|
46 | get; | |||
|
47 | } | |||
|
48 | ||||
|
49 | /// <summary> | |||
|
50 | /// Таблица переходов состояний автомата | |||
|
51 | /// </summary> | |||
|
52 | /// <returns>The transition table.</returns> | |||
|
53 | DFAStateDescriptior<TTag>[] GetTransitionTable(); | |||
|
54 | ||||
|
55 | } | |||
|
56 | } |
@@ -0,0 +1,23 | |||||
|
1 | using System; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton { | |||
|
4 | public interface IDFADefinitionBuilder<TTag> { | |||
|
5 | /// <summary> | |||
|
6 | /// Marks the state as final and assings tags. | |||
|
7 | /// </summary> | |||
|
8 | /// <param name="state">State.</param> | |||
|
9 | /// <param name="tags">Tags.</param> | |||
|
10 | void MarkFinalState(int state, params TTag[] tags); | |||
|
11 | ||||
|
12 | /// <summary> | |||
|
13 | /// Defines the transition from <paramref name="s1"/> to | |||
|
14 | /// <paramref name="s2"/> with input <paramref name="symbol"/>. | |||
|
15 | /// </summary> | |||
|
16 | /// <param name="s1">S1.</param> | |||
|
17 | /// <param name="s2">S2.</param> | |||
|
18 | /// <param name="symbol">Symbol.</param> | |||
|
19 | void DefineTransition(int s1, int s2, int symbol); | |||
|
20 | ||||
|
21 | } | |||
|
22 | } | |||
|
23 |
@@ -0,0 +1,105 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Diagnostics; | |||
|
5 | using System.Linq; | |||
|
6 | ||||
|
7 | namespace Implab.Automaton { | |||
|
8 | /// <summary> | |||
|
9 | /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index. | |||
|
10 | /// </summary> | |||
|
11 | public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> { | |||
|
12 | public const int UNCLASSIFIED = 0; | |||
|
13 | ||||
|
14 | int m_nextId = 1; | |||
|
15 | readonly int[] m_map; | |||
|
16 | ||||
|
17 | public int Count { | |||
|
18 | get { return m_nextId; } | |||
|
19 | } | |||
|
20 | ||||
|
21 | protected IndexedAlphabetBase(int mapSize) { | |||
|
22 | m_map = new int[mapSize]; | |||
|
23 | } | |||
|
24 | ||||
|
25 | protected IndexedAlphabetBase(int[] map) { | |||
|
26 | Debug.Assert(map != null); | |||
|
27 | ||||
|
28 | m_map = map; | |||
|
29 | m_nextId = map.Max() + 1; | |||
|
30 | } | |||
|
31 | ||||
|
32 | public int DefineSymbol(T symbol) { | |||
|
33 | var index = GetSymbolIndex(symbol); | |||
|
34 | if (m_map[index] == UNCLASSIFIED) | |||
|
35 | m_map[index] = m_nextId++; | |||
|
36 | return m_map[index]; | |||
|
37 | } | |||
|
38 | ||||
|
39 | public int DefineClass(IEnumerable<T> symbols) { | |||
|
40 | Safe.ArgumentNotNull(symbols, "symbols"); | |||
|
41 | symbols = symbols.Distinct(); | |||
|
42 | ||||
|
43 | foreach (var symbol in symbols) { | |||
|
44 | var index = GetSymbolIndex(symbol); | |||
|
45 | if (m_map[index] == UNCLASSIFIED) | |||
|
46 | m_map[GetSymbolIndex(symbol)] = m_nextId; | |||
|
47 | else | |||
|
48 | throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol)); | |||
|
49 | } | |||
|
50 | return m_nextId++; | |||
|
51 | } | |||
|
52 | ||||
|
53 | public List<T>[] CreateReverseMap() { | |||
|
54 | return | |||
|
55 | Enumerable.Range(UNCLASSIFIED, Count) | |||
|
56 | .Select( | |||
|
57 | i => InputSymbols | |||
|
58 | .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i) | |||
|
59 | .ToList() | |||
|
60 | ) | |||
|
61 | .ToArray(); | |||
|
62 | } | |||
|
63 | ||||
|
64 | public int[] Reclassify(IAlphabetBuilder<T> newAlphabet, IEnumerable<IEnumerable<int>> classes) { | |||
|
65 | Safe.ArgumentNotNull(newAlphabet, "newAlphabet"); | |||
|
66 | Safe.ArgumentNotNull(classes, "classes"); | |||
|
67 | var reverseMap = CreateReverseMap(); | |||
|
68 | ||||
|
69 | var translationMap = new int[Count]; | |||
|
70 | ||||
|
71 | foreach (var scl in classes) { | |||
|
72 | // skip if the supper class contains the unclassified element | |||
|
73 | if (scl.Contains(UNCLASSIFIED)) | |||
|
74 | continue; | |||
|
75 | var range = new List<T>(); | |||
|
76 | foreach (var cl in scl) { | |||
|
77 | if (cl < 0 || cl >= reverseMap.Length) | |||
|
78 | throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl)); | |||
|
79 | range.AddRange(reverseMap[cl]); | |||
|
80 | } | |||
|
81 | var newClass = newAlphabet.DefineClass(range); | |||
|
82 | foreach (var cl in scl) | |||
|
83 | translationMap[cl] = newClass; | |||
|
84 | } | |||
|
85 | ||||
|
86 | return translationMap; | |||
|
87 | } | |||
|
88 | ||||
|
89 | public virtual int Translate(T symbol) { | |||
|
90 | return m_map[GetSymbolIndex(symbol)]; | |||
|
91 | } | |||
|
92 | ||||
|
93 | public abstract int GetSymbolIndex(T symbol); | |||
|
94 | ||||
|
95 | public abstract IEnumerable<T> InputSymbols { get; } | |||
|
96 | ||||
|
97 | /// <summary> | |||
|
98 | /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion. | |||
|
99 | /// </summary> | |||
|
100 | /// <returns>The translation map.</returns> | |||
|
101 | public int[] GetTranslationMap() { | |||
|
102 | return m_map; | |||
|
103 | } | |||
|
104 | } | |||
|
105 | } |
@@ -0,0 +1,17 | |||||
|
1 | using System; | |||
|
2 | using System.Collections.Generic; | |||
|
3 | using System.Linq; | |||
|
4 | using System.Text; | |||
|
5 | ||||
|
6 | namespace Implab.Automaton { | |||
|
7 | [Serializable] | |||
|
8 | public class ParserException : Exception { | |||
|
9 | public ParserException() { } | |||
|
10 | public ParserException(string message) : base(message) { } | |||
|
11 | public ParserException(string message, Exception inner) : base(message, inner) { } | |||
|
12 | protected ParserException( | |||
|
13 | System.Runtime.Serialization.SerializationInfo info, | |||
|
14 | System.Runtime.Serialization.StreamingContext context) | |||
|
15 | : base(info, context) { } | |||
|
16 | } | |||
|
17 | } |
@@ -0,0 +1,17 | |||||
|
1 | using System; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton.RegularExpressions { | |||
|
4 | public class AltToken<TTag>: BinaryToken<TTag> { | |||
|
5 | public AltToken(Token<TTag> left, Token<TTag> right) | |||
|
6 | : base(left, right) { | |||
|
7 | } | |||
|
8 | ||||
|
9 | public override void Accept(IVisitor<TTag> visitor) { | |||
|
10 | Safe.ArgumentNotNull(visitor, "visitor"); | |||
|
11 | visitor.Visit(this); | |||
|
12 | } | |||
|
13 | public override string ToString() { | |||
|
14 | return String.Format(Right is BinaryToken<TTag> ? "{0}|({1})" : "{0}|{1}", Left, Right); | |||
|
15 | } | |||
|
16 | } | |||
|
17 | } |
@@ -0,0 +1,21 | |||||
|
1 | using Implab; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton.RegularExpressions { | |||
|
4 | public abstract class BinaryToken<TTag> : Token<TTag> { | |||
|
5 | readonly Token<TTag> m_left; | |||
|
6 | readonly Token<TTag> m_right; | |||
|
7 | ||||
|
8 | public Token<TTag> Left { | |||
|
9 | get { return m_left; } | |||
|
10 | } | |||
|
11 | ||||
|
12 | public Token<TTag> Right { | |||
|
13 | get { return m_right; } | |||
|
14 | } | |||
|
15 | ||||
|
16 | protected BinaryToken(Token<TTag> left, Token<TTag> right) { | |||
|
17 | Safe.ArgumentNotNull(m_left = left, "left"); | |||
|
18 | Safe.ArgumentNotNull(m_right = right, "right"); | |||
|
19 | } | |||
|
20 | } | |||
|
21 | } |
@@ -0,0 +1,22 | |||||
|
1 | using System; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton.RegularExpressions { | |||
|
4 | public class CatToken<TTag> : BinaryToken<TTag> { | |||
|
5 | public CatToken(Token<TTag> left, Token<TTag> right) | |||
|
6 | : base(left, right) { | |||
|
7 | } | |||
|
8 | ||||
|
9 | public override void Accept(IVisitor<TTag> visitor) { | |||
|
10 | Safe.ArgumentNotNull(visitor, "visitor"); | |||
|
11 | visitor.Visit(this); | |||
|
12 | } | |||
|
13 | ||||
|
14 | public override string ToString() { | |||
|
15 | return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right)); | |||
|
16 | } | |||
|
17 | ||||
|
18 | static string FormatToken(Token<TTag> token) { | |||
|
19 | return String.Format(token is AltToken<TTag> ? "({0})" : "{0}", token); | |||
|
20 | } | |||
|
21 | } | |||
|
22 | } |
@@ -0,0 +1,181 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Diagnostics; | |||
|
5 | using System.Linq; | |||
|
6 | ||||
|
7 | namespace Implab.Automaton.RegularExpressions { | |||
|
8 | /// <summary> | |||
|
9 | /// Используется для построения ДКА по регулярному выражению, сначала обходит | |||
|
10 | /// регулярное выражение и вычисляет followpos, затем используется метод | |||
|
11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |||
|
12 | /// </summary> | |||
|
13 | public class DFABuilder<TTag> : IVisitor<TTag> { | |||
|
14 | int m_idx = 0; | |||
|
15 | Token<TTag> m_root; | |||
|
16 | HashSet<int> m_firstpos; | |||
|
17 | HashSet<int> m_lastpos; | |||
|
18 | ||||
|
19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |||
|
20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |||
|
21 | readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); | |||
|
22 | ||||
|
23 | public Dictionary<int, HashSet<int>> FollowposMap { | |||
|
24 | get { return m_followpos; } | |||
|
25 | } | |||
|
26 | ||||
|
27 | public HashSet<int> Followpos(int pos) { | |||
|
28 | HashSet<int> set; | |||
|
29 | if (m_followpos.TryGetValue(pos, out set)) | |||
|
30 | return set; | |||
|
31 | return m_followpos[pos] = new HashSet<int>(); | |||
|
32 | } | |||
|
33 | ||||
|
34 | bool Nullable(object n) { | |||
|
35 | if (n is EmptyToken<TTag> || n is StarToken<TTag>) | |||
|
36 | return true; | |||
|
37 | if (n is AltToken<TTag>) | |||
|
38 | return Nullable(((AltToken<TTag>)n).Left) || Nullable(((AltToken<TTag>)n).Right); | |||
|
39 | if (n is CatToken<TTag>) | |||
|
40 | return Nullable(((CatToken<TTag>)n).Left) && Nullable(((CatToken<TTag>)n).Right); | |||
|
41 | return false; | |||
|
42 | } | |||
|
43 | ||||
|
44 | ||||
|
45 | public void Visit(AltToken<TTag> token) { | |||
|
46 | if (m_root == null) | |||
|
47 | m_root = token; | |||
|
48 | var firtspos = new HashSet<int>(); | |||
|
49 | var lastpos = new HashSet<int>(); | |||
|
50 | ||||
|
51 | token.Left.Accept(this); | |||
|
52 | firtspos.UnionWith(m_firstpos); | |||
|
53 | lastpos.UnionWith(m_lastpos); | |||
|
54 | ||||
|
55 | token.Right.Accept(this); | |||
|
56 | firtspos.UnionWith(m_firstpos); | |||
|
57 | lastpos.UnionWith(m_lastpos); | |||
|
58 | ||||
|
59 | m_firstpos = firtspos; | |||
|
60 | m_lastpos = lastpos; | |||
|
61 | } | |||
|
62 | ||||
|
63 | public void Visit(StarToken<TTag> token) { | |||
|
64 | if (m_root == null) | |||
|
65 | m_root = token; | |||
|
66 | token.Token.Accept(this); | |||
|
67 | ||||
|
68 | foreach (var i in m_lastpos) | |||
|
69 | Followpos(i).UnionWith(m_firstpos); | |||
|
70 | } | |||
|
71 | ||||
|
72 | public void Visit(CatToken<TTag> token) { | |||
|
73 | if (m_root == null) | |||
|
74 | m_root = token; | |||
|
75 | ||||
|
76 | var firtspos = new HashSet<int>(); | |||
|
77 | var lastpos = new HashSet<int>(); | |||
|
78 | token.Left.Accept(this); | |||
|
79 | firtspos.UnionWith(m_firstpos); | |||
|
80 | var leftLastpos = m_lastpos; | |||
|
81 | ||||
|
82 | token.Right.Accept(this); | |||
|
83 | lastpos.UnionWith(m_lastpos); | |||
|
84 | var rightFirstpos = m_firstpos; | |||
|
85 | ||||
|
86 | if (Nullable(token.Left)) | |||
|
87 | firtspos.UnionWith(rightFirstpos); | |||
|
88 | ||||
|
89 | if (Nullable(token.Right)) | |||
|
90 | lastpos.UnionWith(leftLastpos); | |||
|
91 | ||||
|
92 | m_firstpos = firtspos; | |||
|
93 | m_lastpos = lastpos; | |||
|
94 | ||||
|
95 | foreach (var i in leftLastpos) | |||
|
96 | Followpos(i).UnionWith(rightFirstpos); | |||
|
97 | ||||
|
98 | } | |||
|
99 | ||||
|
100 | public void Visit(EmptyToken<TTag> token) { | |||
|
101 | if (m_root == null) | |||
|
102 | m_root = token; | |||
|
103 | } | |||
|
104 | ||||
|
105 | public void Visit(SymbolToken<TTag> token) { | |||
|
106 | if (m_root == null) | |||
|
107 | m_root = token; | |||
|
108 | m_idx++; | |||
|
109 | m_indexes[m_idx] = token.Value; | |||
|
110 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |||
|
111 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |||
|
112 | } | |||
|
113 | ||||
|
114 | public void Visit(EndToken<TTag> token) { | |||
|
115 | if (m_root == null) | |||
|
116 | m_root = token; | |||
|
117 | m_idx++; | |||
|
118 | m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | |||
|
119 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |||
|
120 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |||
|
121 | Followpos(m_idx); | |||
|
122 | m_ends.Add(m_idx, token.Tag); | |||
|
123 | } | |||
|
124 | ||||
|
125 | public void BuildDFA(IDFADefinitionBuilder<TTag> dfa, IAlphabetBuilder<int> states) { | |||
|
126 | Safe.ArgumentNotNull(dfa,"dfa"); | |||
|
127 | ||||
|
128 | var stateMap = new Dictionary<HashSet<int>, int>(new CustomEqualityComparer<HashSet<int>>( | |||
|
129 | (x, y) => x.SetEquals(y), | |||
|
130 | x => x.Sum(n => n.GetHashCode()) | |||
|
131 | )); | |||
|
132 | ||||
|
133 | int nextState = 0; | |||
|
134 | ||||
|
135 | int initialState = states.DefineSymbol(nextState++); | |||
|
136 | stateMap[m_firstpos] = initialState; | |||
|
137 | ||||
|
138 | var tags = GetStateTags(m_firstpos); | |||
|
139 | if (tags != null && tags.Length > 0) | |||
|
140 | dfa.MarkFinalState(initialState, tags); | |||
|
141 | ||||
|
142 | var inputMax = m_indexes.Values.Max(); | |||
|
143 | var queue = new Queue<HashSet<int>>(); | |||
|
144 | ||||
|
145 | queue.Enqueue(m_firstpos); | |||
|
146 | ||||
|
147 | while (queue.Count > 0) { | |||
|
148 | var state = queue.Dequeue(); | |||
|
149 | var s1 = stateMap[state]; | |||
|
150 | ||||
|
151 | for (int a = 0; a <= inputMax; a++) { | |||
|
152 | var next = new HashSet<int>(); | |||
|
153 | foreach (var p in state) { | |||
|
154 | if (m_indexes[p] == a) { | |||
|
155 | next.UnionWith(Followpos(p)); | |||
|
156 | } | |||
|
157 | } | |||
|
158 | if (next.Count > 0) { | |||
|
159 | int s2; | |||
|
160 | if (!stateMap.TryGetValue(next, out s2)) { | |||
|
161 | s2 = states.DefineSymbol(nextState++); | |||
|
162 | stateMap[next] = s2; | |||
|
163 | tags = GetStateTags(next); | |||
|
164 | if (tags != null && tags.Length > 0) | |||
|
165 | dfa.MarkFinalState(s2, tags); | |||
|
166 | ||||
|
167 | queue.Enqueue(next); | |||
|
168 | } | |||
|
169 | dfa.DefineTransition(s1, s2, a); | |||
|
170 | } | |||
|
171 | } | |||
|
172 | } | |||
|
173 | } | |||
|
174 | ||||
|
175 | TTag[] GetStateTags(IEnumerable<int> state) { | |||
|
176 | Debug.Assert(state != null); | |||
|
177 | return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); | |||
|
178 | } | |||
|
179 | ||||
|
180 | } | |||
|
181 | } |
@@ -0,0 +1,13 | |||||
|
1 | using Implab; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton.RegularExpressions { | |||
|
4 | public class EmptyToken<TTag> : Token<TTag> { | |||
|
5 | public override void Accept(IVisitor<TTag> visitor) { | |||
|
6 | Safe.ArgumentNotNull(visitor, "visitor"); | |||
|
7 | visitor.Visit(this); | |||
|
8 | } | |||
|
9 | public override string ToString() { | |||
|
10 | return "$"; | |||
|
11 | } | |||
|
12 | } | |||
|
13 | } |
@@ -0,0 +1,32 | |||||
|
1 | using Implab; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton.RegularExpressions { | |||
|
4 | /// <summary> | |||
|
5 | /// Конечный символ расширенного регулярного выражения, при построении ДКА | |||
|
6 | /// используется для определения конечных состояний. | |||
|
7 | /// </summary> | |||
|
8 | public class EndToken<TTag>: Token<TTag> { | |||
|
9 | ||||
|
10 | TTag m_tag; | |||
|
11 | ||||
|
12 | public EndToken(TTag tag) { | |||
|
13 | m_tag = tag; | |||
|
14 | } | |||
|
15 | ||||
|
16 | public EndToken() | |||
|
17 | : this(0) { | |||
|
18 | } | |||
|
19 | ||||
|
20 | public TTag Tag { | |||
|
21 | get { return m_tag; } | |||
|
22 | } | |||
|
23 | ||||
|
24 | public override void Accept(IVisitor<TTag> visitor) { | |||
|
25 | Safe.ArgumentNotNull(visitor, "visitor"); | |||
|
26 | visitor.Visit(this); | |||
|
27 | } | |||
|
28 | public override string ToString() { | |||
|
29 | return "#"; | |||
|
30 | } | |||
|
31 | } | |||
|
32 | } |
@@ -0,0 +1,108 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Automaton.RegularExpressions { | |||
|
9 | /// <summary> | |||
|
10 | /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. | |||
|
11 | /// </summary> | |||
|
12 | /// <typeparam name="TGrammar"></typeparam> | |||
|
13 | public abstract class Grammar<TGrammar> where TGrammar: Grammar<TGrammar>, new() { | |||
|
14 | static TGrammar _instance; | |||
|
15 | ||||
|
16 | public static TGrammar Instance{ | |||
|
17 | get { | |||
|
18 | if (_instance == null) | |||
|
19 | _instance = new TGrammar(); | |||
|
20 | return _instance; | |||
|
21 | } | |||
|
22 | } | |||
|
23 | ||||
|
24 | readonly CharAlphabet m_alphabet = new CharAlphabet(); | |||
|
25 | ||||
|
26 | public CharAlphabet Alphabet { | |||
|
27 | get { return m_alphabet; } | |||
|
28 | } | |||
|
29 | ||||
|
30 | public SymbolToken UnclassifiedToken() { | |||
|
31 | return new SymbolToken(CharAlphabet.UNCLASSIFIED); | |||
|
32 | } | |||
|
33 | ||||
|
34 | public void DefineAlphabet(IEnumerable<char> alphabet) { | |||
|
35 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |||
|
36 | ||||
|
37 | foreach (var ch in alphabet) | |||
|
38 | m_alphabet.DefineSymbol(ch); | |||
|
39 | } | |||
|
40 | public Token SymbolRangeToken(char start, char end) { | |||
|
41 | return SymbolToken(Enumerable.Range(start, end - start + 1).Select(x => (char)x)); | |||
|
42 | } | |||
|
43 | ||||
|
44 | public Token SymbolToken(char symbol) { | |||
|
45 | return Token.New(TranslateOrAdd(symbol)); | |||
|
46 | } | |||
|
47 | ||||
|
48 | public Token SymbolToken(IEnumerable<char> symbols) { | |||
|
49 | Safe.ArgumentNotNull(symbols, "symbols"); | |||
|
50 | ||||
|
51 | return Token.New(TranslateOrAdd(symbols).ToArray()); | |||
|
52 | } | |||
|
53 | ||||
|
54 | public Token SymbolSetToken(params char[] set) { | |||
|
55 | return SymbolToken(set); | |||
|
56 | } | |||
|
57 | ||||
|
58 | int TranslateOrAdd(char ch) { | |||
|
59 | var t = m_alphabet.Translate(ch); | |||
|
60 | if (t == CharAlphabet.UNCLASSIFIED) | |||
|
61 | t = m_alphabet.DefineSymbol(ch); | |||
|
62 | return t; | |||
|
63 | } | |||
|
64 | ||||
|
65 | IEnumerable<int> TranslateOrAdd(IEnumerable<char> symbols) { | |||
|
66 | return symbols.Distinct().Select(TranslateOrAdd); | |||
|
67 | } | |||
|
68 | ||||
|
69 | int TranslateOrDie(char ch) { | |||
|
70 | var t = m_alphabet.Translate(ch); | |||
|
71 | if (t == CharAlphabet.UNCLASSIFIED) | |||
|
72 | throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); | |||
|
73 | return t; | |||
|
74 | } | |||
|
75 | ||||
|
76 | IEnumerable<int> TranslateOrDie(IEnumerable<char> symbols) { | |||
|
77 | return symbols.Distinct().Select(TranslateOrDie); | |||
|
78 | } | |||
|
79 | ||||
|
80 | public Token SymbolTokenExcept(IEnumerable<char> symbols) { | |||
|
81 | Safe.ArgumentNotNull(symbols, "symbols"); | |||
|
82 | ||||
|
83 | return Token.New( Enumerable.Range(0, m_alphabet.Count).Except(TranslateOrDie(symbols)).ToArray()); | |||
|
84 | } | |||
|
85 | ||||
|
86 | protected CDFADefinition BuildDFA(Token lang) { | |||
|
87 | Safe.ArgumentNotNull(lang, "lang"); | |||
|
88 | ||||
|
89 | var dfa = new CDFADefinition(m_alphabet); | |||
|
90 | ||||
|
91 | var builder = new DFABuilder(); | |||
|
92 | ||||
|
93 | lang.Accept( builder ); | |||
|
94 | ||||
|
95 | builder.BuildDFA(dfa); | |||
|
96 | if (dfa.InitialStateIsFinal) | |||
|
97 | throw new ApplicationException("The specified language contains empty token"); | |||
|
98 | ||||
|
99 | return dfa.Optimize(); | |||
|
100 | } | |||
|
101 | ||||
|
102 | ||||
|
103 | ||||
|
104 | //protected abstract TGrammar CreateInstance(); | |||
|
105 | } | |||
|
106 | ||||
|
107 | ||||
|
108 | } |
@@ -0,0 +1,13 | |||||
|
1 | namespace Implab.Automaton.RegularExpressions { | |||
|
2 | /// <summary> | |||
|
3 | /// Интерфейс обходчика синтаксического дерева регулярного выражения | |||
|
4 | /// </summary> | |||
|
5 | public interface IVisitor<TTag> { | |||
|
6 | void Visit(AltToken<TTag> token); | |||
|
7 | void Visit(StarToken<TTag> token); | |||
|
8 | void Visit(CatToken<TTag> token); | |||
|
9 | void Visit(EmptyToken<TTag> token); | |||
|
10 | void Visit(EndToken<TTag> token); | |||
|
11 | void Visit(SymbolToken<TTag> token); | |||
|
12 | } | |||
|
13 | } |
@@ -0,0 +1,34 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using System.Text; | |||
|
6 | using System.Threading.Tasks; | |||
|
7 | ||||
|
8 | namespace Implab.Automaton.RegularExpressions { | |||
|
9 | /// <summary> | |||
|
10 | /// Замыкание выражения с 0 и более повторов. | |||
|
11 | /// </summary> | |||
|
12 | public class StarToken<TTag>: Token<TTag> { | |||
|
13 | ||||
|
14 | Token<TTag> m_token; | |||
|
15 | ||||
|
16 | public Token<TTag> Token { | |||
|
17 | get { return m_token; } | |||
|
18 | } | |||
|
19 | ||||
|
20 | public StarToken(Token<TTag> token) { | |||
|
21 | Safe.ArgumentNotNull(token, "token"); | |||
|
22 | m_token = token; | |||
|
23 | } | |||
|
24 | ||||
|
25 | public override void Accept(IVisitor<TTag> visitor) { | |||
|
26 | Safe.ArgumentNotNull(visitor, "visitor"); | |||
|
27 | visitor.Visit(this); | |||
|
28 | } | |||
|
29 | ||||
|
30 | public override string ToString() { | |||
|
31 | return String.Format("({0})*", Token); | |||
|
32 | } | |||
|
33 | } | |||
|
34 | } |
@@ -0,0 +1,27 | |||||
|
1 | using Implab; | |||
|
2 | ||||
|
3 | namespace Implab.Automaton.RegularExpressions { | |||
|
4 | /// <summary> | |||
|
5 | /// Выражение, соответсвующее одному символу. | |||
|
6 | /// </summary> | |||
|
7 | public class SymbolToken<TTag> : Token<TTag> { | |||
|
8 | int m_value; | |||
|
9 | ||||
|
10 | public int Value { | |||
|
11 | get { return m_value; } | |||
|
12 | } | |||
|
13 | ||||
|
14 | public SymbolToken(int value) { | |||
|
15 | m_value = value; | |||
|
16 | } | |||
|
17 | public override void Accept(IVisitor<TTag> visitor) { | |||
|
18 | Safe.ArgumentNotNull(visitor, "visitor"); | |||
|
19 | ||||
|
20 | visitor.Visit(this); | |||
|
21 | } | |||
|
22 | ||||
|
23 | public override string ToString() { | |||
|
24 | return Value.ToString(); | |||
|
25 | } | |||
|
26 | } | |||
|
27 | } |
@@ -0,0 +1,63 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Linq; | |||
|
4 | ||||
|
5 | namespace Implab.Automaton.RegularExpressions { | |||
|
6 | public abstract class Token<TTag> { | |||
|
7 | public abstract void Accept(IVisitor<TTag> visitor); | |||
|
8 | ||||
|
9 | public Token<TTag> Extend() { | |||
|
10 | return Cat(new EndToken<TTag>()); | |||
|
11 | } | |||
|
12 | ||||
|
13 | public Token<TTag> Tag<T>(T tag) where T : IConvertible { | |||
|
14 | return Cat(new EndToken<TTag>(tag)); | |||
|
15 | } | |||
|
16 | ||||
|
17 | public Token<TTag> Cat(Token<TTag> right) { | |||
|
18 | return new CatToken<TTag>(this, right); | |||
|
19 | } | |||
|
20 | ||||
|
21 | public Token<TTag> Or(Token<TTag> right) { | |||
|
22 | return new AltToken<TTag>(this, right); | |||
|
23 | } | |||
|
24 | ||||
|
25 | public Token<TTag> Optional() { | |||
|
26 | return Or(new EmptyToken<TTag>()); | |||
|
27 | } | |||
|
28 | ||||
|
29 | public Token<TTag> EClosure() { | |||
|
30 | return new StarToken<TTag>(this); | |||
|
31 | } | |||
|
32 | ||||
|
33 | public Token<TTag> Closure() { | |||
|
34 | return Cat(new StarToken<TTag>(this)); | |||
|
35 | } | |||
|
36 | ||||
|
37 | public Token<TTag> Repeat(int count) { | |||
|
38 | Token<TTag> token = null; | |||
|
39 | ||||
|
40 | for (int i = 0; i < count; i++) | |||
|
41 | token = token != null ? token.Cat(this) : this; | |||
|
42 | return token ?? new EmptyToken<TTag>(); | |||
|
43 | } | |||
|
44 | ||||
|
45 | public Token<TTag> Repeat(int min, int max) { | |||
|
46 | if (min > max || min < 1) | |||
|
47 | throw new ArgumentOutOfRangeException(); | |||
|
48 | var token = Repeat(min); | |||
|
49 | ||||
|
50 | for (int i = min; i < max; i++) | |||
|
51 | token = token.Cat( this.Optional() ); | |||
|
52 | return token; | |||
|
53 | } | |||
|
54 | ||||
|
55 | public static Token<TTag> New<T>(params T[] set) where T : struct, IConvertible { | |||
|
56 | Safe.ArgumentNotNull(set, "set"); | |||
|
57 | Token<TTag> token = null; | |||
|
58 | foreach(var c in set.Distinct()) | |||
|
59 | token = token == null ? new SymbolToken<TTag>(c) : token.Or(new SymbolToken<TTag>(c)); | |||
|
60 | return token; | |||
|
61 | } | |||
|
62 | } | |||
|
63 | } |
@@ -0,0 +1,259 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.IO; | |||
|
5 | using Implab.Components; | |||
|
6 | ||||
|
7 | namespace Implab.Automaton { | |||
|
8 | /// <summary> | |||
|
9 | /// Базовый класс для разбора потока входных символов на токены. | |||
|
10 | /// </summary> | |||
|
11 | /// <remarks> | |||
|
12 | /// Сканнер имеет внутри буффер с симолами входного текста, по которому перемещаются два | |||
|
13 | /// указателя, начала и конца токена, при перемещении искользуется ДКА для определения | |||
|
14 | /// конца токена и допустимости текущего символа. | |||
|
15 | /// </remarks> | |||
|
16 | public abstract class Scanner<TTag> : Disposable { | |||
|
17 | struct ScannerConfig { | |||
|
18 | public DFAStateDescriptior<TTag>[] states; | |||
|
19 | public int[] alphabetMap; | |||
|
20 | } | |||
|
21 | ||||
|
22 | Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>(); | |||
|
23 | ||||
|
24 | DFAStateDescriptior<TTag>[] m_states; | |||
|
25 | int[] m_alphabetMap; | |||
|
26 | ||||
|
27 | protected DFAStateDescriptior<TTag> m_currentState; | |||
|
28 | int m_previewCode; | |||
|
29 | ||||
|
30 | protected int m_tokenLen = 0; | |||
|
31 | protected int m_tokenOffset; | |||
|
32 | ||||
|
33 | protected char[] m_buffer; | |||
|
34 | protected int m_bufferSize; | |||
|
35 | protected int m_pointer; | |||
|
36 | ||||
|
37 | TextReader m_reader; | |||
|
38 | bool m_disposeReader; | |||
|
39 | int m_chunkSize = 1024; // 1k | |||
|
40 | int m_limit = 10 * 1024 * 1024; // 10Mb | |||
|
41 | ||||
|
42 | protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet) { | |||
|
43 | Safe.ArgumentNotEmpty(states, "states"); | |||
|
44 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |||
|
45 | ||||
|
46 | m_states = states; | |||
|
47 | m_alphabetMap = alphabet; | |||
|
48 | ||||
|
49 | Feed(new char[0]); | |||
|
50 | } | |||
|
51 | ||||
|
52 | /// <summary> | |||
|
53 | /// Заполняет входными данными буффер. | |||
|
54 | /// </summary> | |||
|
55 | /// <param name="data">Данные для обработки.</param> | |||
|
56 | /// <remarks>Копирование данных не происходит, переданный массив используется в | |||
|
57 | /// качестве входного буффера.</remarks> | |||
|
58 | public void Feed(char[] data) { | |||
|
59 | Safe.ArgumentNotNull(data, "data"); | |||
|
60 | ||||
|
61 | Feed(data, data.Length); | |||
|
62 | } | |||
|
63 | ||||
|
64 | /// <summary> | |||
|
65 | /// Заполняет буффур чтения входными данными. | |||
|
66 | /// </summary> | |||
|
67 | /// <param name="data">Данные для обработки.</param> | |||
|
68 | /// <param name="length">Длина данных для обработки.</param> | |||
|
69 | /// <remarks>Копирование данных не происходит, переданный массив используется в | |||
|
70 | /// качестве входного буффера.</remarks> | |||
|
71 | public void Feed(char[] data, int length) { | |||
|
72 | Safe.ArgumentNotNull(data, "data"); | |||
|
73 | Safe.ArgumentInRange(length, 0, data.Length, "length"); | |||
|
74 | AssertNotDisposed(); | |||
|
75 | ||||
|
76 | m_pointer = -1; | |||
|
77 | m_buffer = data; | |||
|
78 | m_bufferSize = length; | |||
|
79 | Shift(); | |||
|
80 | } | |||
|
81 | ||||
|
82 | public void Feed(TextReader reader, bool dispose) { | |||
|
83 | Safe.ArgumentNotNull(reader, "reader"); | |||
|
84 | AssertNotDisposed(); | |||
|
85 | ||||
|
86 | if (m_reader != null && m_disposeReader) | |||
|
87 | m_reader.Dispose(); | |||
|
88 | ||||
|
89 | m_reader = reader; | |||
|
90 | m_disposeReader = dispose; | |||
|
91 | m_pointer = -1; | |||
|
92 | m_buffer = new char[m_chunkSize]; | |||
|
93 | m_bufferSize = 0; | |||
|
94 | Shift(); | |||
|
95 | } | |||
|
96 | ||||
|
97 | /// <summary> | |||
|
98 | /// Получает текущий токен в виде строки. | |||
|
99 | /// </summary> | |||
|
100 | /// <returns></returns> | |||
|
101 | protected string GetTokenValue() { | |||
|
102 | return new String(m_buffer, m_tokenOffset, m_tokenLen); | |||
|
103 | } | |||
|
104 | ||||
|
105 | /// <summary> | |||
|
106 | /// Метки текущего токена, которые были назначены в регулярном выражении. | |||
|
107 | /// </summary> | |||
|
108 | protected TTag[] TokenTags { | |||
|
109 | get { | |||
|
110 | return m_currentState.tag; | |||
|
111 | } | |||
|
112 | } | |||
|
113 | ||||
|
114 | /// <summary> | |||
|
115 | /// Признак конца данных | |||
|
116 | /// </summary> | |||
|
117 | public bool EOF { | |||
|
118 | get { | |||
|
119 | return m_pointer >= m_bufferSize; | |||
|
120 | } | |||
|
121 | } | |||
|
122 | ||||
|
123 | /// <summary> | |||
|
124 | /// Читает следующий токен, при этом <see cref="m_tokenOffset"/> указывает на начало токена, | |||
|
125 | /// <see cref="m_tokenLen"/> на длину токена, <see cref="m_buffer"/> - массив символов, в | |||
|
126 | /// котором находится токен. | |||
|
127 | /// </summary> | |||
|
128 | /// <returns><c>false</c> - достигнут конец данных, токен не прочитан.</returns> | |||
|
129 | protected bool ReadTokenInternal() { | |||
|
130 | if (m_pointer >= m_bufferSize) | |||
|
131 | return false; | |||
|
132 | ||||
|
133 | m_currentState = m_states[DFADefinition.INITIAL_STATE]; | |||
|
134 | m_tokenLen = 0; | |||
|
135 | m_tokenOffset = m_pointer; | |||
|
136 | int nextState; | |||
|
137 | do { | |||
|
138 | nextState = m_currentState.transitions[m_previewCode]; | |||
|
139 | if (nextState == DFAConst.UNREACHABLE_STATE) { | |||
|
140 | if (m_currentState.final) | |||
|
141 | return true; | |||
|
142 | else | |||
|
143 | throw new ParserException( | |||
|
144 | String.Format( | |||
|
145 | "Unexpected symbol '{0}', at pos {1}", | |||
|
146 | m_buffer[m_pointer], | |||
|
147 | Position | |||
|
148 | ) | |||
|
149 | ); | |||
|
150 | } else { | |||
|
151 | m_currentState = m_states[nextState]; | |||
|
152 | m_tokenLen++; | |||
|
153 | } | |||
|
154 | ||||
|
155 | } while (Shift()); | |||
|
156 | ||||
|
157 | // END OF DATA | |||
|
158 | if (!m_currentState.final) | |||
|
159 | throw new ParserException("Unexpected end of data"); | |||
|
160 | ||||
|
161 | return true; | |||
|
162 | } | |||
|
163 | ||||
|
164 | ||||
|
165 | bool Shift() { | |||
|
166 | m_pointer++; | |||
|
167 | ||||
|
168 | if (m_pointer >= m_bufferSize) { | |||
|
169 | if (!ReadNextChunk()) | |||
|
170 | return false; | |||
|
171 | } | |||
|
172 | ||||
|
173 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; | |||
|
174 | ||||
|
175 | return true; | |||
|
176 | } | |||
|
177 | ||||
|
178 | bool ReadNextChunk() { | |||
|
179 | if (m_reader == null) | |||
|
180 | return false; | |||
|
181 | ||||
|
182 | // extend buffer if nesessary | |||
|
183 | if (m_pointer + m_chunkSize > m_buffer.Length) { | |||
|
184 | // trim unused buffer head | |||
|
185 | var size = m_tokenLen + m_chunkSize; | |||
|
186 | if (size >= m_limit) | |||
|
187 | throw new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit)); | |||
|
188 | var temp = new char[size]; | |||
|
189 | Array.Copy(m_buffer, m_tokenOffset, temp, 0, m_tokenLen); | |||
|
190 | m_pointer -= m_tokenOffset; | |||
|
191 | m_bufferSize -= m_tokenOffset; | |||
|
192 | m_tokenOffset = 0; | |||
|
193 | m_buffer = temp; | |||
|
194 | } | |||
|
195 | ||||
|
196 | var read = m_reader.Read(m_buffer, m_tokenLen, m_chunkSize); | |||
|
197 | if (read == 0) | |||
|
198 | return false; | |||
|
199 | ||||
|
200 | m_bufferSize += read; | |||
|
201 | ||||
|
202 | return true; | |||
|
203 | } | |||
|
204 | ||||
|
205 | /// <summary> | |||
|
206 | /// Позиция сканнера во входном буфере | |||
|
207 | /// </summary> | |||
|
208 | public int Position { | |||
|
209 | get { | |||
|
210 | return m_pointer + 1; | |||
|
211 | } | |||
|
212 | } | |||
|
213 | ||||
|
214 | /// <summary> | |||
|
215 | /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей | |||
|
216 | /// группировки. | |||
|
217 | /// </summary> | |||
|
218 | /// <param name="states">Таблица состояний нового ДКА</param> | |||
|
219 | /// <param name="alphabet">Таблица входных символов для нового ДКА</param> | |||
|
220 | protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet) { | |||
|
221 | Safe.ArgumentNotNull(states, "dfa"); | |||
|
222 | ||||
|
223 | m_defs.Push(new ScannerConfig { | |||
|
224 | states = m_states, | |||
|
225 | alphabetMap = m_alphabetMap | |||
|
226 | }); | |||
|
227 | ||||
|
228 | m_states = states; | |||
|
229 | m_alphabetMap = alphabet; | |||
|
230 | ||||
|
231 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; | |||
|
232 | } | |||
|
233 | ||||
|
234 | /// <summary> | |||
|
235 | /// Восстанавливает предыдущей ДКА сканнера. | |||
|
236 | /// </summary> | |||
|
237 | protected void Restore() { | |||
|
238 | if (m_defs.Count == 0) | |||
|
239 | throw new InvalidOperationException(); | |||
|
240 | var prev = m_defs.Pop(); | |||
|
241 | m_states = prev.states; | |||
|
242 | m_alphabetMap = prev.alphabetMap; | |||
|
243 | m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; | |||
|
244 | } | |||
|
245 | ||||
|
246 | protected override void Dispose(bool disposing) { | |||
|
247 | if (disposing) { | |||
|
248 | if (m_reader != null && m_disposeReader) | |||
|
249 | m_reader.Dispose(); | |||
|
250 | m_buffer = null; | |||
|
251 | m_bufferSize = 0; | |||
|
252 | m_pointer = 0; | |||
|
253 | m_tokenLen = 0; | |||
|
254 | m_tokenOffset = 0; | |||
|
255 | } | |||
|
256 | base.Dispose(disposing); | |||
|
257 | } | |||
|
258 | } | |||
|
259 | } |
@@ -102,26 +102,6 | |||||
102 | <Compile Include="Parallels\ArrayTraits.cs" /> |
|
102 | <Compile Include="Parallels\ArrayTraits.cs" /> | |
103 | <Compile Include="Parallels\MTQueue.cs" /> |
|
103 | <Compile Include="Parallels\MTQueue.cs" /> | |
104 | <Compile Include="Parallels\WorkerPool.cs" /> |
|
104 | <Compile Include="Parallels\WorkerPool.cs" /> | |
105 | <Compile Include="Parsing\AltToken.cs" /> |
|
|||
106 | <Compile Include="Parsing\BinaryToken.cs" /> |
|
|||
107 | <Compile Include="Parsing\CatToken.cs" /> |
|
|||
108 | <Compile Include="Parsing\CDFADefinition.cs" /> |
|
|||
109 | <Compile Include="Parsing\DFABuilder.cs" /> |
|
|||
110 | <Compile Include="Parsing\DFAStateDescriptor.cs" /> |
|
|||
111 | <Compile Include="Parsing\DFAutomaton.cs" /> |
|
|||
112 | <Compile Include="Parsing\EDFADefinition.cs" /> |
|
|||
113 | <Compile Include="Parsing\EmptyToken.cs" /> |
|
|||
114 | <Compile Include="Parsing\EndToken.cs" /> |
|
|||
115 | <Compile Include="Parsing\EnumAlphabet.cs" /> |
|
|||
116 | <Compile Include="Parsing\Grammar.cs" /> |
|
|||
117 | <Compile Include="Parsing\IAlphabet.cs" /> |
|
|||
118 | <Compile Include="Parsing\IDFADefinition.cs" /> |
|
|||
119 | <Compile Include="Parsing\IVisitor.cs" /> |
|
|||
120 | <Compile Include="Parsing\ParserException.cs" /> |
|
|||
121 | <Compile Include="Parsing\Scanner.cs" /> |
|
|||
122 | <Compile Include="Parsing\StarToken.cs" /> |
|
|||
123 | <Compile Include="Parsing\SymbolToken.cs" /> |
|
|||
124 | <Compile Include="Parsing\Token.cs" /> |
|
|||
125 | <Compile Include="ProgressInitEventArgs.cs" /> |
|
105 | <Compile Include="ProgressInitEventArgs.cs" /> | |
126 | <Compile Include="Properties\AssemblyInfo.cs" /> |
|
106 | <Compile Include="Properties\AssemblyInfo.cs" /> | |
127 | <Compile Include="Parallels\AsyncPool.cs" /> |
|
107 | <Compile Include="Parallels\AsyncPool.cs" /> | |
@@ -178,11 +158,34 | |||||
178 | <Compile Include="Components\ExecutionState.cs" /> |
|
158 | <Compile Include="Components\ExecutionState.cs" /> | |
179 | <Compile Include="Components\RunnableComponent.cs" /> |
|
159 | <Compile Include="Components\RunnableComponent.cs" /> | |
180 | <Compile Include="Components\IFactory.cs" /> |
|
160 | <Compile Include="Components\IFactory.cs" /> | |
181 |
<Compile Include=" |
|
161 | <Compile Include="Automaton\CDFADefinition.cs" /> | |
182 |
<Compile Include=" |
|
162 | <Compile Include="Automaton\DFAStateDescriptor.cs" /> | |
183 |
<Compile Include=" |
|
163 | <Compile Include="Automaton\DFAutomaton.cs" /> | |
184 |
<Compile Include=" |
|
164 | <Compile Include="Automaton\EDFADefinition.cs" /> | |
185 |
<Compile Include=" |
|
165 | <Compile Include="Automaton\EnumAlphabet.cs" /> | |
|
166 | <Compile Include="Automaton\IAlphabet.cs" /> | |||
|
167 | <Compile Include="Automaton\IDFADefinition.cs" /> | |||
|
168 | <Compile Include="Automaton\ParserException.cs" /> | |||
|
169 | <Compile Include="Automaton\Scanner.cs" /> | |||
|
170 | <Compile Include="Automaton\DFADefinition.cs" /> | |||
|
171 | <Compile Include="Automaton\IndexedAlphabetBase.cs" /> | |||
|
172 | <Compile Include="Automaton\CharAlphabet.cs" /> | |||
|
173 | <Compile Include="Automaton\IAlphabetBuilder.cs" /> | |||
|
174 | <Compile Include="Automaton\IDFADefinitionBuilder.cs" /> | |||
|
175 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> | |||
|
176 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> | |||
|
177 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> | |||
|
178 | <Compile Include="Automaton\DFAConst.cs" /> | |||
|
179 | <Compile Include="Automaton\RegularExpressions\Grammar.cs" /> | |||
|
180 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> | |||
|
181 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> | |||
|
182 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> | |||
|
183 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> | |||
|
184 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> | |||
|
185 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> | |||
|
186 | <Compile Include="Automaton\RegularExpressions\DFABuilder.cs" /> | |||
|
187 | <Compile Include="Automaton\AutomatonTransition.cs" /> | |||
|
188 | <Compile Include="Automaton\ByteAlphabet.cs" /> | |||
186 | </ItemGroup> |
|
189 | </ItemGroup> | |
187 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
190 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | |
188 | <ItemGroup /> |
|
191 | <ItemGroup /> | |
@@ -257,5 +260,6 | |||||
257 | </ProjectExtensions> |
|
260 | </ProjectExtensions> | |
258 | <ItemGroup> |
|
261 | <ItemGroup> | |
259 | <Folder Include="Components\" /> |
|
262 | <Folder Include="Components\" /> | |
|
263 | <Folder Include="Automaton\RegularExpressions\" /> | |||
260 | </ItemGroup> |
|
264 | </ItemGroup> | |
261 | </Project> No newline at end of file |
|
265 | </Project> |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now