##// END OF EJS Templates
DFA refactoring
cin -
r162:0526412bbb26 ref20160224
parent child
Show More
@@ -0,0 +1,33
1 using System;
2
3 namespace Implab.Automaton {
4 struct AutomatonTransition : IEquatable<AutomatonTransition> {
5 public readonly int s1;
6 public readonly int s2;
7 public readonly int edge;
8
9 public AutomatonTransition(int s1, int s2, int edge) {
10 this.s1 = s1;
11 this.s2 = s2;
12 this.edge = edge;
13 }
14
15
16 #region IEquatable implementation
17 public bool Equals(AutomatonTransition other) {
18 return other.s1 == s1 && other.s2 == s2 && other.edge == edge ;
19 }
20 #endregion
21
22 public override bool Equals(object obj) {
23 if (obj is AutomatonTransition)
24 return Equals((AutomatonTransition)obj);
25 return base.Equals(obj);
26 }
27
28 public override int GetHashCode() {
29 return s1 + s2 + edge;
30 }
31 }
32 }
33
@@ -0,0 +1,23
1 using System;
2
3 namespace Implab.Automaton {
4 public class ByteAlphabet : IndexedAlphabetBase<byte> {
5 public ByteAlphabet() : base(byte.MaxValue + 1){
6 }
7
8 #region implemented abstract members of IndexedAlphabetBase
9
10 public override int GetSymbolIndex(byte symbol) {
11 return (int)symbol;
12 }
13
14 public override System.Collections.Generic.IEnumerable<byte> InputSymbols {
15 get {
16 throw new NotImplementedException();
17 }
18 }
19
20 #endregion
21 }
22 }
23
@@ -0,0 +1,22
1 namespace Implab.Automaton {
2 public class CDFADefinition : DFADefinition {
3 readonly CharAlphabet m_alphabet;
4
5 public CharAlphabet Alphabet {
6 get { return m_alphabet; }
7 }
8
9 public CDFADefinition(CharAlphabet alphabet): base(alphabet.Count) {
10 m_alphabet = alphabet;
11 }
12
13 public CDFADefinition Optimize() {
14
15 return (CDFADefinition)Optimize(alphabet => new CDFADefinition((CharAlphabet)alphabet), m_alphabet, new CharAlphabet());
16 }
17
18 public void PrintDFA() {
19 PrintDFA(m_alphabet);
20 }
21 }
22 }
@@ -0,0 +1,23
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Linq;
5 using System.Text;
6 using System.Threading.Tasks;
7
8 namespace Implab.Automaton {
9 public class CharAlphabet: IndexedAlphabetBase<char> {
10
11 public CharAlphabet()
12 : base(char.MaxValue + 1) {
13 }
14
15 public override int GetSymbolIndex(char symbol) {
16 return symbol;
17 }
18
19 public override IEnumerable<char> InputSymbols {
20 get { return Enumerable.Range(char.MinValue, char.MaxValue).Select(x => (char)x); }
21 }
22 }
23 }
@@ -0,0 +1,10
1 using System;
2
3 namespace Implab.Automaton {
4 public static class DFAConst {
5 public const int UNREACHABLE_STATE = -1;
6
7 public const int UNCLASSIFIED_INPUT = 0;
8 }
9 }
10
@@ -0,0 +1,213
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Linq;
5
6 namespace Implab.Automaton {
7 public class DFADefinition<TInput, TState, TTag> : IDFADefinitionBuilder<TTag>, IDFADefinition<TInput, TState, TTag> {
8 DFAStateDescriptior<TTag>[] m_dfaTable;
9 readonly IAlphabet<TInput> m_inputAlphabet;
10 readonly IAlphabet<TState> m_stateAlphabet;
11
12 readonly Dictionary<int, TTag[]> m_finalStates = new Dictionary<int, TTag[]>();
13 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
14
15 public DFADefinition(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
16 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
17 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
18
19 m_inputAlphabet = inputAlphabet;
20 m_stateAlphabet = stateAlphabet;
21 }
22
23 public void DefineTransition(int s1, int s2, int symbol) {
24 Safe.ArgumentInRange(s1, 0, m_stateAlphabet.Count-1, "s1");
25 Safe.ArgumentInRange(s2, 0, m_stateAlphabet.Count-1, "s2");
26 Safe.ArgumentInRange(symbol, 0, m_inputAlphabet.Count-1, "symbol");
27
28 m_transitions.Add(new AutomatonTransition(s1, s2, symbol));
29 }
30
31
32 #region IDFADefinition implementation
33
34 public DFAStateDescriptior<TTag>[] GetTransitionTable() {
35 if (m_dfaTable == null) {
36 m_dfaTable = new DFAStateDescriptior<TTag>[m_stateAlphabet.Count];
37
38 foreach (var pair in m_finalStates) {
39 var idx = pair.Key;
40
41 m_dfaTable[idx].final = true;
42 m_dfaTable[idx].tag = m_dfaTable[idx].tag != null ?
43 m_dfaTable[idx].tag.Concat(pair.Value).Distinct().ToArray() :
44 pair.Value;
45 }
46
47 foreach (var t in m_transitions) {
48 if (m_dfaTable[t.s1].transitions == null) {
49 m_dfaTable[t.s1].transitions = new int[m_inputAlphabet.Count];
50 for (int i = 0; i < m_dfaTable[t.s1].transitions.Length; i++)
51 m_dfaTable[t.s1].transitions[i] = DFAConst.UNREACHABLE_STATE;
52 }
53
54 m_dfaTable[t.s1].transitions[t.edge] = t.s2;
55 }
56 }
57 return m_dfaTable;
58 }
59
60 public IAlphabet<TInput> InputAlphabet {
61 get {
62 return m_inputAlphabet;
63 }
64 }
65
66 public IAlphabet<TState> StateAlphabet {
67 get {
68 return m_stateAlphabet;
69 }
70 }
71
72 #endregion
73
74 #region IDFADefinitionBuilder
75
76 public void DefineTransition(int s1, int s2, int symbol) {
77 Safe.ArgumentInRange(s1, 0, m_stateAlphabet.Count - 1, "s1");
78 Safe.ArgumentInRange(s2, 0, m_stateAlphabet.Count - 1, "s2");
79 Safe.ArgumentInRange(symbol, 0, m_inputAlphabet.Count - 1, "symbol");
80
81 m_transitions.Add(new AutomatonTransition(s1, s2, symbol));
82 }
83
84 public void MarkFinalState(int state, params TTag[] tags) {
85 m_finalStates[state] = tags;
86 }
87
88
89 #endregion
90
91 protected void Optimize(IDFADefinitionBuilder<TTag> optimalDFA,IAlphabetBuilder<TInput> optimalInputAlphabet, IAlphabetBuilder<TState> optimalStateAlphabet) {
92 Safe.ArgumentNotNull(optimalDFA, "dfa");
93 Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet");
94 Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet");
95
96 var setComparer = new CustomEqualityComparer<HashSet<int>>(
97 (x, y) => x.SetEquals(y),
98 s => s.Sum(x => x.GetHashCode())
99 );
100
101 var arrayComparer = new CustomEqualityComparer<TTag[]>(
102 (x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)),
103 a => a.Sum(x => x.GetHashCode())
104 );
105
106 var optimalStates = new HashSet<HashSet<int>>(setComparer);
107 var queue = new HashSet<HashSet<int>>(setComparer);
108
109 // получаем конечные состояния, сгруппированные по маркерам
110 optimalStates.UnionWith(
111 m_finalStates
112 .GroupBy(pair => pair.Value, arrayComparer)
113 .Select(
114 g => new HashSet<int>(
115 g.Select( pair => pair.Key)
116 )
117 )
118 );
119
120 var state = new HashSet<int>(
121 Enumerable
122 .Range(0, m_stateAlphabet.Count - 1)
123 .Where(i => !m_finalStates.ContainsKey(i))
124 );
125 optimalStates.Add(state);
126 queue.Add(state);
127
128 var rmap = m_transitions
129 .GroupBy(t => t.s2)
130 .ToLookup(
131 g => g.Key, // s2
132 g => g.ToLookup(t => t.edge, t => t.s1)
133 );
134
135 while (queue.Count > 0) {
136 var stateA = queue.First();
137 queue.Remove(stateA);
138
139 for (int c = 0; c < m_inputAlphabet.Count; c++) {
140 var stateX = new HashSet<int>();
141 foreach(var a in stateA)
142 stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a'
143
144 foreach (var stateY in optimalStates.ToArray()) {
145 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
146 var stateR1 = new HashSet<int>(stateY);
147 var stateR2 = new HashSet<int>(stateY);
148
149 stateR1.IntersectWith(stateX);
150 stateR2.ExceptWith(stateX);
151
152 optimalStates.Remove(stateY);
153 optimalStates.Add(stateR1);
154 optimalStates.Add(stateR2);
155
156 if (queue.Contains(stateY)) {
157 queue.Remove(stateY);
158 queue.Add(stateR1);
159 queue.Add(stateR2);
160 } else {
161 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
162 }
163 }
164 }
165 }
166 }
167
168 // карта получения оптимального состояния по соотвествующему ему простому состоянию
169 var statesMap = m_stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates);
170
171 // получаем минимальный алфавит
172 // входные символы не различимы, если Move(s,a1) == Move(s,a2)
173 var optimalAlphabet = m_transitions
174 .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge);
175
176 var alphabetMap = m_inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet);
177
178 var optimalTags = m_finalStates
179 .GroupBy(pair => statesMap[pair.Key])
180 .ToDictionary(
181 g => g.Key,
182 g => g.SelectMany(pair => pair.Value).ToArray()
183 );
184
185 // построение автомата
186 foreach (var pair in optimalTags)
187 optimalDFA.MarkFinalState(pair.Key, pair.Value);
188
189 foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct())
190 optimalDFA.DefineTransition(t.s1, t.s2, t.edge);
191 }
192
193 public void PrintDFA() {
194
195 var inputMap = InputAlphabet.CreateReverseMap();
196 var stateMap = StateAlphabet.CreateReverseMap();
197
198 for (int i = 0; i < inputMap.Length; i++) {
199 Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i]));
200 }
201
202 foreach(var t in m_transitions)
203 Console.WriteLine(
204 "S{0} -{1}-> S{2}{3}",
205 stateMap[t.s1],
206 String.Join(",", inputMap[t.edge]),
207 stateMap[t.s2],
208 m_finalStates.ContainsKey(t.s2) ? "$" : ""
209 );
210
211 }
212 }
213 }
@@ -0,0 +1,13
1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using System.Text;
5 using System.Threading.Tasks;
6
7 namespace Implab.Automaton {
8 public struct DFAStateDescriptior<TTag> {
9 public bool final;
10 public TTag[] tag;
11 public int[] transitions;
12 }
13 }
@@ -0,0 +1,61
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Diagnostics;
5 using System.Linq;
6 using System.Text;
7 using System.Threading.Tasks;
8
9 namespace Implab.Automaton {
10 public abstract class DFAutomaton<T> {
11 protected struct ContextFrame {
12 public DFAStateDescriptior[] states;
13 public int current;
14 public T info;
15 }
16
17 public const int INITIAL_STATE = DFADefinition.INITIAL_STATE;
18 public const int UNREACHEBLE_STATE = DFADefinition.UNREACHEBLE_STATE;
19
20 protected ContextFrame m_context;
21 Stack<ContextFrame> m_contextStack = new Stack<ContextFrame>();
22
23 protected int Level {
24 get { return m_contextStack.Count; }
25 }
26
27 protected DFAutomaton(DFAStateDescriptior[] states, int startState, T info) {
28 Safe.ArgumentNotNull(states, "states");
29 Safe.ArgumentInRange(startState, 0, states.Length - 1, "startState");
30
31 m_context.states = states;
32 m_context.current = startState;
33 m_context.info = info;
34 }
35
36 protected void Switch(DFAStateDescriptior[] states, int current, T info) {
37 Debug.Assert(states != null);
38 Debug.Assert(current >= 0 && current < states.Length);
39 m_contextStack.Push(m_context);
40 m_context.states = states;
41 m_context.current = current;
42 m_context.info = info;
43 }
44
45 protected void Restore() {
46 Debug.Assert(m_contextStack.Count > 0);
47
48 m_context = m_contextStack.Pop();
49 }
50
51 protected void Move(int input) {
52 Debug.Assert(input > 0 && input < m_context.states[m_context.current].transitions.Length);
53 m_context.current = m_context.states[m_context.current].transitions[input];
54 }
55
56 protected bool CanMove(int input) {
57 Debug.Assert(input > 0 && input < m_context.states[m_context.current].transitions.Length);
58 return m_context.states[m_context.current].transitions[input] != UNREACHEBLE_STATE;
59 }
60 }
61 }
@@ -0,0 +1,29
1 using Implab;
2 using System;
3
4 namespace Implab.Parsing {
5 public class EDFADefinition<T> : DFADefinition where T : struct, IConvertible {
6 readonly EnumAlphabet<T> m_alphabet;
7
8 public EnumAlphabet<T> Alphabet {
9 get { return m_alphabet; }
10 }
11
12 public EDFADefinition(EnumAlphabet<T> alphabet) : base(alphabet.Count) {
13 m_alphabet = alphabet;
14 }
15
16 public void DefineTransition(int s1, int s2, T input) {
17 DefineTransition(s1, s2, m_alphabet.Translate(input));
18 }
19
20 public EDFADefinition<T> Optimize() {
21
22 return (EDFADefinition<T>)Optimize(alphabet => new EDFADefinition<T>((EnumAlphabet<T>)alphabet), m_alphabet, new EnumAlphabet<T>());
23 }
24
25 public void PrintDFA() {
26 PrintDFA(m_alphabet);
27 }
28 }
29 }
@@ -0,0 +1,67
1 using System;
2 using System.Collections.Generic;
3 using System.Diagnostics;
4 using System.Globalization;
5 using System.Linq;
6 using System.Diagnostics.CodeAnalysis;
7
8 namespace Implab.Automaton {
9 /// <summary>
10 /// Алфавит символами которого являются элементы перечислений.
11 /// </summary>
12 /// <typeparam name="T">Тип перечислений</typeparam>
13 public class EnumAlphabet<T> : IndexedAlphabetBase<T> where T : struct, IConvertible {
14 [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")]
15 static readonly T[] _symbols;
16 static readonly EnumAlphabet<T> _fullAlphabet;
17
18 [SuppressMessage("Microsoft.Design", "CA1065:DoNotRaiseExceptionsInUnexpectedLocations")]
19 static EnumAlphabet() {
20 if (!typeof(T).IsEnum)
21 throw new InvalidOperationException("Invalid generic parameter, enumeration is required");
22
23 if (Enum.GetUnderlyingType(typeof(T)) != typeof(Int32))
24 throw new InvalidOperationException("Only enums based on Int32 are supported");
25
26 _symbols = ((T[])Enum.GetValues(typeof(T)))
27 .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture))
28 .ToArray();
29
30 if (
31 _symbols[_symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= _symbols.Length
32 || _symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0
33 )
34 throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered");
35
36 _fullAlphabet = new EnumAlphabet<T>(_symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray());
37 }
38
39
40
41 public static EnumAlphabet<T> FullAlphabet {
42 get {
43 return _fullAlphabet;
44 }
45 }
46
47
48 public EnumAlphabet()
49 : base(_symbols.Length) {
50 }
51
52 public EnumAlphabet(int[] map)
53 : base(map) {
54 Debug.Assert(map.Length == _symbols.Length);
55 }
56
57
58 public override int GetSymbolIndex(T symbol) {
59 return symbol.ToInt32(CultureInfo.InvariantCulture);
60 }
61
62 public override IEnumerable<T> InputSymbols {
63 get { return _symbols; }
64 }
65
66 }
67 }
@@ -0,0 +1,48
1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using System.Text;
5 using System.Threading.Tasks;
6
7 namespace Implab.Automaton {
8 /// <summary>
9 /// Алфавит. Множество символов, которые разбиты на классы, при этом классы имеют непрерывную нумерацию,
10 /// что позволяет использовать их в качестве индексов массивов.
11 /// </summary>
12 /// <remarks>
13 /// <para>Алфавит является сюрьективным отображением множества символов в множество индексов, это позволяет сократить размер таблицы переходов автомата
14 /// для входных символов, которые для него не различимы.</para>
15 /// </remarks>
16 /// <typeparam name="TSymbol">Тип символов.</typeparam>
17 public interface IAlphabet<TSymbol> {
18 /// <summary>
19 /// Количество классов символов в алфавите.
20 /// </summary>
21 int Count { get; }
22
23 /// <summary>
24 /// Создает карту обратного сопоставления класса символов алфавита и сопоставленным
25 /// ему исходным символам.
26 /// </summary>
27 /// <returns></returns>
28 List<TSymbol>[] CreateReverseMap();
29
30 /// <summary>
31 /// Создает новый алфавит на основе текущего, горппируя его сиволы в более
32 /// крупные непересекающиеся классы символов.
33 /// </summary>
34 /// <param name="newAlphabet">Новый, пустой алфавит, в котором быдут определены классы.</param>
35 /// <param name="classes">Множество классов символов текущего алфавита.</param>
36 /// <returns>Карта для перехода классов текущего
37 /// алфавита к классам нового.</returns>
38 /// <remarks>Ползволяет укрупнить алфавит, объединив классы в текущем алфавите. Используется при оптимизации автомата.</remarks>
39 int[] Reclassify(IAlphabetBuilder<TSymbol> newAlphabet, IEnumerable<IEnumerable<int>> classes);
40
41 /// <summary>
42 /// Преобразует входной символ в индекс символа из алфавита.
43 /// </summary>
44 /// <param name="symobl">Исходный символ</param>
45 /// <returns>Индекс в алфавите</returns>
46 int Translate(TSymbol symobl);
47 }
48 }
@@ -0,0 +1,25
1
2 using System.Collections.Generic;
3
4 namespace Implab.Automaton {
5 public interface IAlphabetBuilder<TSymbol> : IAlphabet<TSymbol> {
6 /// <summary>
7 /// Добавляет новый символ в алфавит, если символ уже был добавлен, то
8 /// возвращается ранее сопоставленный с символом класс.
9 /// </summary>
10 /// <param name="symbol">Символ для добавления.</param>
11 /// <returns>Индекс класса, который попоставлен с символом.</returns>
12 int DefineSymbol(TSymbol symbol);
13 /// <summary>
14 /// Доабвляем класс символов. Множеству указанных исходных символов
15 /// будет сопоставлен символ в алфавите.
16 /// </summary>
17 /// <param name="symbols">Множестов исходных символов</param>
18 /// <returns>Идентификатор символа алфавита.</returns>
19 int DefineClass(IEnumerable<TSymbol> symbols);
20
21
22
23 }
24 }
25
@@ -0,0 +1,56
1
2 namespace Implab.Automaton {
3 /// <summary>
4 /// Полностью описывает DFA автомат, его поведение, состояние и входные символы.
5 /// </summary>
6 /// <example>
7 /// class MyAutomaton {
8 /// int m_current;
9 /// readonly DFAStateDescriptor<string>[] m_automaton;
10 /// readonly IAlphabet<MyCommands> m_commands;
11 ///
12 /// public MyAutomaton(IDFADefinition&lt;MyCommands,MyStates,string&gt; definition) {
13 /// m_current = definition.StateAlphabet.Translate(MyStates.Initial);
14 /// m_automaton = definition.GetTransitionTable();
15 /// m_commands = definition.InputAlphabet;
16 /// }
17 ///
18 /// // defined a method which will move the automaton to the next state
19 /// public void Move(MyCommands cmd) {
20 /// // use transition map to determine the next state
21 /// var next = m_automaton[m_current].transitions[m_commands.Translate(cmd)];
22 ///
23 /// // validate that we aren't in the unreachable state
24 /// if (next == DFAConst.UNREACHABLE_STATE)
25 /// throw new InvalidOperationException("The specified command is invalid");
26 ///
27 /// // if everything is ok
28 /// m_current = next;
29 /// }
30 /// }
31 /// </example>
32 public interface IDFADefinition<TInput, TState, TTag> {
33 /// <summary>
34 /// Алфавит входных символов
35 /// </summary>
36 /// <value>The input alphabet.</value>
37 IAlphabet<TInput> InputAlphabet {
38 get;
39 }
40
41 /// <summary>
42 /// Алфавит состояний автомата
43 /// </summary>
44 /// <value>The state alphabet.</value>
45 IAlphabet<TState> StateAlphabet {
46 get;
47 }
48
49 /// <summary>
50 /// Таблица переходов состояний автомата
51 /// </summary>
52 /// <returns>The transition table.</returns>
53 DFAStateDescriptior<TTag>[] GetTransitionTable();
54
55 }
56 }
@@ -0,0 +1,23
1 using System;
2
3 namespace Implab.Automaton {
4 public interface IDFADefinitionBuilder<TTag> {
5 /// <summary>
6 /// Marks the state as final and assings tags.
7 /// </summary>
8 /// <param name="state">State.</param>
9 /// <param name="tags">Tags.</param>
10 void MarkFinalState(int state, params TTag[] tags);
11
12 /// <summary>
13 /// Defines the transition from <paramref name="s1"/> to
14 /// <paramref name="s2"/> with input <paramref name="symbol"/>.
15 /// </summary>
16 /// <param name="s1">S1.</param>
17 /// <param name="s2">S2.</param>
18 /// <param name="symbol">Symbol.</param>
19 void DefineTransition(int s1, int s2, int symbol);
20
21 }
22 }
23
@@ -0,0 +1,105
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Diagnostics;
5 using System.Linq;
6
7 namespace Implab.Automaton {
8 /// <summary>
9 /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
10 /// </summary>
11 public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
12 public const int UNCLASSIFIED = 0;
13
14 int m_nextId = 1;
15 readonly int[] m_map;
16
17 public int Count {
18 get { return m_nextId; }
19 }
20
21 protected IndexedAlphabetBase(int mapSize) {
22 m_map = new int[mapSize];
23 }
24
25 protected IndexedAlphabetBase(int[] map) {
26 Debug.Assert(map != null);
27
28 m_map = map;
29 m_nextId = map.Max() + 1;
30 }
31
32 public int DefineSymbol(T symbol) {
33 var index = GetSymbolIndex(symbol);
34 if (m_map[index] == UNCLASSIFIED)
35 m_map[index] = m_nextId++;
36 return m_map[index];
37 }
38
39 public int DefineClass(IEnumerable<T> symbols) {
40 Safe.ArgumentNotNull(symbols, "symbols");
41 symbols = symbols.Distinct();
42
43 foreach (var symbol in symbols) {
44 var index = GetSymbolIndex(symbol);
45 if (m_map[index] == UNCLASSIFIED)
46 m_map[GetSymbolIndex(symbol)] = m_nextId;
47 else
48 throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
49 }
50 return m_nextId++;
51 }
52
53 public List<T>[] CreateReverseMap() {
54 return
55 Enumerable.Range(UNCLASSIFIED, Count)
56 .Select(
57 i => InputSymbols
58 .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i)
59 .ToList()
60 )
61 .ToArray();
62 }
63
64 public int[] Reclassify(IAlphabetBuilder<T> newAlphabet, IEnumerable<IEnumerable<int>> classes) {
65 Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
66 Safe.ArgumentNotNull(classes, "classes");
67 var reverseMap = CreateReverseMap();
68
69 var translationMap = new int[Count];
70
71 foreach (var scl in classes) {
72 // skip if the supper class contains the unclassified element
73 if (scl.Contains(UNCLASSIFIED))
74 continue;
75 var range = new List<T>();
76 foreach (var cl in scl) {
77 if (cl < 0 || cl >= reverseMap.Length)
78 throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl));
79 range.AddRange(reverseMap[cl]);
80 }
81 var newClass = newAlphabet.DefineClass(range);
82 foreach (var cl in scl)
83 translationMap[cl] = newClass;
84 }
85
86 return translationMap;
87 }
88
89 public virtual int Translate(T symbol) {
90 return m_map[GetSymbolIndex(symbol)];
91 }
92
93 public abstract int GetSymbolIndex(T symbol);
94
95 public abstract IEnumerable<T> InputSymbols { get; }
96
97 /// <summary>
98 /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion.
99 /// </summary>
100 /// <returns>The translation map.</returns>
101 public int[] GetTranslationMap() {
102 return m_map;
103 }
104 }
105 }
@@ -0,0 +1,17
1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using System.Text;
5
6 namespace Implab.Automaton {
7 [Serializable]
8 public class ParserException : Exception {
9 public ParserException() { }
10 public ParserException(string message) : base(message) { }
11 public ParserException(string message, Exception inner) : base(message, inner) { }
12 protected ParserException(
13 System.Runtime.Serialization.SerializationInfo info,
14 System.Runtime.Serialization.StreamingContext context)
15 : base(info, context) { }
16 }
17 }
@@ -0,0 +1,17
1 using System;
2
3 namespace Implab.Automaton.RegularExpressions {
4 public class AltToken<TTag>: BinaryToken<TTag> {
5 public AltToken(Token<TTag> left, Token<TTag> right)
6 : base(left, right) {
7 }
8
9 public override void Accept(IVisitor<TTag> visitor) {
10 Safe.ArgumentNotNull(visitor, "visitor");
11 visitor.Visit(this);
12 }
13 public override string ToString() {
14 return String.Format(Right is BinaryToken<TTag> ? "{0}|({1})" : "{0}|{1}", Left, Right);
15 }
16 }
17 }
@@ -0,0 +1,21
1 using Implab;
2
3 namespace Implab.Automaton.RegularExpressions {
4 public abstract class BinaryToken<TTag> : Token<TTag> {
5 readonly Token<TTag> m_left;
6 readonly Token<TTag> m_right;
7
8 public Token<TTag> Left {
9 get { return m_left; }
10 }
11
12 public Token<TTag> Right {
13 get { return m_right; }
14 }
15
16 protected BinaryToken(Token<TTag> left, Token<TTag> right) {
17 Safe.ArgumentNotNull(m_left = left, "left");
18 Safe.ArgumentNotNull(m_right = right, "right");
19 }
20 }
21 }
@@ -0,0 +1,22
1 using System;
2
3 namespace Implab.Automaton.RegularExpressions {
4 public class CatToken<TTag> : BinaryToken<TTag> {
5 public CatToken(Token<TTag> left, Token<TTag> right)
6 : base(left, right) {
7 }
8
9 public override void Accept(IVisitor<TTag> visitor) {
10 Safe.ArgumentNotNull(visitor, "visitor");
11 visitor.Visit(this);
12 }
13
14 public override string ToString() {
15 return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right));
16 }
17
18 static string FormatToken(Token<TTag> token) {
19 return String.Format(token is AltToken<TTag> ? "({0})" : "{0}", token);
20 }
21 }
22 }
@@ -0,0 +1,181
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Diagnostics;
5 using System.Linq;
6
7 namespace Implab.Automaton.RegularExpressions {
8 /// <summary>
9 /// Используется для построения ДКА по регулярному выражению, сначала обходит
10 /// регулярное выражение и вычисляет followpos, затем используется метод
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
12 /// </summary>
13 public class DFABuilder<TTag> : IVisitor<TTag> {
14 int m_idx = 0;
15 Token<TTag> m_root;
16 HashSet<int> m_firstpos;
17 HashSet<int> m_lastpos;
18
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
21 readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>();
22
23 public Dictionary<int, HashSet<int>> FollowposMap {
24 get { return m_followpos; }
25 }
26
27 public HashSet<int> Followpos(int pos) {
28 HashSet<int> set;
29 if (m_followpos.TryGetValue(pos, out set))
30 return set;
31 return m_followpos[pos] = new HashSet<int>();
32 }
33
34 bool Nullable(object n) {
35 if (n is EmptyToken<TTag> || n is StarToken<TTag>)
36 return true;
37 if (n is AltToken<TTag>)
38 return Nullable(((AltToken<TTag>)n).Left) || Nullable(((AltToken<TTag>)n).Right);
39 if (n is CatToken<TTag>)
40 return Nullable(((CatToken<TTag>)n).Left) && Nullable(((CatToken<TTag>)n).Right);
41 return false;
42 }
43
44
45 public void Visit(AltToken<TTag> token) {
46 if (m_root == null)
47 m_root = token;
48 var firtspos = new HashSet<int>();
49 var lastpos = new HashSet<int>();
50
51 token.Left.Accept(this);
52 firtspos.UnionWith(m_firstpos);
53 lastpos.UnionWith(m_lastpos);
54
55 token.Right.Accept(this);
56 firtspos.UnionWith(m_firstpos);
57 lastpos.UnionWith(m_lastpos);
58
59 m_firstpos = firtspos;
60 m_lastpos = lastpos;
61 }
62
63 public void Visit(StarToken<TTag> token) {
64 if (m_root == null)
65 m_root = token;
66 token.Token.Accept(this);
67
68 foreach (var i in m_lastpos)
69 Followpos(i).UnionWith(m_firstpos);
70 }
71
72 public void Visit(CatToken<TTag> token) {
73 if (m_root == null)
74 m_root = token;
75
76 var firtspos = new HashSet<int>();
77 var lastpos = new HashSet<int>();
78 token.Left.Accept(this);
79 firtspos.UnionWith(m_firstpos);
80 var leftLastpos = m_lastpos;
81
82 token.Right.Accept(this);
83 lastpos.UnionWith(m_lastpos);
84 var rightFirstpos = m_firstpos;
85
86 if (Nullable(token.Left))
87 firtspos.UnionWith(rightFirstpos);
88
89 if (Nullable(token.Right))
90 lastpos.UnionWith(leftLastpos);
91
92 m_firstpos = firtspos;
93 m_lastpos = lastpos;
94
95 foreach (var i in leftLastpos)
96 Followpos(i).UnionWith(rightFirstpos);
97
98 }
99
100 public void Visit(EmptyToken<TTag> token) {
101 if (m_root == null)
102 m_root = token;
103 }
104
105 public void Visit(SymbolToken<TTag> token) {
106 if (m_root == null)
107 m_root = token;
108 m_idx++;
109 m_indexes[m_idx] = token.Value;
110 m_firstpos = new HashSet<int>(new[] { m_idx });
111 m_lastpos = new HashSet<int>(new[] { m_idx });
112 }
113
114 public void Visit(EndToken<TTag> token) {
115 if (m_root == null)
116 m_root = token;
117 m_idx++;
118 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
119 m_firstpos = new HashSet<int>(new[] { m_idx });
120 m_lastpos = new HashSet<int>(new[] { m_idx });
121 Followpos(m_idx);
122 m_ends.Add(m_idx, token.Tag);
123 }
124
125 public void BuildDFA(IDFADefinitionBuilder<TTag> dfa, IAlphabetBuilder<int> states) {
126 Safe.ArgumentNotNull(dfa,"dfa");
127
128 var stateMap = new Dictionary<HashSet<int>, int>(new CustomEqualityComparer<HashSet<int>>(
129 (x, y) => x.SetEquals(y),
130 x => x.Sum(n => n.GetHashCode())
131 ));
132
133 int nextState = 0;
134
135 int initialState = states.DefineSymbol(nextState++);
136 stateMap[m_firstpos] = initialState;
137
138 var tags = GetStateTags(m_firstpos);
139 if (tags != null && tags.Length > 0)
140 dfa.MarkFinalState(initialState, tags);
141
142 var inputMax = m_indexes.Values.Max();
143 var queue = new Queue<HashSet<int>>();
144
145 queue.Enqueue(m_firstpos);
146
147 while (queue.Count > 0) {
148 var state = queue.Dequeue();
149 var s1 = stateMap[state];
150
151 for (int a = 0; a <= inputMax; a++) {
152 var next = new HashSet<int>();
153 foreach (var p in state) {
154 if (m_indexes[p] == a) {
155 next.UnionWith(Followpos(p));
156 }
157 }
158 if (next.Count > 0) {
159 int s2;
160 if (!stateMap.TryGetValue(next, out s2)) {
161 s2 = states.DefineSymbol(nextState++);
162 stateMap[next] = s2;
163 tags = GetStateTags(next);
164 if (tags != null && tags.Length > 0)
165 dfa.MarkFinalState(s2, tags);
166
167 queue.Enqueue(next);
168 }
169 dfa.DefineTransition(s1, s2, a);
170 }
171 }
172 }
173 }
174
175 TTag[] GetStateTags(IEnumerable<int> state) {
176 Debug.Assert(state != null);
177 return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray();
178 }
179
180 }
181 }
@@ -0,0 +1,13
1 using Implab;
2
3 namespace Implab.Automaton.RegularExpressions {
4 public class EmptyToken<TTag> : Token<TTag> {
5 public override void Accept(IVisitor<TTag> visitor) {
6 Safe.ArgumentNotNull(visitor, "visitor");
7 visitor.Visit(this);
8 }
9 public override string ToString() {
10 return "$";
11 }
12 }
13 }
@@ -0,0 +1,32
1 using Implab;
2
3 namespace Implab.Automaton.RegularExpressions {
4 /// <summary>
5 /// Конечный символ расширенного регулярного выражения, при построении ДКА
6 /// используется для определения конечных состояний.
7 /// </summary>
8 public class EndToken<TTag>: Token<TTag> {
9
10 TTag m_tag;
11
12 public EndToken(TTag tag) {
13 m_tag = tag;
14 }
15
16 public EndToken()
17 : this(0) {
18 }
19
20 public TTag Tag {
21 get { return m_tag; }
22 }
23
24 public override void Accept(IVisitor<TTag> visitor) {
25 Safe.ArgumentNotNull(visitor, "visitor");
26 visitor.Visit(this);
27 }
28 public override string ToString() {
29 return "#";
30 }
31 }
32 }
@@ -0,0 +1,108
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Linq;
5 using System.Text;
6 using System.Threading.Tasks;
7
8 namespace Implab.Automaton.RegularExpressions {
9 /// <summary>
10 /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>.
11 /// </summary>
12 /// <typeparam name="TGrammar"></typeparam>
13 public abstract class Grammar<TGrammar> where TGrammar: Grammar<TGrammar>, new() {
14 static TGrammar _instance;
15
16 public static TGrammar Instance{
17 get {
18 if (_instance == null)
19 _instance = new TGrammar();
20 return _instance;
21 }
22 }
23
24 readonly CharAlphabet m_alphabet = new CharAlphabet();
25
26 public CharAlphabet Alphabet {
27 get { return m_alphabet; }
28 }
29
30 public SymbolToken UnclassifiedToken() {
31 return new SymbolToken(CharAlphabet.UNCLASSIFIED);
32 }
33
34 public void DefineAlphabet(IEnumerable<char> alphabet) {
35 Safe.ArgumentNotNull(alphabet, "alphabet");
36
37 foreach (var ch in alphabet)
38 m_alphabet.DefineSymbol(ch);
39 }
40 public Token SymbolRangeToken(char start, char end) {
41 return SymbolToken(Enumerable.Range(start, end - start + 1).Select(x => (char)x));
42 }
43
44 public Token SymbolToken(char symbol) {
45 return Token.New(TranslateOrAdd(symbol));
46 }
47
48 public Token SymbolToken(IEnumerable<char> symbols) {
49 Safe.ArgumentNotNull(symbols, "symbols");
50
51 return Token.New(TranslateOrAdd(symbols).ToArray());
52 }
53
54 public Token SymbolSetToken(params char[] set) {
55 return SymbolToken(set);
56 }
57
58 int TranslateOrAdd(char ch) {
59 var t = m_alphabet.Translate(ch);
60 if (t == CharAlphabet.UNCLASSIFIED)
61 t = m_alphabet.DefineSymbol(ch);
62 return t;
63 }
64
65 IEnumerable<int> TranslateOrAdd(IEnumerable<char> symbols) {
66 return symbols.Distinct().Select(TranslateOrAdd);
67 }
68
69 int TranslateOrDie(char ch) {
70 var t = m_alphabet.Translate(ch);
71 if (t == CharAlphabet.UNCLASSIFIED)
72 throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
73 return t;
74 }
75
76 IEnumerable<int> TranslateOrDie(IEnumerable<char> symbols) {
77 return symbols.Distinct().Select(TranslateOrDie);
78 }
79
80 public Token SymbolTokenExcept(IEnumerable<char> symbols) {
81 Safe.ArgumentNotNull(symbols, "symbols");
82
83 return Token.New( Enumerable.Range(0, m_alphabet.Count).Except(TranslateOrDie(symbols)).ToArray());
84 }
85
86 protected CDFADefinition BuildDFA(Token lang) {
87 Safe.ArgumentNotNull(lang, "lang");
88
89 var dfa = new CDFADefinition(m_alphabet);
90
91 var builder = new DFABuilder();
92
93 lang.Accept( builder );
94
95 builder.BuildDFA(dfa);
96 if (dfa.InitialStateIsFinal)
97 throw new ApplicationException("The specified language contains empty token");
98
99 return dfa.Optimize();
100 }
101
102
103
104 //protected abstract TGrammar CreateInstance();
105 }
106
107
108 }
@@ -0,0 +1,13
1 namespace Implab.Automaton.RegularExpressions {
2 /// <summary>
3 /// Интерфейс обходчика синтаксического дерева регулярного выражения
4 /// </summary>
5 public interface IVisitor<TTag> {
6 void Visit(AltToken<TTag> token);
7 void Visit(StarToken<TTag> token);
8 void Visit(CatToken<TTag> token);
9 void Visit(EmptyToken<TTag> token);
10 void Visit(EndToken<TTag> token);
11 void Visit(SymbolToken<TTag> token);
12 }
13 }
@@ -0,0 +1,34
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Linq;
5 using System.Text;
6 using System.Threading.Tasks;
7
8 namespace Implab.Automaton.RegularExpressions {
9 /// <summary>
10 /// Замыкание выражения с 0 и более повторов.
11 /// </summary>
12 public class StarToken<TTag>: Token<TTag> {
13
14 Token<TTag> m_token;
15
16 public Token<TTag> Token {
17 get { return m_token; }
18 }
19
20 public StarToken(Token<TTag> token) {
21 Safe.ArgumentNotNull(token, "token");
22 m_token = token;
23 }
24
25 public override void Accept(IVisitor<TTag> visitor) {
26 Safe.ArgumentNotNull(visitor, "visitor");
27 visitor.Visit(this);
28 }
29
30 public override string ToString() {
31 return String.Format("({0})*", Token);
32 }
33 }
34 }
@@ -0,0 +1,27
1 using Implab;
2
3 namespace Implab.Automaton.RegularExpressions {
4 /// <summary>
5 /// Выражение, соответсвующее одному символу.
6 /// </summary>
7 public class SymbolToken<TTag> : Token<TTag> {
8 int m_value;
9
10 public int Value {
11 get { return m_value; }
12 }
13
14 public SymbolToken(int value) {
15 m_value = value;
16 }
17 public override void Accept(IVisitor<TTag> visitor) {
18 Safe.ArgumentNotNull(visitor, "visitor");
19
20 visitor.Visit(this);
21 }
22
23 public override string ToString() {
24 return Value.ToString();
25 }
26 }
27 }
@@ -0,0 +1,63
1 using Implab;
2 using System;
3 using System.Linq;
4
5 namespace Implab.Automaton.RegularExpressions {
6 public abstract class Token<TTag> {
7 public abstract void Accept(IVisitor<TTag> visitor);
8
9 public Token<TTag> Extend() {
10 return Cat(new EndToken<TTag>());
11 }
12
13 public Token<TTag> Tag<T>(T tag) where T : IConvertible {
14 return Cat(new EndToken<TTag>(tag));
15 }
16
17 public Token<TTag> Cat(Token<TTag> right) {
18 return new CatToken<TTag>(this, right);
19 }
20
21 public Token<TTag> Or(Token<TTag> right) {
22 return new AltToken<TTag>(this, right);
23 }
24
25 public Token<TTag> Optional() {
26 return Or(new EmptyToken<TTag>());
27 }
28
29 public Token<TTag> EClosure() {
30 return new StarToken<TTag>(this);
31 }
32
33 public Token<TTag> Closure() {
34 return Cat(new StarToken<TTag>(this));
35 }
36
37 public Token<TTag> Repeat(int count) {
38 Token<TTag> token = null;
39
40 for (int i = 0; i < count; i++)
41 token = token != null ? token.Cat(this) : this;
42 return token ?? new EmptyToken<TTag>();
43 }
44
45 public Token<TTag> Repeat(int min, int max) {
46 if (min > max || min < 1)
47 throw new ArgumentOutOfRangeException();
48 var token = Repeat(min);
49
50 for (int i = min; i < max; i++)
51 token = token.Cat( this.Optional() );
52 return token;
53 }
54
55 public static Token<TTag> New<T>(params T[] set) where T : struct, IConvertible {
56 Safe.ArgumentNotNull(set, "set");
57 Token<TTag> token = null;
58 foreach(var c in set.Distinct())
59 token = token == null ? new SymbolToken<TTag>(c) : token.Or(new SymbolToken<TTag>(c));
60 return token;
61 }
62 }
63 }
@@ -0,0 +1,259
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.IO;
5 using Implab.Components;
6
7 namespace Implab.Automaton {
8 /// <summary>
9 /// Базовый класс для разбора потока входных символов на токены.
10 /// </summary>
11 /// <remarks>
12 /// Сканнер имеет внутри буффер с симолами входного текста, по которому перемещаются два
13 /// указателя, начала и конца токена, при перемещении искользуется ДКА для определения
14 /// конца токена и допустимости текущего символа.
15 /// </remarks>
16 public abstract class Scanner<TTag> : Disposable {
17 struct ScannerConfig {
18 public DFAStateDescriptior<TTag>[] states;
19 public int[] alphabetMap;
20 }
21
22 Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();
23
24 DFAStateDescriptior<TTag>[] m_states;
25 int[] m_alphabetMap;
26
27 protected DFAStateDescriptior<TTag> m_currentState;
28 int m_previewCode;
29
30 protected int m_tokenLen = 0;
31 protected int m_tokenOffset;
32
33 protected char[] m_buffer;
34 protected int m_bufferSize;
35 protected int m_pointer;
36
37 TextReader m_reader;
38 bool m_disposeReader;
39 int m_chunkSize = 1024; // 1k
40 int m_limit = 10 * 1024 * 1024; // 10Mb
41
42 protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet) {
43 Safe.ArgumentNotEmpty(states, "states");
44 Safe.ArgumentNotNull(alphabet, "alphabet");
45
46 m_states = states;
47 m_alphabetMap = alphabet;
48
49 Feed(new char[0]);
50 }
51
52 /// <summary>
53 /// Заполняет входными данными буффер.
54 /// </summary>
55 /// <param name="data">Данные для обработки.</param>
56 /// <remarks>Копирование данных не происходит, переданный массив используется в
57 /// качестве входного буффера.</remarks>
58 public void Feed(char[] data) {
59 Safe.ArgumentNotNull(data, "data");
60
61 Feed(data, data.Length);
62 }
63
64 /// <summary>
65 /// Заполняет буффур чтения входными данными.
66 /// </summary>
67 /// <param name="data">Данные для обработки.</param>
68 /// <param name="length">Длина данных для обработки.</param>
69 /// <remarks>Копирование данных не происходит, переданный массив используется в
70 /// качестве входного буффера.</remarks>
71 public void Feed(char[] data, int length) {
72 Safe.ArgumentNotNull(data, "data");
73 Safe.ArgumentInRange(length, 0, data.Length, "length");
74 AssertNotDisposed();
75
76 m_pointer = -1;
77 m_buffer = data;
78 m_bufferSize = length;
79 Shift();
80 }
81
82 public void Feed(TextReader reader, bool dispose) {
83 Safe.ArgumentNotNull(reader, "reader");
84 AssertNotDisposed();
85
86 if (m_reader != null && m_disposeReader)
87 m_reader.Dispose();
88
89 m_reader = reader;
90 m_disposeReader = dispose;
91 m_pointer = -1;
92 m_buffer = new char[m_chunkSize];
93 m_bufferSize = 0;
94 Shift();
95 }
96
97 /// <summary>
98 /// Получает текущий токен в виде строки.
99 /// </summary>
100 /// <returns></returns>
101 protected string GetTokenValue() {
102 return new String(m_buffer, m_tokenOffset, m_tokenLen);
103 }
104
105 /// <summary>
106 /// Метки текущего токена, которые были назначены в регулярном выражении.
107 /// </summary>
108 protected TTag[] TokenTags {
109 get {
110 return m_currentState.tag;
111 }
112 }
113
114 /// <summary>
115 /// Признак конца данных
116 /// </summary>
117 public bool EOF {
118 get {
119 return m_pointer >= m_bufferSize;
120 }
121 }
122
123 /// <summary>
124 /// Читает следующий токен, при этом <see cref="m_tokenOffset"/> указывает на начало токена,
125 /// <see cref="m_tokenLen"/> на длину токена, <see cref="m_buffer"/> - массив символов, в
126 /// котором находится токен.
127 /// </summary>
128 /// <returns><c>false</c> - достигнут конец данных, токен не прочитан.</returns>
129 protected bool ReadTokenInternal() {
130 if (m_pointer >= m_bufferSize)
131 return false;
132
133 m_currentState = m_states[DFADefinition.INITIAL_STATE];
134 m_tokenLen = 0;
135 m_tokenOffset = m_pointer;
136 int nextState;
137 do {
138 nextState = m_currentState.transitions[m_previewCode];
139 if (nextState == DFAConst.UNREACHABLE_STATE) {
140 if (m_currentState.final)
141 return true;
142 else
143 throw new ParserException(
144 String.Format(
145 "Unexpected symbol '{0}', at pos {1}",
146 m_buffer[m_pointer],
147 Position
148 )
149 );
150 } else {
151 m_currentState = m_states[nextState];
152 m_tokenLen++;
153 }
154
155 } while (Shift());
156
157 // END OF DATA
158 if (!m_currentState.final)
159 throw new ParserException("Unexpected end of data");
160
161 return true;
162 }
163
164
165 bool Shift() {
166 m_pointer++;
167
168 if (m_pointer >= m_bufferSize) {
169 if (!ReadNextChunk())
170 return false;
171 }
172
173 m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
174
175 return true;
176 }
177
178 bool ReadNextChunk() {
179 if (m_reader == null)
180 return false;
181
182 // extend buffer if nesessary
183 if (m_pointer + m_chunkSize > m_buffer.Length) {
184 // trim unused buffer head
185 var size = m_tokenLen + m_chunkSize;
186 if (size >= m_limit)
187 throw new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit));
188 var temp = new char[size];
189 Array.Copy(m_buffer, m_tokenOffset, temp, 0, m_tokenLen);
190 m_pointer -= m_tokenOffset;
191 m_bufferSize -= m_tokenOffset;
192 m_tokenOffset = 0;
193 m_buffer = temp;
194 }
195
196 var read = m_reader.Read(m_buffer, m_tokenLen, m_chunkSize);
197 if (read == 0)
198 return false;
199
200 m_bufferSize += read;
201
202 return true;
203 }
204
205 /// <summary>
206 /// Позиция сканнера во входном буфере
207 /// </summary>
208 public int Position {
209 get {
210 return m_pointer + 1;
211 }
212 }
213
214 /// <summary>
215 /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей
216 /// группировки.
217 /// </summary>
218 /// <param name="states">Таблица состояний нового ДКА</param>
219 /// <param name="alphabet">Таблица входных символов для нового ДКА</param>
220 protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet) {
221 Safe.ArgumentNotNull(states, "dfa");
222
223 m_defs.Push(new ScannerConfig {
224 states = m_states,
225 alphabetMap = m_alphabetMap
226 });
227
228 m_states = states;
229 m_alphabetMap = alphabet;
230
231 m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
232 }
233
234 /// <summary>
235 /// Восстанавливает предыдущей ДКА сканнера.
236 /// </summary>
237 protected void Restore() {
238 if (m_defs.Count == 0)
239 throw new InvalidOperationException();
240 var prev = m_defs.Pop();
241 m_states = prev.states;
242 m_alphabetMap = prev.alphabetMap;
243 m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
244 }
245
246 protected override void Dispose(bool disposing) {
247 if (disposing) {
248 if (m_reader != null && m_disposeReader)
249 m_reader.Dispose();
250 m_buffer = null;
251 m_bufferSize = 0;
252 m_pointer = 0;
253 m_tokenLen = 0;
254 m_tokenOffset = 0;
255 }
256 base.Dispose(disposing);
257 }
258 }
259 }
@@ -102,26 +102,6
102 102 <Compile Include="Parallels\ArrayTraits.cs" />
103 103 <Compile Include="Parallels\MTQueue.cs" />
104 104 <Compile Include="Parallels\WorkerPool.cs" />
105 <Compile Include="Parsing\AltToken.cs" />
106 <Compile Include="Parsing\BinaryToken.cs" />
107 <Compile Include="Parsing\CatToken.cs" />
108 <Compile Include="Parsing\CDFADefinition.cs" />
109 <Compile Include="Parsing\DFABuilder.cs" />
110 <Compile Include="Parsing\DFAStateDescriptor.cs" />
111 <Compile Include="Parsing\DFAutomaton.cs" />
112 <Compile Include="Parsing\EDFADefinition.cs" />
113 <Compile Include="Parsing\EmptyToken.cs" />
114 <Compile Include="Parsing\EndToken.cs" />
115 <Compile Include="Parsing\EnumAlphabet.cs" />
116 <Compile Include="Parsing\Grammar.cs" />
117 <Compile Include="Parsing\IAlphabet.cs" />
118 <Compile Include="Parsing\IDFADefinition.cs" />
119 <Compile Include="Parsing\IVisitor.cs" />
120 <Compile Include="Parsing\ParserException.cs" />
121 <Compile Include="Parsing\Scanner.cs" />
122 <Compile Include="Parsing\StarToken.cs" />
123 <Compile Include="Parsing\SymbolToken.cs" />
124 <Compile Include="Parsing\Token.cs" />
125 105 <Compile Include="ProgressInitEventArgs.cs" />
126 106 <Compile Include="Properties\AssemblyInfo.cs" />
127 107 <Compile Include="Parallels\AsyncPool.cs" />
@@ -178,11 +158,34
178 158 <Compile Include="Components\ExecutionState.cs" />
179 159 <Compile Include="Components\RunnableComponent.cs" />
180 160 <Compile Include="Components\IFactory.cs" />
181 <Compile Include="Parsing\DFADefinition.cs" />
182 <Compile Include="Parsing\IndexedAlphabetBase.cs" />
183 <Compile Include="Parsing\CharAlphabet.cs" />
184 <Compile Include="Parsing\IAlphabetBuilder.cs" />
185 <Compile Include="Parsing\IDFADefinitionBuilder.cs" />
161 <Compile Include="Automaton\CDFADefinition.cs" />
162 <Compile Include="Automaton\DFAStateDescriptor.cs" />
163 <Compile Include="Automaton\DFAutomaton.cs" />
164 <Compile Include="Automaton\EDFADefinition.cs" />
165 <Compile Include="Automaton\EnumAlphabet.cs" />
166 <Compile Include="Automaton\IAlphabet.cs" />
167 <Compile Include="Automaton\IDFADefinition.cs" />
168 <Compile Include="Automaton\ParserException.cs" />
169 <Compile Include="Automaton\Scanner.cs" />
170 <Compile Include="Automaton\DFADefinition.cs" />
171 <Compile Include="Automaton\IndexedAlphabetBase.cs" />
172 <Compile Include="Automaton\CharAlphabet.cs" />
173 <Compile Include="Automaton\IAlphabetBuilder.cs" />
174 <Compile Include="Automaton\IDFADefinitionBuilder.cs" />
175 <Compile Include="Automaton\RegularExpressions\AltToken.cs" />
176 <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" />
177 <Compile Include="Automaton\RegularExpressions\CatToken.cs" />
178 <Compile Include="Automaton\DFAConst.cs" />
179 <Compile Include="Automaton\RegularExpressions\Grammar.cs" />
180 <Compile Include="Automaton\RegularExpressions\StarToken.cs" />
181 <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" />
182 <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" />
183 <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
184 <Compile Include="Automaton\RegularExpressions\Token.cs" />
185 <Compile Include="Automaton\RegularExpressions\IVisitor.cs" />
186 <Compile Include="Automaton\RegularExpressions\DFABuilder.cs" />
187 <Compile Include="Automaton\AutomatonTransition.cs" />
188 <Compile Include="Automaton\ByteAlphabet.cs" />
186 189 </ItemGroup>
187 190 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
188 191 <ItemGroup />
@@ -257,5 +260,6
257 260 </ProjectExtensions>
258 261 <ItemGroup>
259 262 <Folder Include="Components\" />
263 <Folder Include="Automaton\RegularExpressions\" />
260 264 </ItemGroup>
261 265 </Project> No newline at end of file
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now