##// END OF EJS Templates
minor fixes and debug
cin -
r181:b2b6a6640aa3 ref20160224
parent child
Show More
@@ -1,313 +1,316
1 using Implab;
1 using Implab;
2 using System;
2 using System;
3 using System.Collections.Generic;
3 using System.Collections.Generic;
4 using System.Linq;
4 using System.Linq;
5 using System.Diagnostics;
5
6
6 namespace Implab.Automaton {
7 namespace Implab.Automaton {
7 public class DFATable : IDFATableBuilder {
8 public class DFATable : IDFATableBuilder {
8 int m_stateCount;
9 int m_stateCount;
9 int m_symbolCount;
10 int m_symbolCount;
10 int m_initialState;
11 int m_initialState;
11
12
12 readonly HashSet<int> m_finalStates = new HashSet<int>();
13 readonly HashSet<int> m_finalStates = new HashSet<int>();
13 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
14 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
14
15
15
16
16 #region IDFADefinition implementation
17 #region IDFADefinition implementation
17
18
18 public bool IsFinalState(int s) {
19 public bool IsFinalState(int s) {
19 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
20 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
20
21
21 return m_finalStates.Contains(s);
22 return m_finalStates.Contains(s);
22 }
23 }
23
24
24 public IEnumerable<int> FinalStates {
25 public IEnumerable<int> FinalStates {
25 get {
26 get {
26 return m_finalStates;
27 return m_finalStates;
27 }
28 }
28 }
29 }
29
30
30 public int StateCount {
31 public int StateCount {
31 get { return m_stateCount; }
32 get { return m_stateCount; }
32 }
33 }
33
34
34 public int AlphabetSize {
35 public int AlphabetSize {
35 get { return m_symbolCount; }
36 get { return m_symbolCount; }
36 }
37 }
37
38
38 public int InitialState {
39 public int InitialState {
39 get { return m_initialState; }
40 get { return m_initialState; }
40 }
41 }
41
42
42 #endregion
43 #endregion
43
44
44 public void SetInitialState(int s) {
45 public void SetInitialState(int s) {
45 Safe.ArgumentAssert(s >= 0, "s");
46 Safe.ArgumentAssert(s >= 0, "s");
47 m_stateCount = Math.Max(m_stateCount, s + 1);
46 m_initialState = s;
48 m_initialState = s;
47 }
49 }
48
50
49 public void MarkFinalState(int state) {
51 public void MarkFinalState(int state) {
52 m_stateCount = Math.Max(m_stateCount, state + 1);
50 m_finalStates.Add(state);
53 m_finalStates.Add(state);
51 }
54 }
52
55
53 public void Add(AutomatonTransition item) {
56 public void Add(AutomatonTransition item) {
54 Safe.ArgumentAssert(item.s1 >= 0, "item");
57 Safe.ArgumentAssert(item.s1 >= 0, "item");
55 Safe.ArgumentAssert(item.s2 >= 0, "item");
58 Safe.ArgumentAssert(item.s2 >= 0, "item");
56 Safe.ArgumentAssert(item.edge >= 0, "item");
59 Safe.ArgumentAssert(item.edge >= 0, "item");
57
60
58 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
61 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
59 m_symbolCount = Math.Max(m_symbolCount, item.edge);
62 m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
60
63
61 m_transitions.Add(item);
64 m_transitions.Add(item);
62 }
65 }
63
66
64 public void Clear() {
67 public void Clear() {
65 m_stateCount = 0;
68 m_stateCount = 0;
66 m_symbolCount = 0;
69 m_symbolCount = 0;
67 m_finalStates.Clear();
70 m_finalStates.Clear();
68 m_transitions.Clear();
71 m_transitions.Clear();
69 }
72 }
70
73
71 public bool Contains(AutomatonTransition item) {
74 public bool Contains(AutomatonTransition item) {
72 return m_transitions.Contains(item);
75 return m_transitions.Contains(item);
73 }
76 }
74
77
75 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
78 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
76 m_transitions.CopyTo(array, arrayIndex);
79 m_transitions.CopyTo(array, arrayIndex);
77 }
80 }
78
81
79 public bool Remove(AutomatonTransition item) {
82 public bool Remove(AutomatonTransition item) {
80 return m_transitions.Remove(item);
83 return m_transitions.Remove(item);
81 }
84 }
82
85
83 public int Count {
86 public int Count {
84 get {
87 get {
85 return m_transitions.Count;
88 return m_transitions.Count;
86 }
89 }
87 }
90 }
88
91
89 public bool IsReadOnly {
92 public bool IsReadOnly {
90 get {
93 get {
91 return false;
94 return false;
92 }
95 }
93 }
96 }
94
97
95 public IEnumerator<AutomatonTransition> GetEnumerator() {
98 public IEnumerator<AutomatonTransition> GetEnumerator() {
96 return m_transitions.GetEnumerator();
99 return m_transitions.GetEnumerator();
97 }
100 }
98
101
99 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
102 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
100 return GetEnumerator();
103 return GetEnumerator();
101 }
104 }
102
105
103 public int[,] CreateTransitionTable() {
106 public int[,] CreateTransitionTable() {
104 var table = new int[StateCount,AlphabetSize];
107 var table = new int[StateCount,AlphabetSize];
105
108
106 for (int i = 0; i < StateCount; i++)
109 for (int i = 0; i < StateCount; i++)
107 for (int j = 0; i < AlphabetSize; j++)
110 for (int j = 0; j < AlphabetSize; j++)
108 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
111 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
109
112
110 foreach (var t in this)
113 foreach (var t in this)
111 table[t.s1,t.edge] = t.s2;
114 table[t.s1,t.edge] = t.s2;
112
115
113 return table;
116 return table;
114 }
117 }
115
118
116 public bool[] CreateFinalStateTable() {
119 public bool[] CreateFinalStateTable() {
117 var table = new bool[StateCount];
120 var table = new bool[StateCount];
118
121
119 foreach (var s in FinalStates)
122 foreach (var s in FinalStates)
120 table[s] = true;
123 table[s] = true;
121
124
122 return table;
125 return table;
123 }
126 }
124
127
125 /// <summary>Π€ΠΎΡ€ΠΌΠΈΡ€ΡƒΠ΅Ρ‚ мноТСства ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний ΠΏΠ΅Ρ€Π΅Π΄ Π½Π°Ρ‡Π°Π»ΠΎΠΌ Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° ΠΌΠΈΠ½ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</summary>
128 /// <summary>Π€ΠΎΡ€ΠΌΠΈΡ€ΡƒΠ΅Ρ‚ мноТСства ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний ΠΏΠ΅Ρ€Π΅Π΄ Π½Π°Ρ‡Π°Π»ΠΎΠΌ Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° ΠΌΠΈΠ½ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</summary>
126 /// <remarks>
129 /// <remarks>
127 /// Π’ процСссС построСния минимального Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π° трСбуСтся Ρ€Π°Π·Π΄Π΅Π»ΠΈΡ‚ΡŒ мноТСство состояний,
130 /// Π’ процСссС построСния минимального Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π° трСбуСтся Ρ€Π°Π·Π΄Π΅Π»ΠΈΡ‚ΡŒ мноТСство состояний,
128 /// Π½Π° Π΄Π²Π° подмноТСства - ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния ΠΈ всС ΠΎΡΡ‚Π°Π»ΡŒΠ½Ρ‹Π΅, послС Ρ‡Π΅Π³ΠΎ эти подмноТСства
131 /// Π½Π° Π΄Π²Π° подмноТСства - ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния ΠΈ всС ΠΎΡΡ‚Π°Π»ΡŒΠ½Ρ‹Π΅, послС Ρ‡Π΅Π³ΠΎ эти подмноТСства
129 /// Π±ΡƒΠ΄ΡƒΡ‚ Ρ€Π΅Π·Π΄Π΅Π»Π΅Π½Ρ‹ Π½Π° Π±ΠΎΠ»Π΅Π΅ ΠΌΠ΅Π»ΠΊΠΈΠ΅. Иногда трСбуСтся Π³Π°Ρ€Π°Π½Ρ‚ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ различия ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… сосотяний,
132 /// Π±ΡƒΠ΄ΡƒΡ‚ Ρ€Π΅Π·Π΄Π΅Π»Π΅Π½Ρ‹ Π½Π° Π±ΠΎΠ»Π΅Π΅ ΠΌΠ΅Π»ΠΊΠΈΠ΅. Иногда трСбуСтся Π³Π°Ρ€Π°Π½Ρ‚ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ различия ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… сосотяний,
130 /// для этого Π½Π΅ΠΎΠ±Ρ…ΠΎΠ΄ΠΈΠΌΠΎ ΠΏΠ΅Ρ€Π΅ΠΎΠΏΡ€Π΅Π΄Π΅Π»ΠΈΡ‚ΡŒ Π΄Π°Π½Π½Ρ†ΡŽ Ρ„ΡƒΠΊΠ½Ρ†ΠΈΡŽ, для получСния мноТСств ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний.
133 /// для этого Π½Π΅ΠΎΠ±Ρ…ΠΎΠ΄ΠΈΠΌΠΎ ΠΏΠ΅Ρ€Π΅ΠΎΠΏΡ€Π΅Π΄Π΅Π»ΠΈΡ‚ΡŒ Π΄Π°Π½Π½Ρ†ΡŽ Ρ„ΡƒΠΊΠ½Ρ†ΠΈΡŽ, для получСния мноТСств ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний.
131 /// </remarks>
134 /// </remarks>
132 /// <returns>The final states.</returns>
135 /// <returns>The final states.</returns>
133 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
136 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
134 return new HashSet<int>[] { m_finalStates };
137 return new HashSet<int>[] { m_finalStates };
135 }
138 }
136
139
137 protected void Optimize(
140 protected void Optimize(
138 IDFATableBuilder optimalDFA,
141 IDFATableBuilder optimalDFA,
139 IDictionary<int,int> alphabetMap,
142 IDictionary<int,int> alphabetMap,
140 IDictionary<int,int> stateMap
143 IDictionary<int,int> stateMap
141 ) {
144 ) {
142 Safe.ArgumentNotNull(optimalDFA, "dfa");
145 Safe.ArgumentNotNull(optimalDFA, "dfa");
143 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
146 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
144 Safe.ArgumentNotNull(stateMap, "stateMap");
147 Safe.ArgumentNotNull(stateMap, "stateMap");
145
148
146
149
147 var setComparer = new CustomEqualityComparer<HashSet<int>>(
150 var setComparer = new CustomEqualityComparer<HashSet<int>>(
148 (x, y) => x.SetEquals(y),
151 (x, y) => x.SetEquals(y),
149 s => s.Sum(x => x.GetHashCode())
152 s => s.Sum(x => x.GetHashCode())
150 );
153 );
151
154
152 var optimalStates = new HashSet<HashSet<int>>(setComparer);
155 var optimalStates = new HashSet<HashSet<int>>(setComparer);
153 var queue = new HashSet<HashSet<int>>(setComparer);
156 var queue = new HashSet<HashSet<int>>(setComparer);
154
157
155 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния, сгруппированныС ΠΏΠΎ ΠΌΠ°Ρ€ΠΊΠ΅Ρ€Π°ΠΌ
158 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния, сгруппированныС ΠΏΠΎ ΠΌΠ°Ρ€ΠΊΠ΅Ρ€Π°ΠΌ
156 optimalStates.UnionWith(
159 optimalStates.UnionWith(
157 GroupFinalStates()
160 GroupFinalStates()
158 );
161 );
159
162
160 var state = new HashSet<int>(
163 var state = new HashSet<int>(
161 Enumerable
164 Enumerable
162 .Range(0, m_stateCount - 1)
165 .Range(0, m_stateCount - 1)
163 .Where(i => !m_finalStates.Contains(i))
166 .Where(i => !m_finalStates.Contains(i))
164 );
167 );
165
168
166 optimalStates.Add(state);
169 optimalStates.Add(state);
167 queue.Add(state);
170 queue.Add(state);
168
171
169 var rmap = m_transitions
172 var rmap = m_transitions
170 .GroupBy(t => t.s2)
173 .GroupBy(t => t.s2)
171 .ToDictionary(
174 .ToDictionary(
172 g => g.Key, // s2
175 g => g.Key, // s2
173 g => g.GroupBy(t => t.edge, t => t.s1).ToDictionary(p => p.Key)
176 g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
174 );
177 );
175
178
176 while (queue.Count > 0) {
179 while (queue.Count > 0) {
177 var stateA = queue.First();
180 var stateA = queue.First();
178 queue.Remove(stateA);
181 queue.Remove(stateA);
179
182
180 for (int c = 0; c < m_symbolCount; c++) {
183 for (int c = 0; c < m_symbolCount; c++) {
181 var stateX = new HashSet<int>();
184 var stateX = new HashSet<int>();
182 foreach(var a in stateA)
185 foreach(var a in stateA.Where(rmap.ContainsKey))
183 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
186 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
184
187
185 foreach (var stateY in optimalStates.ToArray()) {
188 foreach (var stateY in optimalStates.ToArray()) {
186 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
189 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
187 var stateR1 = new HashSet<int>(stateY);
190 var stateR1 = new HashSet<int>(stateY);
188 var stateR2 = new HashSet<int>(stateY);
191 var stateR2 = new HashSet<int>(stateY);
189
192
190 stateR1.IntersectWith(stateX);
193 stateR1.IntersectWith(stateX);
191 stateR2.ExceptWith(stateX);
194 stateR2.ExceptWith(stateX);
192
195
193 optimalStates.Remove(stateY);
196 optimalStates.Remove(stateY);
194 optimalStates.Add(stateR1);
197 optimalStates.Add(stateR1);
195 optimalStates.Add(stateR2);
198 optimalStates.Add(stateR2);
196
199
197 if (queue.Contains(stateY)) {
200 if (queue.Contains(stateY)) {
198 queue.Remove(stateY);
201 queue.Remove(stateY);
199 queue.Add(stateR1);
202 queue.Add(stateR1);
200 queue.Add(stateR2);
203 queue.Add(stateR2);
201 } else {
204 } else {
202 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
205 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
203 }
206 }
204 }
207 }
205 }
208 }
206 }
209 }
207 }
210 }
208
211
209 // ΠΊΠ°Ρ€Ρ‚Π° получСния ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½ΠΎΠ³ΠΎ состояния ΠΏΠΎ ΡΠΎΠΎΡ‚Π²Π΅ΡΡ‚Π²ΡƒΡŽΡ‰Π΅ΠΌΡƒ Π΅ΠΌΡƒ простому ΡΠΎΡΡ‚ΠΎΡΠ½ΠΈΡŽ
212 // ΠΊΠ°Ρ€Ρ‚Π° получСния ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½ΠΎΠ³ΠΎ состояния ΠΏΠΎ ΡΠΎΠΎΡ‚Π²Π΅ΡΡ‚Π²ΡƒΡŽΡ‰Π΅ΠΌΡƒ Π΅ΠΌΡƒ простому ΡΠΎΡΡ‚ΠΎΡΠ½ΠΈΡŽ
210 var nextState = 0;
213 var nextState = 0;
211 foreach (var item in optimalStates) {
214 foreach (var item in optimalStates) {
212 var id = nextState++;
215 var id = nextState++;
213 foreach (var s in item)
216 foreach (var s in item)
214 stateMap[s] = id;
217 stateMap[s] = id;
215 }
218 }
216
219
217 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
220 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
218 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Π΅ символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли Move(s,a1) == Move(s,a2), для любого s
221 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Π΅ символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли Move(s,a1) == Move(s,a2), для любого s
219 // для этого ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌ кластСризации, сначала
222 // для этого ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌ кластСризации, сначала
220 // считаСм, Ρ‡Ρ‚ΠΎ всС символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹
223 // считаСм, Ρ‡Ρ‚ΠΎ всС символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹
221
224
222 var minClasses = new HashSet<HashSet<int>>(setComparer);
225 var minClasses = new HashSet<HashSet<int>>(setComparer);
223 var alphaQueue = new Queue<HashSet<int>>();
226 var alphaQueue = new Queue<HashSet<int>>();
224 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
227 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
225
228
226 // для всСх состояний, Π±ΡƒΠ΄Π΅ΠΌ ΠΏΡ€ΠΎΠ²Π΅Ρ€ΡΡ‚ΡŒ ΠΊΠ°ΠΆΠ΄Ρ‹ΠΉ класс Π½Π° Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΠΎΡΡ‚ΡŒ,
229 // для всСх состояний, Π±ΡƒΠ΄Π΅ΠΌ ΠΏΡ€ΠΎΠ²Π΅Ρ€ΡΡ‚ΡŒ ΠΊΠ°ΠΆΠ΄Ρ‹ΠΉ класс Π½Π° Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΠΎΡΡ‚ΡŒ,
227 // Ρ‚.Π΅. символы Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли ΠΎΠ½ΠΈ приводят ΠΊ Ρ€Π°Π·Π½Ρ‹ΠΌ состояниям
230 // Ρ‚.Π΅. символы Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли ΠΎΠ½ΠΈ приводят ΠΊ Ρ€Π°Π·Π½Ρ‹ΠΌ состояниям
228 for (int s = 0 ; s < optimalStates.Count; s++) {
231 for (int s = 0 ; s < optimalStates.Count; s++) {
229 var newQueue = new Queue<HashSet<int>>();
232 var newQueue = new Queue<HashSet<int>>();
230
233
231 foreach (var A in alphaQueue) {
234 foreach (var A in alphaQueue) {
232 // классы ΠΈΠ· ΠΎΠ΄Π½ΠΎΠ³ΠΎ символа Π΄Π΅Π»ΠΈΡ‚ΡŒ бСсполСзно, ΠΏΠ΅Ρ€Π΅Π²ΠΎΠ΄ΠΈΠΌ ΠΈΡ… сразу Π²
235 // классы ΠΈΠ· ΠΎΠ΄Π½ΠΎΠ³ΠΎ символа Π΄Π΅Π»ΠΈΡ‚ΡŒ бСсполСзно, ΠΏΠ΅Ρ€Π΅Π²ΠΎΠ΄ΠΈΠΌ ΠΈΡ… сразу Π²
233 // Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚ΠΈΡ€ΡƒΡŽΡ‰ΠΈΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
236 // Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚ΠΈΡ€ΡƒΡŽΡ‰ΠΈΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
234 if (A.Count == 1) {
237 if (A.Count == 1) {
235 minClasses.Add(A);
238 minClasses.Add(A);
236 continue;
239 continue;
237 }
240 }
238
241
239 // Ρ€Π°Π·Π»ΠΈΡ‡Π°Π΅ΠΌ классы символов, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ пСрСводят Π² Ρ€Π°Π·Π»ΠΈΡ‡Π½Ρ‹Π΅ ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ состояния
242 // Ρ€Π°Π·Π»ΠΈΡ‡Π°Π΅ΠΌ классы символов, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ пСрСводят Π² Ρ€Π°Π·Π»ΠΈΡ‡Π½Ρ‹Π΅ ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ состояния
240 // optimalState -> alphaClass
243 // optimalState -> alphaClass
241 var classes = new Dictionary<int, HashSet<int>>();
244 var classes = new Dictionary<int, HashSet<int>>();
242
245
243 foreach (var term in A) {
246 foreach (var term in A) {
244 // ΠΈΡ‰Π΅ΠΌ всС ΠΏΠ΅Ρ€Π΅Ρ…ΠΎΠ΄Ρ‹ класса ΠΏΠΎ символу term
247 // ΠΈΡ‰Π΅ΠΌ всС ΠΏΠ΅Ρ€Π΅Ρ…ΠΎΠ΄Ρ‹ класса ΠΏΠΎ символу term
245 var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray();
248 var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray();
246
249
250 Debug.Assert(res.Length <= 1);
251
247 var s2 = res.Length > 0 ? res[0] : -1;
252 var s2 = res.Length > 0 ? res[0] : -1;
248
253
249 HashSet<int> a2;
254 HashSet<int> a2;
250 if (!classes.TryGetValue(s2, out a2)) {
255 if (!classes.TryGetValue(s2, out a2)) {
251 a2 = new HashSet<int>();
256 a2 = new HashSet<int>();
252 newQueue.Enqueue(a2);
257 newQueue.Enqueue(a2);
253 classes[s2] = a2;
258 classes[s2] = a2;
254 }
259 }
255 a2.Add(term);
260 a2.Add(term);
256 }
261 }
257 }
262 }
258
263
259 if (newQueue.Count == 0)
264 if (newQueue.Count == 0)
260 break;
265 break;
261 alphaQueue = newQueue;
266 alphaQueue = newQueue;
262 }
267 }
263
268
264 // послС окончания Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° Π² ΠΎΡ‡Π΅Ρ€Π΅Π΄ΠΈ останутся ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹Π΅ классы
269 // послС окончания Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° Π² ΠΎΡ‡Π΅Ρ€Π΅Π΄ΠΈ останутся ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹Π΅ классы
265 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов
270 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов
266 foreach (var A in alphaQueue)
271 foreach (var A in alphaQueue)
267 minClasses.Add(A);
272 minClasses.Add(A);
268
273
269 // построСниС отобраТСния Π°Π»Ρ„Π°Π²ΠΈΡ‚ΠΎΠ² Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов.
274 // построСниС отобраТСния Π°Π»Ρ„Π°Π²ΠΈΡ‚ΠΎΠ² Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов.
270 // ΠΏΠΎΡΠΊΠΎΠ»ΡŒΠΊΡƒ символ DFAConst.UNCLASSIFIED_INPUT ΠΌΠΎΠΆΠ΅Ρ‚ ΠΈΠΌΠ΅Ρ‚ΡŒ
275 // ΠΏΠΎΡΠΊΠΎΠ»ΡŒΠΊΡƒ символ DFAConst.UNCLASSIFIED_INPUT ΠΌΠΎΠΆΠ΅Ρ‚ ΠΈΠΌΠ΅Ρ‚ΡŒ
271 // ΡΠΏΠ΅Ρ†ΠΈΠ°Π»ΡŒΠ½ΠΎΠ΅ Π·Π½Π°Ρ‡Π΅Π½ΠΈΠ΅, Ρ‚ΠΎΠ³Π΄Π° сохраним ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ класс,
276 // ΡΠΏΠ΅Ρ†ΠΈΠ°Π»ΡŒΠ½ΠΎΠ΅ Π·Π½Π°Ρ‡Π΅Π½ΠΈΠ΅, Ρ‚ΠΎΠ³Π΄Π° сохраним ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ класс,
272 // содСрТащий этот символ Π½Π° Ρ‚ΠΎΠΌΠΆΠ΅ мСстС.
277 // содСрТащий этот символ Π½Π° Ρ‚ΠΎΠΌΠΆΠ΅ мСстС.
273
278
274 var nextCls = 0;
279 var nextCls = 0;
275 foreach (var item in minClasses) {
280 foreach (var item in minClasses) {
276 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
281 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
277 nextCls++;
282 nextCls++;
278
283
279 // сохраняСм DFAConst.UNCLASSIFIED_INPUT
284 // сохраняСм DFAConst.UNCLASSIFIED_INPUT
280 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls;
285 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
281
286
282 foreach (var a in item)
287 foreach (var a in item)
283 alphabetMap[a] = cls;
288 alphabetMap[a] = cls;
284
285 nextCls++;
286 }
289 }
287
290
288 // построСниС Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π°
291 // построСниС Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π°
289 optimalDFA.SetInitialState(stateMap[m_initialState]);
292 optimalDFA.SetInitialState(stateMap[m_initialState]);
290
293
291 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
294 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
292 optimalDFA.MarkFinalState(sf);
295 optimalDFA.MarkFinalState(sf);
293
296
294 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
297 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
295 optimalDFA.Add(t);
298 optimalDFA.Add(t);
296 }
299 }
297
300
298 protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
301 protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
299 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
302 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
300 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
303 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
301
304
302 foreach(var t in m_transitions)
305 foreach(var t in m_transitions)
303 Console.WriteLine(
306 Console.WriteLine(
304 "[{0}] -{{{1}}}-> [{2}]{3}",
307 "[{0}] -{{{1}}}-> [{2}]{3}",
305 String.Join(",", stateAlphabet.GetSymbols(t.s1)),
308 String.Join(",", stateAlphabet.GetSymbols(t.s1)),
306 String.Join("", inputAlphabet.GetSymbols(t.edge)),
309 String.Join("", inputAlphabet.GetSymbols(t.edge)),
307 String.Join(",", stateAlphabet.GetSymbols(t.s2)),
310 String.Join(",", stateAlphabet.GetSymbols(t.s2)),
308 m_finalStates.Contains(t.s2) ? "$" : ""
311 m_finalStates.Contains(t.s2) ? "$" : ""
309 );
312 );
310 }
313 }
311
314
312 }
315 }
313 }
316 }
@@ -1,84 +1,84
1 using System;
1 using System;
2 using System.Collections.Generic;
2 using System.Collections.Generic;
3 using System.Linq;
3 using System.Linq;
4
4
5 namespace Implab.Automaton {
5 namespace Implab.Automaton {
6 public class MapAlphabet<T> : IAlphabetBuilder<T> {
6 public class MapAlphabet<T> : IAlphabetBuilder<T> {
7 readonly Dictionary<T,int> m_map;
7 readonly Dictionary<T,int> m_map;
8 int m_nextCls;
8 int m_nextCls;
9 readonly bool m_supportUnclassified;
9 readonly bool m_supportUnclassified;
10
10
11 public MapAlphabet(bool supportUnclassified, IEqualityComparer<T> comparer) {
11 public MapAlphabet(bool supportUnclassified, IEqualityComparer<T> comparer) {
12 m_map = comparer != null ? new Dictionary<T, int>(comparer) : new Dictionary<T,int>();
12 m_map = comparer != null ? new Dictionary<T, int>(comparer) : new Dictionary<T,int>();
13 m_supportUnclassified = supportUnclassified;
13 m_supportUnclassified = supportUnclassified;
14 m_nextCls = supportUnclassified ? 1 : 0;
14 m_nextCls = supportUnclassified ? 1 : 0;
15 }
15 }
16
16
17 #region IAlphabetBuilder implementation
17 #region IAlphabetBuilder implementation
18
18
19 public int DefineSymbol(T symbol) {
19 public int DefineSymbol(T symbol) {
20 int cls;
20 int cls;
21 return m_map.TryGetValue(symbol, out cls) ? cls : DefineSymbol(symbol, m_nextCls);
21 return m_map.TryGetValue(symbol, out cls) ? cls : DefineSymbol(symbol, m_nextCls);
22 }
22 }
23
23
24 public int DefineSymbol(T symbol, int cls) {
24 public int DefineSymbol(T symbol, int cls) {
25 Safe.ArgumentAssert(cls >= 0, "cls");
25 Safe.ArgumentAssert(cls >= 0, "cls");
26
26
27 m_nextCls = Math.Max(cls + 1, m_nextCls);
27 m_nextCls = Math.Max(cls + 1, m_nextCls);
28 m_map.Add(symbol, cls);
28 m_map.Add(symbol, cls);
29 return cls;
29 return cls;
30 }
30 }
31
31
32 public int DefineClass(IEnumerable<T> symbols) {
32 public int DefineClass(IEnumerable<T> symbols) {
33 return DefineClass(symbols, m_nextCls);
33 return DefineClass(symbols, m_nextCls);
34 }
34 }
35
35
36 public int DefineClass(IEnumerable<T> symbols, int cls) {
36 public int DefineClass(IEnumerable<T> symbols, int cls) {
37 Safe.ArgumentAssert(cls >= 0, "cls");
37 Safe.ArgumentAssert(cls >= 0, "cls");
38 Safe.ArgumentNotNull(symbols, "symbols");
38 Safe.ArgumentNotNull(symbols, "symbols");
39
39
40 m_nextCls = Math.Max(cls + 1, m_nextCls);
40 m_nextCls = Math.Max(cls + 1, m_nextCls);
41
41
42 foreach (var symbol in symbols)
42 foreach (var symbol in symbols)
43 m_map[symbol] = cls;
43 m_map[symbol] = cls;
44 return cls;
44 return cls;
45 }
45 }
46
46
47 #endregion
47 #endregion
48
48
49 #region IAlphabet implementation
49 #region IAlphabet implementation
50
50
51 public int Translate(T symbol) {
51 public int Translate(T symbol) {
52 int cls;
52 int cls;
53 if (m_map.TryGetValue(symbol, out cls))
53 if (m_map.TryGetValue(symbol, out cls))
54 return cls;
54 return cls;
55 if (!m_supportUnclassified)
55 if (!m_supportUnclassified)
56 throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet");
56 throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet");
57 return AutomatonConst.UNCLASSIFIED_INPUT;
57 return AutomatonConst.UNCLASSIFIED_INPUT;
58 }
58 }
59
59
60 public int Count {
60 public int Count {
61 get {
61 get {
62 return m_nextCls;
62 return m_nextCls;
63 }
63 }
64 }
64 }
65
65
66 public bool Contains(T symbol) {
66 public bool Contains(T symbol) {
67 return m_supportUnclassified || m_map.ContainsKey(symbol);
67 return m_supportUnclassified || m_map.ContainsKey(symbol);
68 }
68 }
69
69
70
70
71 public IEnumerable<T> GetSymbols(int cls) {
71 public IEnumerable<T> GetSymbols(int cls) {
72 Safe.ArgumentAssert(cls > 0, "cls");
72 Safe.ArgumentAssert(!m_supportUnclassified || cls > 0, "cls");
73 return m_map.Where(p => p.Value == cls).Select(p => p.Key);
73 return m_map.Where(p => p.Value == cls).Select(p => p.Key);
74 }
74 }
75 #endregion
75 #endregion
76
76
77 public IEnumerable<KeyValuePair<T,int>> Mappings {
77 public IEnumerable<KeyValuePair<T,int>> Mappings {
78 get {
78 get {
79 return m_map;
79 return m_map;
80 }
80 }
81 }
81 }
82 }
82 }
83 }
83 }
84
84
@@ -1,82 +1,83
1 using System.Collections.Generic;
1 using System.Collections.Generic;
2 using System.Linq;
2 using System.Linq;
3
3
4 namespace Implab.Automaton.RegularExpressions {
4 namespace Implab.Automaton.RegularExpressions {
5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
6
6
7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
8 readonly IAlphabet<TInput> m_alphabet;
8 readonly IAlphabet<TInput> m_alphabet;
9
9
10 public RegularDFA(IAlphabet<TInput> alphabet) {
10 public RegularDFA(IAlphabet<TInput> alphabet) {
11 Safe.ArgumentNotNull(alphabet, "aplhabet");
11 Safe.ArgumentNotNull(alphabet, "aplhabet");
12
12
13 m_alphabet = alphabet;
13 m_alphabet = alphabet;
14 }
14 }
15
15
16
16
17 public IAlphabet<TInput> InputAlphabet {
17 public IAlphabet<TInput> InputAlphabet {
18 get {
18 get {
19 return m_alphabet;
19 return m_alphabet;
20 }
20 }
21 }
21 }
22
22
23 public void MarkFinalState(int s, TTag[] tags) {
23 public void MarkFinalState(int s, TTag[] tags) {
24 MarkFinalState(s);
24 MarkFinalState(s);
25 SetStateTag(s, tags);
25 SetStateTag(s, tags);
26 }
26 }
27
27
28 public void SetStateTag(int s, TTag[] tags) {
28 public void SetStateTag(int s, TTag[] tags) {
29 Safe.ArgumentNotNull(tags, "tags");
29 Safe.ArgumentNotNull(tags, "tags");
30 m_tags[s] = tags;
30 m_tags[s] = tags;
31 }
31 }
32
32
33 public TTag[] GetStateTag(int s) {
33 public TTag[] GetStateTag(int s) {
34 TTag[] tags;
34 TTag[] tags;
35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
36 }
36 }
37
37
38 public TTag[][] CreateTagTable() {
38 public TTag[][] CreateTagTable() {
39 var table = new TTag[StateCount][];
39 var table = new TTag[StateCount][];
40
40
41 foreach (var pair in m_tags)
41 foreach (var pair in m_tags)
42 table[pair.Key] = pair.Value;
42 table[pair.Key] = pair.Value;
43
43
44 return table;
44 return table;
45 }
45 }
46
46
47 /// <summary>
47 /// <summary>
48 /// Optimize the specified alphabet.
48 /// Optimize the specified alphabet.
49 /// </summary>
49 /// </summary>
50 /// <param name="alphabet">ΠŸΡƒΡΡ‚ΠΎΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π±ΡƒΠ΄Π΅Ρ‚ Π·ΠΏΠΎΠ»Π½Π΅Π½ Π² процСссС ΠΎΠΏΡ‚ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</param>
50 /// <param name="alphabet">ΠŸΡƒΡΡ‚ΠΎΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π±ΡƒΠ΄Π΅Ρ‚ Π·ΠΏΠΎΠ»Π½Π΅Π½ Π² процСссС ΠΎΠΏΡ‚ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</param>
51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
52 Safe.ArgumentNotNull(alphabet, "alphabet");
52 Safe.ArgumentNotNull(alphabet, "alphabet");
53
53
54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
55
55
56 var alphaMap = new Dictionary<int,int>();
56 var alphaMap = new Dictionary<int,int>();
57 var stateMap = new Dictionary<int,int>();
57 var stateMap = new Dictionary<int,int>();
58
58
59 Optimize(dfa, alphaMap, stateMap);
59 Optimize(dfa, alphaMap, stateMap);
60
60
61 // mark tags in the new DFA
61 // mark tags in the new DFA
62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
64
64
65 // make the alphabet for the new DFA
65 // make the alphabet for the new DFA
66 foreach (var pair in alphaMap)
66 // skip all unclassified symbols
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
67 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
68
69
69 return dfa;
70 return dfa;
70 }
71 }
71
72
72 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
73 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
73 var arrayComparer = new CustomEqualityComparer<TTag[]>(
74 var arrayComparer = new CustomEqualityComparer<TTag[]>(
74 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
75 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
75 x => x.Sum(it => x.GetHashCode())
76 x => x.Sum(it => x.GetHashCode())
76 );
77 );
77 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
78 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
78 }
79 }
79
80
80 }
81 }
81 }
82 }
82
83
@@ -1,119 +1,119
1 using System.Linq;
1 using System.Linq;
2 using Implab.Automaton.RegularExpressions;
2 using Implab.Automaton.RegularExpressions;
3 using System;
3 using System;
4 using Implab.Automaton;
4 using Implab.Automaton;
5 using Implab.Components;
5 using Implab.Components;
6
6
7 namespace Implab.Formats.JSON {
7 namespace Implab.Formats.JSON {
8 class JSONGrammar : Grammar<char> {
8 class JSONGrammar : Grammar<char> {
9 public enum TokenType {
9 public enum TokenType {
10 None,
10 None,
11 BeginObject,
11 BeginObject,
12 EndObject,
12 EndObject,
13 BeginArray,
13 BeginArray,
14 EndArray,
14 EndArray,
15 String,
15 String,
16 Number,
16 Number,
17 Literal,
17 Literal,
18 NameSeparator,
18 NameSeparator,
19 ValueSeparator,
19 ValueSeparator,
20
20
21 StringBound,
21 StringBound,
22 EscapedChar,
22 EscapedChar,
23 UnescapedChar,
23 UnescapedChar,
24 EscapedUnicode
24 EscapedUnicode
25 }
25 }
26
26
27 static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());
27 static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());
28
28
29 public static JSONGrammar Instance {
29 public static JSONGrammar Instance {
30 get { return _instance.Value; }
30 get { return _instance.Value; }
31 }
31 }
32
32
33 readonly ScannerContext<TokenType> m_jsonExpression;
33 readonly ScannerContext<TokenType> m_jsonExpression;
34 readonly ScannerContext<TokenType> m_stringExpression;
34 readonly ScannerContext<TokenType> m_stringExpression;
35 readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();
35 readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();
36
36
37 public JSONGrammar() {
37 public JSONGrammar() {
38 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
38 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
39 var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
39 var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
40 var digit9 = SymbolRangeToken('1', '9');
40 var digit9 = SymbolRangeToken('1', '9');
41 var zero = SymbolToken('0');
41 var zero = SymbolToken('0');
42 var digit = zero.Or(digit9);
42 var digit = zero.Or(digit9);
43 var dot = SymbolToken('.');
43 var dot = SymbolToken('.');
44 var minus = SymbolToken('-');
44 var minus = SymbolToken('-');
45 var sign = SymbolSetToken('-', '+');
45 var sign = SymbolSetToken('-', '+');
46 var expSign = SymbolSetToken('e', 'E');
46 var expSign = SymbolSetToken('e', 'E');
47 var letters = SymbolRangeToken('a', 'z');
47 var letters = SymbolRangeToken('a', 'z');
48 var integer = zero.Or(digit9.Cat(digit.EClosure()));
48 var integer = zero.Or(digit9.Cat(digit.EClosure()));
49 var frac = dot.Cat(digit.Closure());
49 var frac = dot.Cat(digit.Closure());
50 var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
50 var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
51 var quote = SymbolToken('"');
51 var quote = SymbolToken('"');
52 var backSlash = SymbolToken('\\');
52 var backSlash = SymbolToken('\\');
53 var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
53 var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
54 var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
54 var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
55 var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
55 var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
56 var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
56 var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
57 var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
57 var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
58 var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
58 var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
59 var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
59 var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
60 var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
60 var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
61 var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);
61 var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);
62
62
63 var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
63 var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
64 var literal = letters.Closure();
64 var literal = letters.Closure();
65 var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
65 var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
66
66
67 var jsonExpression =
67 var jsonExpression =
68 number.Tag(TokenType.Number)
68 number.Tag(TokenType.Number)
69 .Or(literal.Tag(TokenType.Literal))
69 .Or(literal.Tag(TokenType.Literal))
70 .Or(quote.Tag(TokenType.StringBound))
70 .Or(quote.Tag(TokenType.StringBound))
71 .Or(beginObject.Tag(TokenType.BeginObject))
71 .Or(beginObject.Tag(TokenType.BeginObject))
72 .Or(endObject.Tag(TokenType.EndObject))
72 .Or(endObject.Tag(TokenType.EndObject))
73 .Or(beginArray.Tag(TokenType.BeginArray))
73 .Or(beginArray.Tag(TokenType.BeginArray))
74 .Or(endArray.Tag(TokenType.EndArray))
74 .Or(endArray.Tag(TokenType.EndArray))
75 .Or(nameSep.Tag(TokenType.NameSeparator))
75 .Or(nameSep.Tag(TokenType.NameSeparator))
76 .Or(valueSep.Tag(TokenType.ValueSeparator));
76 .Or(valueSep.Tag(TokenType.ValueSeparator));
77
77
78
78
79 var jsonStringExpression =
79 var jsonStringExpression =
80 quote.Tag(TokenType.StringBound)
80 quote.Tag(TokenType.StringBound)
81 .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
81 .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
82 .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
82 .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
83 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
83 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
84
84
85
85
86 m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
86 m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
87 m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
87 m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
88
88
89
89
90 }
90 }
91
91
92 protected override IAlphabetBuilder<char> AlphabetBuilder {
92 protected override IAlphabetBuilder<char> AlphabetBuilder {
93 get {
93 get {
94 return m_defaultAlphabet;
94 return m_defaultAlphabet;
95 }
95 }
96 }
96 }
97
97
98 public ScannerContext<TokenType> JsonExpression {
98 public ScannerContext<TokenType> JsonExpression {
99 get {
99 get {
100 return m_jsonExpression;
100 return m_jsonExpression;
101 }
101 }
102 }
102 }
103
103
104 public ScannerContext<TokenType> JsonStringExpression {
104 public ScannerContext<TokenType> JsonStringExpression {
105 get {
105 get {
106 return m_stringExpression;
106 return m_stringExpression;
107 }
107 }
108 }
108 }
109
109
110 Token SymbolRangeToken(char start, char stop) {
110 Token SymbolRangeToken(char start, char stop) {
111 return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>());
111 return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x));
112 }
112 }
113
113
114 protected override IndexedAlphabetBase<char> CreateAlphabet() {
114 protected override IndexedAlphabetBase<char> CreateAlphabet() {
115 return new CharAlphabet();
115 return new CharAlphabet();
116 }
116 }
117
117
118 }
118 }
119 }
119 }
@@ -1,156 +1,162
1 using System;
1 using System;
2 using Implab.Components;
2 using Implab.Components;
3 using System.Diagnostics;
3 using System.Diagnostics;
4 using Implab.Automaton;
4 using Implab.Automaton;
5 using System.Text;
5 using System.Text;
6
6
7 namespace Implab.Formats {
7 namespace Implab.Formats {
8 public abstract class TextScanner : Disposable {
8 public abstract class TextScanner : Disposable {
9 readonly int m_bufferMax;
9 readonly int m_bufferMax;
10 readonly int m_chunkSize;
10 readonly int m_chunkSize;
11
11
12 char[] m_buffer;
12 char[] m_buffer;
13 int m_bufferOffset;
13 int m_bufferOffset;
14 int m_bufferSize;
14 int m_bufferSize;
15 int m_tokenOffset;
15 int m_tokenOffset;
16 int m_tokenLength;
16 int m_tokenLength;
17
17
18 /// <summary>
18 /// <summary>
19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
20 /// </summary>
20 /// </summary>
21 /// <param name="bufferMax">Buffer max.</param>
21 /// <param name="bufferMax">Buffer max.</param>
22 /// <param name="chunkSize">Chunk size.</param>
22 /// <param name="chunkSize">Chunk size.</param>
23 protected TextScanner(int bufferMax, int chunkSize) {
23 protected TextScanner(int bufferMax, int chunkSize) {
24 Debug.Assert(m_chunkSize <= m_bufferMax);
24 Debug.Assert(m_chunkSize <= m_bufferMax);
25
25
26 m_bufferMax = bufferMax;
26 m_bufferMax = bufferMax;
27 m_chunkSize = chunkSize;
27 m_chunkSize = chunkSize;
28 }
28 }
29
29
30 /// <summary>
30 /// <summary>
31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
32 /// </summary>
32 /// </summary>
33 /// <param name="buffer">Buffer.</param>
33 /// <param name="buffer">Buffer.</param>
34 protected TextScanner(char[] buffer) {
34 protected TextScanner(char[] buffer) {
35 if (buffer != null) {
35 if (buffer != null) {
36 m_buffer = buffer;
36 m_buffer = buffer;
37 m_bufferSize = buffer.Length;
37 m_bufferSize = buffer.Length;
38 }
38 }
39 }
39 }
40
40
41 /// <summary>
41 /// <summary>
42 /// (hungry) Reads the next token.
42 /// (hungry) Reads the next token.
43 /// </summary>
43 /// </summary>
44 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns>
44 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns>
45 /// <param name="dfa">The transition map for the automaton</param>
45 /// <param name="dfa">The transition map for the automaton</param>
46 /// <param name="final">Final states of the automaton.</param>
46 /// <param name="final">Final states of the automaton.</param>
47 /// <param name="tags">Tags.</param>
47 /// <param name="tags">Tags.</param>
48 /// <param name="state">The initial state for the automaton.</param>
48 /// <param name="state">The initial state for the automaton.</param>
49 /// <param name="alphabet"></param>
49 /// <param name="alphabet"></param>
50 /// <param name = "tag"></param>
50 /// <param name = "tag"></param>
51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
52 m_tokenLength = 0;
52 m_tokenLength = 0;
53 tag = null;
53 tag = null;
54
54
55 var maxSymbol = alphabet.Length - 1;
55 var maxSymbol = alphabet.Length - 1;
56
56
57 do {
57 do {
58 // after the next chunk is read the offset in the buffer may change
58 // after the next chunk is read the offset in the buffer may change
59 int pos = m_bufferOffset + m_tokenLength;
59 int pos = m_bufferOffset + m_tokenLength;
60
60
61 while (pos < m_bufferSize) {
61 while (pos < m_bufferSize) {
62 var ch = m_buffer[pos];
62 var ch = m_buffer[pos];
63
63
64 state = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
64 try {
65 if (state == AutomatonConst.UNREACHABLE_STATE)
65 var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
66
67 if (next == AutomatonConst.UNREACHABLE_STATE)
66 break;
68 break;
67
69
70 state = next;
71 }catch {
72 throw;
73 }
68 pos++;
74 pos++;
69 }
75 }
70
76
71 m_tokenLength = pos - m_bufferOffset;
77 m_tokenLength = pos - m_bufferOffset;
72 } while (state != AutomatonConst.UNREACHABLE_STATE && Feed());
78 } while (state != AutomatonConst.UNREACHABLE_STATE && Feed());
73
79
74 m_tokenOffset = m_bufferOffset;
80 m_tokenOffset = m_bufferOffset;
75 m_bufferOffset += m_tokenLength;
81 m_bufferOffset += m_tokenLength;
76
82
77 if (final[state]) {
83 if (final[state]) {
78 tag = tags[state];
84 tag = tags[state];
79 return true;
85 return true;
80 }
86 }
81
87
82 if (m_bufferOffset == m_bufferSize) {
88 if (m_bufferOffset == m_bufferSize) {
83 if (m_tokenLength == 0) //EOF
89 if (m_tokenLength == 0) //EOF
84 return false;
90 return false;
85
91
86 throw new ParserException();
92 throw new ParserException();
87 }
93 }
88
94
89 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
95 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
90
96
91 }
97 }
92
98
93 protected void Feed(char[] buffer, int offset, int length) {
99 protected void Feed(char[] buffer, int offset, int length) {
94 m_buffer = buffer;
100 m_buffer = buffer;
95 m_bufferOffset = offset;
101 m_bufferOffset = offset;
96 m_bufferSize = offset + length;
102 m_bufferSize = offset + length;
97 }
103 }
98
104
99 protected bool Feed() {
105 protected bool Feed() {
100 if (m_chunkSize <= 0)
106 if (m_chunkSize <= 0)
101 return false;
107 return false;
102
108
103 if (m_buffer != null) {
109 if (m_buffer != null) {
104 var free = m_buffer.Length - m_bufferSize;
110 var free = m_buffer.Length - m_bufferSize;
105
111
106 if (free < m_chunkSize) {
112 if (free < m_chunkSize) {
107 free += m_chunkSize;
113 free += m_chunkSize;
108 var used = m_bufferSize - m_bufferOffset;
114 var used = m_bufferSize - m_bufferOffset;
109 var size = used + free;
115 var size = used + free;
110
116
111 if (size > m_bufferMax)
117 if (size > m_bufferMax)
112 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));
118 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));
113
119
114 var temp = new char[size];
120 var temp = new char[size];
115
121
116 var read = Read(temp, used, m_chunkSize);
122 var read = Read(temp, used, m_chunkSize);
117 if (read == 0)
123 if (read == 0)
118 return false;
124 return false;
119
125
120 Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);
126 Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);
121
127
122 m_bufferOffset = 0;
128 m_bufferOffset = 0;
123 m_bufferSize = used + read;
129 m_bufferSize = used + read;
124 m_buffer = temp;
130 m_buffer = temp;
125 } else {
131 } else {
126 var read = Read(m_buffer, m_bufferSize, m_chunkSize);
132 var read = Read(m_buffer, m_bufferSize, m_chunkSize);
127 if (read == 0)
133 if (read == 0)
128 return false;
134 return false;
129 m_bufferSize += m_chunkSize;
135 m_bufferSize += m_chunkSize;
130 }
136 }
131 return true;
137 return true;
132 } else {
138 } else {
133 Debug.Assert(m_bufferOffset == 0);
139 Debug.Assert(m_bufferOffset == 0);
134 m_buffer = new char[m_chunkSize];
140 m_buffer = new char[m_chunkSize];
135 m_bufferSize = Read(m_buffer, 0, m_chunkSize);
141 m_bufferSize = Read(m_buffer, 0, m_chunkSize);
136 return (m_bufferSize != 0);
142 return (m_bufferSize != 0);
137 }
143 }
138 }
144 }
139
145
140 protected abstract int Read(char[] buffer, int offset, int size);
146 protected abstract int Read(char[] buffer, int offset, int size);
141
147
142 public string GetTokenValue() {
148 public string GetTokenValue() {
143 return new String(m_buffer, m_tokenOffset, m_tokenLength);
149 return new String(m_buffer, m_tokenOffset, m_tokenLength);
144 }
150 }
145
151
146 public void CopyTokenTo(char[] buffer, int offset) {
152 public void CopyTokenTo(char[] buffer, int offset) {
147 m_buffer.CopyTo(buffer, offset);
153 m_buffer.CopyTo(buffer, offset);
148 }
154 }
149
155
150 public void CopyTokenTo(StringBuilder sb) {
156 public void CopyTokenTo(StringBuilder sb) {
151 sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
157 sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
152 }
158 }
153
159
154 }
160 }
155 }
161 }
156
162
General Comments 0
You need to be logged in to leave comments. Login now