##// END OF EJS Templates
pretty print DFA, the minimization is still buggy
cin -
r182:76e8f2ba12b8 ref20160224
parent child
Show More
@@ -1,45 +1,52
1 1 <?xml version="1.0" encoding="utf-8"?>
2 2 <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 3 <PropertyGroup>
4 4 <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
5 5 <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
6 6 <ProductVersion>8.0.30703</ProductVersion>
7 7 <SchemaVersion>2.0</SchemaVersion>
8 8 <ProjectGuid>{4D364996-7ECD-4193-8F90-F223FFEA49DA}</ProjectGuid>
9 9 <OutputType>Library</OutputType>
10 10 <RootNamespace>Implab.Format.Test</RootNamespace>
11 11 <AssemblyName>Implab.Format.Test</AssemblyName>
12 12 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
13 <ReleaseVersion>0.2</ReleaseVersion>
13 14 </PropertyGroup>
14 15 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
15 16 <DebugSymbols>true</DebugSymbols>
16 17 <DebugType>full</DebugType>
17 18 <Optimize>false</Optimize>
18 19 <OutputPath>bin\Debug</OutputPath>
19 20 <DefineConstants>DEBUG;</DefineConstants>
20 21 <ErrorReport>prompt</ErrorReport>
21 22 <WarningLevel>4</WarningLevel>
22 23 <ConsolePause>false</ConsolePause>
23 24 </PropertyGroup>
24 25 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
25 26 <DebugType>full</DebugType>
26 27 <Optimize>true</Optimize>
27 28 <OutputPath>bin\Release</OutputPath>
28 29 <ErrorReport>prompt</ErrorReport>
29 30 <WarningLevel>4</WarningLevel>
30 31 <ConsolePause>false</ConsolePause>
31 32 </PropertyGroup>
32 33 <ItemGroup>
33 34 <Reference Include="System" />
34 35 <Reference Include="nunit.framework">
35 <HintPath>..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll</HintPath>
36 <HintPath>..\..\packages\NUnit.2.6.4\lib\nunit.framework.dll</HintPath>
36 37 </Reference>
37 38 </ItemGroup>
38 39 <ItemGroup>
39 40 <Compile Include="JsonTests.cs" />
40 41 </ItemGroup>
41 42 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
42 43 <ItemGroup>
44 <ProjectReference Include="..\..\Implab\Implab.csproj">
45 <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project>
46 <Name>Implab</Name>
47 </ProjectReference>
48 </ItemGroup>
49 <ItemGroup>
43 50 <None Include="packages.config" />
44 51 </ItemGroup>
45 52 </Project> No newline at end of file
@@ -1,12 +1,50
1 1 using NUnit.Framework;
2 2 using System;
3 using Implab.Formats.JSON;
3 4
4 5 namespace Implab.Format.Test {
5 [TestFixture()]
6 [TestFixture]
6 7 public class JsonTests {
7 [Test()]
8 public void TestCase() {
8 [Test]
9 public void TestScannerValidTokens() {
10 var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:");
11
12 Tuple<JsonTokenType,object>[] expexted = new [] {
13 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
14 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
15 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ),
16 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
17 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ),
18 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
19 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ),
20 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
21 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ),
22 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
23 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ),
24 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
25 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ),
26 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
27 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ),
28 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
29 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ),
30 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ),
31 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ),
32 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ),
33 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ),
34 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" )
35 };
36
37 object value;
38 JsonTokenType tokenType;
39 for (var i = 0; i < expexted.Length; i++) {
40
41 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
42 Assert.AreEqual(expexted[i].Item1, tokenType);
43 Assert.AreEqual(expexted[i].Item2, value);
44 }
45
46 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
9 47 }
10 48 }
11 49 }
12 50
@@ -1,4 +1,4
1 1 ο»Ώ<?xml version="1.0" encoding="utf-8"?>
2 2 <packages>
3 <package id="NUnit" version="3.0.1" targetFramework="net45" />
3 <package id="NUnit" version="2.6.4" targetFramework="net45" />
4 4 </packages> No newline at end of file
@@ -1,316 +1,343
1 1 using Implab;
2 2 using System;
3 3 using System.Collections.Generic;
4 4 using System.Linq;
5 5 using System.Diagnostics;
6 using System.IO;
7 using System.CodeDom.Compiler;
8 using System.CodeDom;
6 9
7 10 namespace Implab.Automaton {
8 11 public class DFATable : IDFATableBuilder {
9 12 int m_stateCount;
10 13 int m_symbolCount;
11 14 int m_initialState;
12 15
13 16 readonly HashSet<int> m_finalStates = new HashSet<int>();
14 17 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
15 18
16 19
17 20 #region IDFADefinition implementation
18 21
19 22 public bool IsFinalState(int s) {
20 23 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
21 24
22 25 return m_finalStates.Contains(s);
23 26 }
24 27
25 28 public IEnumerable<int> FinalStates {
26 29 get {
27 30 return m_finalStates;
28 31 }
29 32 }
30 33
31 34 public int StateCount {
32 35 get { return m_stateCount; }
33 36 }
34 37
35 38 public int AlphabetSize {
36 39 get { return m_symbolCount; }
37 40 }
38 41
39 42 public int InitialState {
40 43 get { return m_initialState; }
41 44 }
42 45
43 46 #endregion
44 47
45 48 public void SetInitialState(int s) {
46 49 Safe.ArgumentAssert(s >= 0, "s");
47 50 m_stateCount = Math.Max(m_stateCount, s + 1);
48 51 m_initialState = s;
49 52 }
50 53
51 54 public void MarkFinalState(int state) {
52 55 m_stateCount = Math.Max(m_stateCount, state + 1);
53 56 m_finalStates.Add(state);
54 57 }
55 58
56 59 public void Add(AutomatonTransition item) {
57 60 Safe.ArgumentAssert(item.s1 >= 0, "item");
58 61 Safe.ArgumentAssert(item.s2 >= 0, "item");
59 62 Safe.ArgumentAssert(item.edge >= 0, "item");
60 63
61 64 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
62 65 m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
63 66
64 67 m_transitions.Add(item);
65 68 }
66 69
67 70 public void Clear() {
68 71 m_stateCount = 0;
69 72 m_symbolCount = 0;
70 73 m_finalStates.Clear();
71 74 m_transitions.Clear();
72 75 }
73 76
74 77 public bool Contains(AutomatonTransition item) {
75 78 return m_transitions.Contains(item);
76 79 }
77 80
78 81 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
79 82 m_transitions.CopyTo(array, arrayIndex);
80 83 }
81 84
82 85 public bool Remove(AutomatonTransition item) {
83 86 return m_transitions.Remove(item);
84 87 }
85 88
86 89 public int Count {
87 90 get {
88 91 return m_transitions.Count;
89 92 }
90 93 }
91 94
92 95 public bool IsReadOnly {
93 96 get {
94 97 return false;
95 98 }
96 99 }
97 100
98 101 public IEnumerator<AutomatonTransition> GetEnumerator() {
99 102 return m_transitions.GetEnumerator();
100 103 }
101 104
102 105 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
103 106 return GetEnumerator();
104 107 }
105 108
109 public void AddSymbol(int symbol) {
110 Safe.ArgumentAssert(symbol >= 0, "symbol");
111 m_symbolCount = Math.Max(symbol + 1, m_symbolCount);
112 }
113
106 114 public int[,] CreateTransitionTable() {
107 115 var table = new int[StateCount,AlphabetSize];
108 116
109 117 for (int i = 0; i < StateCount; i++)
110 118 for (int j = 0; j < AlphabetSize; j++)
111 119 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
112 120
113 121 foreach (var t in this)
114 122 table[t.s1,t.edge] = t.s2;
115 123
116 124 return table;
117 125 }
118 126
119 127 public bool[] CreateFinalStateTable() {
120 128 var table = new bool[StateCount];
121 129
122 130 foreach (var s in FinalStates)
123 131 table[s] = true;
124 132
125 133 return table;
126 134 }
127 135
128 136 /// <summary>Π€ΠΎΡ€ΠΌΠΈΡ€ΡƒΠ΅Ρ‚ мноТСства ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний ΠΏΠ΅Ρ€Π΅Π΄ Π½Π°Ρ‡Π°Π»ΠΎΠΌ Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° ΠΌΠΈΠ½ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</summary>
129 137 /// <remarks>
130 138 /// Π’ процСссС построСния минимального Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π° трСбуСтся Ρ€Π°Π·Π΄Π΅Π»ΠΈΡ‚ΡŒ мноТСство состояний,
131 139 /// Π½Π° Π΄Π²Π° подмноТСства - ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния ΠΈ всС ΠΎΡΡ‚Π°Π»ΡŒΠ½Ρ‹Π΅, послС Ρ‡Π΅Π³ΠΎ эти подмноТСства
132 140 /// Π±ΡƒΠ΄ΡƒΡ‚ Ρ€Π΅Π·Π΄Π΅Π»Π΅Π½Ρ‹ Π½Π° Π±ΠΎΠ»Π΅Π΅ ΠΌΠ΅Π»ΠΊΠΈΠ΅. Иногда трСбуСтся Π³Π°Ρ€Π°Π½Ρ‚ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ различия ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… сосотяний,
133 141 /// для этого Π½Π΅ΠΎΠ±Ρ…ΠΎΠ΄ΠΈΠΌΠΎ ΠΏΠ΅Ρ€Π΅ΠΎΠΏΡ€Π΅Π΄Π΅Π»ΠΈΡ‚ΡŒ Π΄Π°Π½Π½Ρ†ΡŽ Ρ„ΡƒΠΊΠ½Ρ†ΠΈΡŽ, для получСния мноТСств ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний.
134 142 /// </remarks>
135 143 /// <returns>The final states.</returns>
136 144 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
137 145 return new HashSet<int>[] { m_finalStates };
138 146 }
139 147
140 148 protected void Optimize(
141 149 IDFATableBuilder optimalDFA,
142 150 IDictionary<int,int> alphabetMap,
143 151 IDictionary<int,int> stateMap
144 152 ) {
145 153 Safe.ArgumentNotNull(optimalDFA, "dfa");
146 154 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
147 155 Safe.ArgumentNotNull(stateMap, "stateMap");
148 156
149 157
150 158 var setComparer = new CustomEqualityComparer<HashSet<int>>(
151 159 (x, y) => x.SetEquals(y),
152 160 s => s.Sum(x => x.GetHashCode())
153 161 );
154 162
155 163 var optimalStates = new HashSet<HashSet<int>>(setComparer);
156 164 var queue = new HashSet<HashSet<int>>(setComparer);
157 165
158 166 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния, сгруппированныС ΠΏΠΎ ΠΌΠ°Ρ€ΠΊΠ΅Ρ€Π°ΠΌ
159 167 optimalStates.UnionWith(
160 168 GroupFinalStates()
161 169 );
162 170
163 171 var state = new HashSet<int>(
164 172 Enumerable
165 .Range(0, m_stateCount - 1)
173 .Range(0, m_stateCount)
166 174 .Where(i => !m_finalStates.Contains(i))
167 175 );
168 176
169 177 optimalStates.Add(state);
170 178 queue.Add(state);
171 179
172 180 var rmap = m_transitions
173 181 .GroupBy(t => t.s2)
174 182 .ToDictionary(
175 183 g => g.Key, // s2
176 184 g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
177 185 );
178 186
179 187 while (queue.Count > 0) {
180 188 var stateA = queue.First();
181 189 queue.Remove(stateA);
182 190
183 191 for (int c = 0; c < m_symbolCount; c++) {
184 192 var stateX = new HashSet<int>();
185 foreach(var a in stateA.Where(rmap.ContainsKey))
186 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
193 //foreach(var a in stateA.Where(rmap.ContainsKey))
194 // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
187 195
188 foreach (var stateY in optimalStates.ToArray()) {
196 stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1));
197
198 var tmp = optimalStates.ToArray();
199 foreach (var stateY in tmp) {
189 200 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
190 201 var stateR1 = new HashSet<int>(stateY);
191 202 var stateR2 = new HashSet<int>(stateY);
192 203
193 204 stateR1.IntersectWith(stateX);
194 205 stateR2.ExceptWith(stateX);
195 206
196 207 optimalStates.Remove(stateY);
197 208 optimalStates.Add(stateR1);
198 209 optimalStates.Add(stateR2);
199 210
200 211 if (queue.Contains(stateY)) {
201 212 queue.Remove(stateY);
202 213 queue.Add(stateR1);
203 214 queue.Add(stateR2);
204 215 } else {
205 216 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
206 217 }
207 218 }
208 219 }
209 220 }
210 221 }
211 222
212 223 // ΠΊΠ°Ρ€Ρ‚Π° получСния ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½ΠΎΠ³ΠΎ состояния ΠΏΠΎ ΡΠΎΠΎΡ‚Π²Π΅ΡΡ‚Π²ΡƒΡŽΡ‰Π΅ΠΌΡƒ Π΅ΠΌΡƒ простому ΡΠΎΡΡ‚ΠΎΡΠ½ΠΈΡŽ
213 224 var nextState = 0;
214 225 foreach (var item in optimalStates) {
215 226 var id = nextState++;
216 227 foreach (var s in item)
217 228 stateMap[s] = id;
218 229 }
219 230
220 231 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
221 232 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Π΅ символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли Move(s,a1) == Move(s,a2), для любого s
222 233 // для этого ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌ кластСризации, сначала
223 234 // считаСм, Ρ‡Ρ‚ΠΎ всС символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹
224 235
225 236 var minClasses = new HashSet<HashSet<int>>(setComparer);
226 237 var alphaQueue = new Queue<HashSet<int>>();
227 238 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
228 239
229 240 // для всСх состояний, Π±ΡƒΠ΄Π΅ΠΌ ΠΏΡ€ΠΎΠ²Π΅Ρ€ΡΡ‚ΡŒ ΠΊΠ°ΠΆΠ΄Ρ‹ΠΉ класс Π½Π° Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΠΎΡΡ‚ΡŒ,
230 241 // Ρ‚.Π΅. символы Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли ΠΎΠ½ΠΈ приводят ΠΊ Ρ€Π°Π·Π½Ρ‹ΠΌ состояниям
231 242 for (int s = 0 ; s < optimalStates.Count; s++) {
232 243 var newQueue = new Queue<HashSet<int>>();
233 244
234 245 foreach (var A in alphaQueue) {
235 246 // классы ΠΈΠ· ΠΎΠ΄Π½ΠΎΠ³ΠΎ символа Π΄Π΅Π»ΠΈΡ‚ΡŒ бСсполСзно, ΠΏΠ΅Ρ€Π΅Π²ΠΎΠ΄ΠΈΠΌ ΠΈΡ… сразу Π²
236 247 // Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚ΠΈΡ€ΡƒΡŽΡ‰ΠΈΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
237 248 if (A.Count == 1) {
238 249 minClasses.Add(A);
239 250 continue;
240 251 }
241 252
242 253 // Ρ€Π°Π·Π»ΠΈΡ‡Π°Π΅ΠΌ классы символов, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ пСрСводят Π² Ρ€Π°Π·Π»ΠΈΡ‡Π½Ρ‹Π΅ ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ состояния
243 254 // optimalState -> alphaClass
244 255 var classes = new Dictionary<int, HashSet<int>>();
245 256
246 257 foreach (var term in A) {
247 258 // ΠΈΡ‰Π΅ΠΌ всС ΠΏΠ΅Ρ€Π΅Ρ…ΠΎΠ΄Ρ‹ класса ΠΏΠΎ символу term
248 var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray();
259 var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
249 260
250 Debug.Assert(res.Length <= 1);
251
252 var s2 = res.Length > 0 ? res[0] : -1;
253
254 261 HashSet<int> a2;
255 262 if (!classes.TryGetValue(s2, out a2)) {
256 263 a2 = new HashSet<int>();
257 264 newQueue.Enqueue(a2);
258 265 classes[s2] = a2;
259 266 }
260 267 a2.Add(term);
261 268 }
262 269 }
263 270
264 271 if (newQueue.Count == 0)
265 272 break;
266 273 alphaQueue = newQueue;
267 274 }
268 275
269 276 // послС окончания Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° Π² ΠΎΡ‡Π΅Ρ€Π΅Π΄ΠΈ останутся ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹Π΅ классы
270 277 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов
271 278 foreach (var A in alphaQueue)
272 279 minClasses.Add(A);
273 280
274 281 // построСниС отобраТСния Π°Π»Ρ„Π°Π²ΠΈΡ‚ΠΎΠ² Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов.
275 282 // ΠΏΠΎΡΠΊΠΎΠ»ΡŒΠΊΡƒ символ DFAConst.UNCLASSIFIED_INPUT ΠΌΠΎΠΆΠ΅Ρ‚ ΠΈΠΌΠ΅Ρ‚ΡŒ
276 283 // ΡΠΏΠ΅Ρ†ΠΈΠ°Π»ΡŒΠ½ΠΎΠ΅ Π·Π½Π°Ρ‡Π΅Π½ΠΈΠ΅, Ρ‚ΠΎΠ³Π΄Π° сохраним ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ класс,
277 284 // содСрТащий этот символ Π½Π° Ρ‚ΠΎΠΌΠΆΠ΅ мСстС.
278 285
279 286 var nextCls = 0;
280 287 foreach (var item in minClasses) {
281 288 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
282 289 nextCls++;
283 290
284 291 // сохраняСм DFAConst.UNCLASSIFIED_INPUT
285 292 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
293 optimalDFA.AddSymbol(cls);
286 294
287 295 foreach (var a in item)
288 296 alphabetMap[a] = cls;
289 297 }
290 298
291 299 // построСниС Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π°
292 300 optimalDFA.SetInitialState(stateMap[m_initialState]);
293 301
294 302 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
295 303 optimalDFA.MarkFinalState(sf);
296 304
297 305 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
298 306 optimalDFA.Add(t);
299 307 }
300 308
301 protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
309 protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
302 310 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
303 311 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
304 312
305 foreach(var t in m_transitions)
306 Console.WriteLine(
307 "[{0}] -{{{1}}}-> [{2}]{3}",
308 String.Join(",", stateAlphabet.GetSymbols(t.s1)),
309 String.Join("", inputAlphabet.GetSymbols(t.edge)),
310 String.Join(",", stateAlphabet.GetSymbols(t.s2)),
311 m_finalStates.Contains(t.s2) ? "$" : ""
312 );
313 var data = new List<string>();
314
315 data.Add("digraph dfa {");
316
317 foreach (var final in m_finalStates)
318 data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
319
320 foreach (var t in m_transitions)
321 data.Add(String.Format(
322 "{0} -> {2} [label={1}];",
323 String.Join("", stateAlphabet.GetSymbols(t.s1)),
324 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
325 String.Join("", stateAlphabet.GetSymbols(t.s2))
326 ));
327 data.Add("}");
328 return String.Join("\n", data);
313 329 }
314 330
331 static string ToLiteral(string input)
332 {
333 using (var writer = new StringWriter())
334 {
335 using (var provider = CodeDomProvider.CreateProvider("CSharp"))
336 {
337 provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
338 return writer.ToString();
339 }
340 }
341 }
315 342 }
316 343 }
@@ -1,15 +1,26
1 1 using System;
2 2 using System.Collections.Generic;
3 3
4 4 namespace Implab.Automaton {
5 5 public interface IDFATableBuilder : IDFATable, ICollection<AutomatonTransition> {
6 6 /// <summary>
7 7 /// Marks the state as final.
8 8 /// </summary>
9 9 /// <param name="state">State.</param>
10 10 void MarkFinalState(int state);
11 11
12 12 void SetInitialState(int s);
13
14 /// <summary>
15 /// Increases if needed the input alphabet size to hold the specified symbol.
16 /// </summary>
17 /// <remarks>
18 /// <code>
19 /// AlphabetSize = Math.Max(AlphabetSize, symbol + 1)
20 /// </code>
21 /// </remarks>
22 /// <param name="symbol">Symbol.</param>
23 void AddSymbol(int symbol);
13 24 }
14 25 }
15 26
@@ -1,83 +1,95
1 1 using System.Collections.Generic;
2 2 using System.Linq;
3 3
4 4 namespace Implab.Automaton.RegularExpressions {
5 5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
6 6
7 7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
8 8 readonly IAlphabet<TInput> m_alphabet;
9 9
10 10 public RegularDFA(IAlphabet<TInput> alphabet) {
11 11 Safe.ArgumentNotNull(alphabet, "aplhabet");
12 12
13 13 m_alphabet = alphabet;
14 14 }
15 15
16 16
17 17 public IAlphabet<TInput> InputAlphabet {
18 18 get {
19 19 return m_alphabet;
20 20 }
21 21 }
22 22
23 23 public void MarkFinalState(int s, TTag[] tags) {
24 24 MarkFinalState(s);
25 25 SetStateTag(s, tags);
26 26 }
27 27
28 28 public void SetStateTag(int s, TTag[] tags) {
29 29 Safe.ArgumentNotNull(tags, "tags");
30 30 m_tags[s] = tags;
31 31 }
32 32
33 33 public TTag[] GetStateTag(int s) {
34 34 TTag[] tags;
35 35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
36 36 }
37 37
38 38 public TTag[][] CreateTagTable() {
39 39 var table = new TTag[StateCount][];
40 40
41 41 foreach (var pair in m_tags)
42 42 table[pair.Key] = pair.Value;
43 43
44 44 return table;
45 45 }
46 46
47 47 /// <summary>
48 48 /// Optimize the specified alphabet.
49 49 /// </summary>
50 50 /// <param name="alphabet">ΠŸΡƒΡΡ‚ΠΎΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π±ΡƒΠ΄Π΅Ρ‚ Π·ΠΏΠΎΠ»Π½Π΅Π½ Π² процСссС ΠΎΠΏΡ‚ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</param>
51 51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
52 52 Safe.ArgumentNotNull(alphabet, "alphabet");
53 53
54 54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
55 55
56 56 var alphaMap = new Dictionary<int,int>();
57 57 var stateMap = new Dictionary<int,int>();
58 58
59 59 Optimize(dfa, alphaMap, stateMap);
60 60
61 61 // mark tags in the new DFA
62 62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
63 63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
64 64
65 65 // make the alphabet for the new DFA
66 66 // skip all unclassified symbols
67 67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
68 68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
69
70 var orig = ToString();
71 var opt = dfa.ToString();
69 72
70 73 return dfa;
71 74 }
72 75
73 76 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
74 77 var arrayComparer = new CustomEqualityComparer<TTag[]>(
75 78 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
76 79 x => x.Sum(it => x.GetHashCode())
77 80 );
78 81 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
79 82 }
80 83
84 public override string ToString() {
85 var states = new MapAlphabet<string>(false, null);
86
87 for (int i = 0; i < StateCount; i++)
88 states.DefineSymbol(string.Format("s{0}", i), i);
89
90 return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
91 }
92
81 93 }
82 94 }
83 95
@@ -1,63 +1,64
1 1 using System;
2 2 using System.Threading;
3 3
4 4 namespace Implab.Components {
5 5 /// <summary>
6 6 /// Creates an instace on-demand and allows it to be garbage collected.
7 7 /// </summary>
8 8 /// <remarks>
9 9 /// Usefull when dealing with memory-intensive objects which are frequently used.
10 /// This class is similar to <see cref="ObjectPool{T}"/> except is a singleton.
10 /// This class is similar to <see cref="ObjectPool{T}"/> except it is a singleton.
11 11 /// </remarks>
12 12 public class LazyAndWeak<T> where T : class {
13 13
14 14 readonly Func<T> m_factory;
15 15 readonly object m_lock;
16 16 WeakReference m_reference;
17 17
18 18
19 19 public LazyAndWeak(Func<T> factory, bool useLock) {
20 20 Safe.ArgumentNotNull(factory, "factory");
21 21 m_factory = factory;
22 22 m_lock = useLock ? new object() : null;
23 23 }
24 24
25 25 public LazyAndWeak(Func<T> factory) : this(factory, false) {
26 26 }
27 27
28 28 public T Value {
29 29 get {
30 30 while (true) {
31 31 var weak = m_reference;
32 32 T value;
33 33 if (weak != null) {
34 34 value = weak.Target as T;
35 35 if (value != null)
36 36 return value;
37 37 }
38 38
39 39 if (m_lock == null) {
40 40 value = m_factory();
41 41
42 42 if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak)
43 43 return value;
44 44 } else {
45 45 lock (m_lock) {
46 46 // double check
47 weak = m_reference;
47 48 if (weak != null) {
48 49 value = weak.Target as T;
49 50 if (value != null)
50 51 return value;
51 52 }
52 53 // we are safe to write
53 54 value = m_factory();
54 55 m_reference = new WeakReference(value);
55 56 return value;
56 57 }
57 58 }
58 59 }
59 60 }
60 61 }
61 62 }
62 63 }
63 64
@@ -1,119 +1,119
1 1 using System.Linq;
2 2 using Implab.Automaton.RegularExpressions;
3 3 using System;
4 4 using Implab.Automaton;
5 5 using Implab.Components;
6 6
7 7 namespace Implab.Formats.JSON {
8 8 class JSONGrammar : Grammar<char> {
9 9 public enum TokenType {
10 10 None,
11 11 BeginObject,
12 12 EndObject,
13 13 BeginArray,
14 14 EndArray,
15 15 String,
16 16 Number,
17 17 Literal,
18 18 NameSeparator,
19 19 ValueSeparator,
20 20
21 21 StringBound,
22 22 EscapedChar,
23 23 UnescapedChar,
24 24 EscapedUnicode
25 25 }
26 26
27 27 static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());
28 28
29 29 public static JSONGrammar Instance {
30 30 get { return _instance.Value; }
31 31 }
32 32
33 33 readonly ScannerContext<TokenType> m_jsonExpression;
34 34 readonly ScannerContext<TokenType> m_stringExpression;
35 35 readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();
36 36
37 37 public JSONGrammar() {
38 38 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
39 39 var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
40 40 var digit9 = SymbolRangeToken('1', '9');
41 41 var zero = SymbolToken('0');
42 42 var digit = zero.Or(digit9);
43 43 var dot = SymbolToken('.');
44 44 var minus = SymbolToken('-');
45 45 var sign = SymbolSetToken('-', '+');
46 46 var expSign = SymbolSetToken('e', 'E');
47 47 var letters = SymbolRangeToken('a', 'z');
48 48 var integer = zero.Or(digit9.Cat(digit.EClosure()));
49 49 var frac = dot.Cat(digit.Closure());
50 50 var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
51 51 var quote = SymbolToken('"');
52 52 var backSlash = SymbolToken('\\');
53 53 var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
54 54 var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
55 55 var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
56 56 var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
57 57 var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
58 58 var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
59 59 var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
60 60 var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
61 61 var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);
62 62
63 63 var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
64 64 var literal = letters.Closure();
65 65 var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
66 66
67 67 var jsonExpression =
68 68 number.Tag(TokenType.Number)
69 69 .Or(literal.Tag(TokenType.Literal))
70 70 .Or(quote.Tag(TokenType.StringBound))
71 71 .Or(beginObject.Tag(TokenType.BeginObject))
72 72 .Or(endObject.Tag(TokenType.EndObject))
73 73 .Or(beginArray.Tag(TokenType.BeginArray))
74 74 .Or(endArray.Tag(TokenType.EndArray))
75 75 .Or(nameSep.Tag(TokenType.NameSeparator))
76 76 .Or(valueSep.Tag(TokenType.ValueSeparator));
77 77
78 78
79 79 var jsonStringExpression =
80 80 quote.Tag(TokenType.StringBound)
81 81 .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
82 82 .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
83 83 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
84 84
85 85
86 86 m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
87 87 m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
88 88
89 89
90 90 }
91 91
92 92 protected override IAlphabetBuilder<char> AlphabetBuilder {
93 93 get {
94 94 return m_defaultAlphabet;
95 95 }
96 96 }
97 97
98 98 public ScannerContext<TokenType> JsonExpression {
99 99 get {
100 100 return m_jsonExpression;
101 101 }
102 102 }
103 103
104 104 public ScannerContext<TokenType> JsonStringExpression {
105 105 get {
106 106 return m_stringExpression;
107 107 }
108 108 }
109 109
110 110 Token SymbolRangeToken(char start, char stop) {
111 return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x));
111 return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
112 112 }
113 113
114 114 protected override IndexedAlphabetBase<char> CreateAlphabet() {
115 115 return new CharAlphabet();
116 116 }
117 117
118 118 }
119 119 }
@@ -1,26 +1,18
1 1 using System;
2 2
3 3 namespace Implab.Formats {
4 4 public class StringScanner: TextScanner {
5 5 const int CHUNK_SIZE = 1024;
6 6
7 readonly string m_text;
8 int m_pos;
9
10 public StringScanner(string text) : base(text.Length, text.Length < CHUNK_SIZE ? text.Length : CHUNK_SIZE) {
11 m_text = text;
12 Feed();
7 public StringScanner(string text) : base(null) {
8 Safe.ArgumentNotNull(text, "text");
9 var data = text.ToCharArray();
10 Feed(data, 0, data.Length);
13 11 }
14 12
15 13 protected override int Read(char[] buffer, int offset, int size) {
16 var actual = size + m_pos > m_text.Length ? m_text.Length - m_pos : size;
17
18 m_text.CopyTo(m_pos,buffer,offset, actual);
19
20 m_pos += actual;
21
22 return actual;
14 return 0;
23 15 }
24 16 }
25 17 }
26 18
@@ -1,162 +1,157
1 1 using System;
2 2 using Implab.Components;
3 3 using System.Diagnostics;
4 4 using Implab.Automaton;
5 5 using System.Text;
6 6
7 7 namespace Implab.Formats {
8 8 public abstract class TextScanner : Disposable {
9 9 readonly int m_bufferMax;
10 10 readonly int m_chunkSize;
11 11
12 12 char[] m_buffer;
13 13 int m_bufferOffset;
14 14 int m_bufferSize;
15 15 int m_tokenOffset;
16 16 int m_tokenLength;
17 17
18 18 /// <summary>
19 19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
20 20 /// </summary>
21 21 /// <param name="bufferMax">Buffer max.</param>
22 22 /// <param name="chunkSize">Chunk size.</param>
23 23 protected TextScanner(int bufferMax, int chunkSize) {
24 24 Debug.Assert(m_chunkSize <= m_bufferMax);
25 25
26 26 m_bufferMax = bufferMax;
27 27 m_chunkSize = chunkSize;
28 28 }
29 29
30 30 /// <summary>
31 31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
32 32 /// </summary>
33 33 /// <param name="buffer">Buffer.</param>
34 34 protected TextScanner(char[] buffer) {
35 35 if (buffer != null) {
36 36 m_buffer = buffer;
37 37 m_bufferSize = buffer.Length;
38 38 }
39 39 }
40 40
41 41 /// <summary>
42 42 /// (hungry) Reads the next token.
43 43 /// </summary>
44 44 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns>
45 45 /// <param name="dfa">The transition map for the automaton</param>
46 46 /// <param name="final">Final states of the automaton.</param>
47 47 /// <param name="tags">Tags.</param>
48 48 /// <param name="state">The initial state for the automaton.</param>
49 49 /// <param name="alphabet"></param>
50 50 /// <param name = "tag"></param>
51 51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
52 52 m_tokenLength = 0;
53 53 tag = null;
54 54
55 55 var maxSymbol = alphabet.Length - 1;
56
56 int next;
57 57 do {
58 58 // after the next chunk is read the offset in the buffer may change
59 59 int pos = m_bufferOffset + m_tokenLength;
60
60 next = state;
61 61 while (pos < m_bufferSize) {
62 62 var ch = m_buffer[pos];
63 63
64 try {
65 var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
64 next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
66 65
67 66 if (next == AutomatonConst.UNREACHABLE_STATE)
68 67 break;
69
68
70 69 state = next;
71 }catch {
72 throw;
73 }
74 70 pos++;
75 71 }
76
77 72 m_tokenLength = pos - m_bufferOffset;
78 } while (state != AutomatonConst.UNREACHABLE_STATE && Feed());
73 } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());
79 74
80 75 m_tokenOffset = m_bufferOffset;
81 76 m_bufferOffset += m_tokenLength;
82 77
83 78 if (final[state]) {
84 79 tag = tags[state];
85 80 return true;
86 81 }
87 82
88 83 if (m_bufferOffset == m_bufferSize) {
89 84 if (m_tokenLength == 0) //EOF
90 85 return false;
91 86
92 87 throw new ParserException();
93 88 }
94 89
95 90 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
96 91
97 92 }
98 93
99 94 protected void Feed(char[] buffer, int offset, int length) {
100 95 m_buffer = buffer;
101 96 m_bufferOffset = offset;
102 97 m_bufferSize = offset + length;
103 98 }
104 99
105 100 protected bool Feed() {
106 101 if (m_chunkSize <= 0)
107 102 return false;
108 103
109 104 if (m_buffer != null) {
110 105 var free = m_buffer.Length - m_bufferSize;
111 106
112 107 if (free < m_chunkSize) {
113 108 free += m_chunkSize;
114 109 var used = m_bufferSize - m_bufferOffset;
115 110 var size = used + free;
116 111
117 112 if (size > m_bufferMax)
118 113 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));
119 114
120 115 var temp = new char[size];
121 116
122 117 var read = Read(temp, used, m_chunkSize);
123 118 if (read == 0)
124 119 return false;
125 120
126 121 Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);
127 122
128 123 m_bufferOffset = 0;
129 124 m_bufferSize = used + read;
130 125 m_buffer = temp;
131 126 } else {
132 127 var read = Read(m_buffer, m_bufferSize, m_chunkSize);
133 128 if (read == 0)
134 129 return false;
135 130 m_bufferSize += m_chunkSize;
136 131 }
137 132 return true;
138 133 } else {
139 134 Debug.Assert(m_bufferOffset == 0);
140 135 m_buffer = new char[m_chunkSize];
141 136 m_bufferSize = Read(m_buffer, 0, m_chunkSize);
142 137 return (m_bufferSize != 0);
143 138 }
144 139 }
145 140
146 141 protected abstract int Read(char[] buffer, int offset, int size);
147 142
148 143 public string GetTokenValue() {
149 144 return new String(m_buffer, m_tokenOffset, m_tokenLength);
150 145 }
151 146
152 147 public void CopyTokenTo(char[] buffer, int offset) {
153 m_buffer.CopyTo(buffer, offset);
148 Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength);
154 149 }
155 150
156 151 public void CopyTokenTo(StringBuilder sb) {
157 152 sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
158 153 }
159 154
160 155 }
161 156 }
162 157
General Comments 0
You need to be logged in to leave comments. Login now