##// END OF EJS Templates
pretty print DFA, the minimization is still buggy
cin -
r182:76e8f2ba12b8 ref20160224
parent child
Show More
@@ -10,6 +10,7
10 <RootNamespace>Implab.Format.Test</RootNamespace>
10 <RootNamespace>Implab.Format.Test</RootNamespace>
11 <AssemblyName>Implab.Format.Test</AssemblyName>
11 <AssemblyName>Implab.Format.Test</AssemblyName>
12 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
12 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
13 <ReleaseVersion>0.2</ReleaseVersion>
13 </PropertyGroup>
14 </PropertyGroup>
14 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
15 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
15 <DebugSymbols>true</DebugSymbols>
16 <DebugSymbols>true</DebugSymbols>
@@ -32,7 +33,7
32 <ItemGroup>
33 <ItemGroup>
33 <Reference Include="System" />
34 <Reference Include="System" />
34 <Reference Include="nunit.framework">
35 <Reference Include="nunit.framework">
35 <HintPath>..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll</HintPath>
36 <HintPath>..\..\packages\NUnit.2.6.4\lib\nunit.framework.dll</HintPath>
36 </Reference>
37 </Reference>
37 </ItemGroup>
38 </ItemGroup>
38 <ItemGroup>
39 <ItemGroup>
@@ -40,6 +41,12
40 </ItemGroup>
41 </ItemGroup>
41 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
42 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
42 <ItemGroup>
43 <ItemGroup>
44 <ProjectReference Include="..\..\Implab\Implab.csproj">
45 <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project>
46 <Name>Implab</Name>
47 </ProjectReference>
48 </ItemGroup>
49 <ItemGroup>
43 <None Include="packages.config" />
50 <None Include="packages.config" />
44 </ItemGroup>
51 </ItemGroup>
45 </Project> No newline at end of file
52 </Project>
@@ -1,11 +1,49
1 using NUnit.Framework;
1 using NUnit.Framework;
2 using System;
2 using System;
3 using Implab.Formats.JSON;
3
4
4 namespace Implab.Format.Test {
5 namespace Implab.Format.Test {
5 [TestFixture()]
6 [TestFixture]
6 public class JsonTests {
7 public class JsonTests {
7 [Test()]
8 [Test]
8 public void TestCase() {
9 public void TestScannerValidTokens() {
10 var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:");
11
12 Tuple<JsonTokenType,object>[] expexted = new [] {
13 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
14 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
15 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ),
16 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
17 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ),
18 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
19 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ),
20 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
21 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ),
22 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
23 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ),
24 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
25 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ),
26 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
27 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ),
28 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
29 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ),
30 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ),
31 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ),
32 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ),
33 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ),
34 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" )
35 };
36
37 object value;
38 JsonTokenType tokenType;
39 for (var i = 0; i < expexted.Length; i++) {
40
41 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
42 Assert.AreEqual(expexted[i].Item1, tokenType);
43 Assert.AreEqual(expexted[i].Item2, value);
44 }
45
46 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
9 }
47 }
10 }
48 }
11 }
49 }
@@ -1,4 +1,4
1 ο»Ώ<?xml version="1.0" encoding="utf-8"?>
1 ο»Ώ<?xml version="1.0" encoding="utf-8"?>
2 <packages>
2 <packages>
3 <package id="NUnit" version="3.0.1" targetFramework="net45" />
3 <package id="NUnit" version="2.6.4" targetFramework="net45" />
4 </packages> No newline at end of file
4 </packages>
@@ -3,6 +3,9 using System;
3 using System.Collections.Generic;
3 using System.Collections.Generic;
4 using System.Linq;
4 using System.Linq;
5 using System.Diagnostics;
5 using System.Diagnostics;
6 using System.IO;
7 using System.CodeDom.Compiler;
8 using System.CodeDom;
6
9
7 namespace Implab.Automaton {
10 namespace Implab.Automaton {
8 public class DFATable : IDFATableBuilder {
11 public class DFATable : IDFATableBuilder {
@@ -103,6 +106,11 namespace Implab.Automaton {
103 return GetEnumerator();
106 return GetEnumerator();
104 }
107 }
105
108
109 public void AddSymbol(int symbol) {
110 Safe.ArgumentAssert(symbol >= 0, "symbol");
111 m_symbolCount = Math.Max(symbol + 1, m_symbolCount);
112 }
113
106 public int[,] CreateTransitionTable() {
114 public int[,] CreateTransitionTable() {
107 var table = new int[StateCount,AlphabetSize];
115 var table = new int[StateCount,AlphabetSize];
108
116
@@ -162,7 +170,7 namespace Implab.Automaton {
162
170
163 var state = new HashSet<int>(
171 var state = new HashSet<int>(
164 Enumerable
172 Enumerable
165 .Range(0, m_stateCount - 1)
173 .Range(0, m_stateCount)
166 .Where(i => !m_finalStates.Contains(i))
174 .Where(i => !m_finalStates.Contains(i))
167 );
175 );
168
176
@@ -182,10 +190,13 namespace Implab.Automaton {
182
190
183 for (int c = 0; c < m_symbolCount; c++) {
191 for (int c = 0; c < m_symbolCount; c++) {
184 var stateX = new HashSet<int>();
192 var stateX = new HashSet<int>();
185 foreach(var a in stateA.Where(rmap.ContainsKey))
193 //foreach(var a in stateA.Where(rmap.ContainsKey))
186 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
194 // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
187
195
188 foreach (var stateY in optimalStates.ToArray()) {
196 stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1));
197
198 var tmp = optimalStates.ToArray();
199 foreach (var stateY in tmp) {
189 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
200 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
190 var stateR1 = new HashSet<int>(stateY);
201 var stateR1 = new HashSet<int>(stateY);
191 var stateR2 = new HashSet<int>(stateY);
202 var stateR2 = new HashSet<int>(stateY);
@@ -245,11 +256,7 namespace Implab.Automaton {
245
256
246 foreach (var term in A) {
257 foreach (var term in A) {
247 // ΠΈΡ‰Π΅ΠΌ всС ΠΏΠ΅Ρ€Π΅Ρ…ΠΎΠ΄Ρ‹ класса ΠΏΠΎ символу term
258 // ΠΈΡ‰Π΅ΠΌ всС ΠΏΠ΅Ρ€Π΅Ρ…ΠΎΠ΄Ρ‹ класса ΠΏΠΎ символу term
248 var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray();
259 var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
249
250 Debug.Assert(res.Length <= 1);
251
252 var s2 = res.Length > 0 ? res[0] : -1;
253
260
254 HashSet<int> a2;
261 HashSet<int> a2;
255 if (!classes.TryGetValue(s2, out a2)) {
262 if (!classes.TryGetValue(s2, out a2)) {
@@ -283,6 +290,7 namespace Implab.Automaton {
283
290
284 // сохраняСм DFAConst.UNCLASSIFIED_INPUT
291 // сохраняСм DFAConst.UNCLASSIFIED_INPUT
285 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
292 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
293 optimalDFA.AddSymbol(cls);
286
294
287 foreach (var a in item)
295 foreach (var a in item)
288 alphabetMap[a] = cls;
296 alphabetMap[a] = cls;
@@ -298,19 +306,38 namespace Implab.Automaton {
298 optimalDFA.Add(t);
306 optimalDFA.Add(t);
299 }
307 }
300
308
301 protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
309 protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
302 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
310 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
303 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
311 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
304
312
313 var data = new List<string>();
314
315 data.Add("digraph dfa {");
316
317 foreach (var final in m_finalStates)
318 data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
319
305 foreach(var t in m_transitions)
320 foreach (var t in m_transitions)
306 Console.WriteLine(
321 data.Add(String.Format(
307 "[{0}] -{{{1}}}-> [{2}]{3}",
322 "{0} -> {2} [label={1}];",
308 String.Join(",", stateAlphabet.GetSymbols(t.s1)),
323 String.Join("", stateAlphabet.GetSymbols(t.s1)),
309 String.Join("", inputAlphabet.GetSymbols(t.edge)),
324 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
310 String.Join(",", stateAlphabet.GetSymbols(t.s2)),
325 String.Join("", stateAlphabet.GetSymbols(t.s2))
311 m_finalStates.Contains(t.s2) ? "$" : ""
326 ));
312 );
327 data.Add("}");
328 return String.Join("\n", data);
313 }
329 }
314
330
331 static string ToLiteral(string input)
332 {
333 using (var writer = new StringWriter())
334 {
335 using (var provider = CodeDomProvider.CreateProvider("CSharp"))
336 {
337 provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
338 return writer.ToString();
315 }
339 }
316 }
340 }
341 }
342 }
343 }
@@ -10,6 +10,17 namespace Implab.Automaton {
10 void MarkFinalState(int state);
10 void MarkFinalState(int state);
11
11
12 void SetInitialState(int s);
12 void SetInitialState(int s);
13
14 /// <summary>
15 /// Increases if needed the input alphabet size to hold the specified symbol.
16 /// </summary>
17 /// <remarks>
18 /// <code>
19 /// AlphabetSize = Math.Max(AlphabetSize, symbol + 1)
20 /// </code>
21 /// </remarks>
22 /// <param name="symbol">Symbol.</param>
23 void AddSymbol(int symbol);
13 }
24 }
14 }
25 }
15
26
@@ -67,6 +67,9 namespace Implab.Automaton.RegularExpres
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
69
69
70 var orig = ToString();
71 var opt = dfa.ToString();
72
70 return dfa;
73 return dfa;
71 }
74 }
72
75
@@ -78,6 +81,15 namespace Implab.Automaton.RegularExpres
78 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
81 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
79 }
82 }
80
83
84 public override string ToString() {
85 var states = new MapAlphabet<string>(false, null);
86
87 for (int i = 0; i < StateCount; i++)
88 states.DefineSymbol(string.Format("s{0}", i), i);
89
90 return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
91 }
92
81 }
93 }
82 }
94 }
83
95
@@ -7,7 +7,7 namespace Implab.Components {
7 /// </summary>
7 /// </summary>
8 /// <remarks>
8 /// <remarks>
9 /// Usefull when dealing with memory-intensive objects which are frequently used.
9 /// Usefull when dealing with memory-intensive objects which are frequently used.
10 /// This class is similar to <see cref="ObjectPool{T}"/> except is a singleton.
10 /// This class is similar to <see cref="ObjectPool{T}"/> except it is a singleton.
11 /// </remarks>
11 /// </remarks>
12 public class LazyAndWeak<T> where T : class {
12 public class LazyAndWeak<T> where T : class {
13
13
@@ -44,6 +44,7 namespace Implab.Components {
44 } else {
44 } else {
45 lock (m_lock) {
45 lock (m_lock) {
46 // double check
46 // double check
47 weak = m_reference;
47 if (weak != null) {
48 if (weak != null) {
48 value = weak.Target as T;
49 value = weak.Target as T;
49 if (value != null)
50 if (value != null)
@@ -108,7 +108,7 namespace Implab.Formats.JSON {
108 }
108 }
109
109
110 Token SymbolRangeToken(char start, char stop) {
110 Token SymbolRangeToken(char start, char stop) {
111 return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x));
111 return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
112 }
112 }
113
113
114 protected override IndexedAlphabetBase<char> CreateAlphabet() {
114 protected override IndexedAlphabetBase<char> CreateAlphabet() {
@@ -4,22 +4,14 namespace Implab.Formats {
4 public class StringScanner: TextScanner {
4 public class StringScanner: TextScanner {
5 const int CHUNK_SIZE = 1024;
5 const int CHUNK_SIZE = 1024;
6
6
7 readonly string m_text;
7 public StringScanner(string text) : base(null) {
8 int m_pos;
8 Safe.ArgumentNotNull(text, "text");
9
9 var data = text.ToCharArray();
10 public StringScanner(string text) : base(text.Length, text.Length < CHUNK_SIZE ? text.Length : CHUNK_SIZE) {
10 Feed(data, 0, data.Length);
11 m_text = text;
12 Feed();
13 }
11 }
14
12
15 protected override int Read(char[] buffer, int offset, int size) {
13 protected override int Read(char[] buffer, int offset, int size) {
16 var actual = size + m_pos > m_text.Length ? m_text.Length - m_pos : size;
14 return 0;
17
18 m_text.CopyTo(m_pos,buffer,offset, actual);
19
20 m_pos += actual;
21
22 return actual;
23 }
15 }
24 }
16 }
25 }
17 }
@@ -53,29 +53,24 namespace Implab.Formats {
53 tag = null;
53 tag = null;
54
54
55 var maxSymbol = alphabet.Length - 1;
55 var maxSymbol = alphabet.Length - 1;
56
56 int next;
57 do {
57 do {
58 // after the next chunk is read the offset in the buffer may change
58 // after the next chunk is read the offset in the buffer may change
59 int pos = m_bufferOffset + m_tokenLength;
59 int pos = m_bufferOffset + m_tokenLength;
60
60 next = state;
61 while (pos < m_bufferSize) {
61 while (pos < m_bufferSize) {
62 var ch = m_buffer[pos];
62 var ch = m_buffer[pos];
63
63
64 try {
64 next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
65 var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
66
65
67 if (next == AutomatonConst.UNREACHABLE_STATE)
66 if (next == AutomatonConst.UNREACHABLE_STATE)
68 break;
67 break;
69
68
70 state = next;
69 state = next;
71 }catch {
72 throw;
73 }
74 pos++;
70 pos++;
75 }
71 }
76
77 m_tokenLength = pos - m_bufferOffset;
72 m_tokenLength = pos - m_bufferOffset;
78 } while (state != AutomatonConst.UNREACHABLE_STATE && Feed());
73 } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());
79
74
80 m_tokenOffset = m_bufferOffset;
75 m_tokenOffset = m_bufferOffset;
81 m_bufferOffset += m_tokenLength;
76 m_bufferOffset += m_tokenLength;
@@ -150,7 +145,7 namespace Implab.Formats {
150 }
145 }
151
146
152 public void CopyTokenTo(char[] buffer, int offset) {
147 public void CopyTokenTo(char[] buffer, int offset) {
153 m_buffer.CopyTo(buffer, offset);
148 Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength);
154 }
149 }
155
150
156 public void CopyTokenTo(StringBuilder sb) {
151 public void CopyTokenTo(StringBuilder sb) {
General Comments 0
You need to be logged in to leave comments. Login now