##// END OF EJS Templates
pretty print DFA, the minimization is still buggy
cin -
r182:76e8f2ba12b8 ref20160224
parent child
Show More
@@ -10,6 +10,7
10 10 <RootNamespace>Implab.Format.Test</RootNamespace>
11 11 <AssemblyName>Implab.Format.Test</AssemblyName>
12 12 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
13 <ReleaseVersion>0.2</ReleaseVersion>
13 14 </PropertyGroup>
14 15 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
15 16 <DebugSymbols>true</DebugSymbols>
@@ -32,7 +33,7
32 33 <ItemGroup>
33 34 <Reference Include="System" />
34 35 <Reference Include="nunit.framework">
35 <HintPath>..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll</HintPath>
36 <HintPath>..\..\packages\NUnit.2.6.4\lib\nunit.framework.dll</HintPath>
36 37 </Reference>
37 38 </ItemGroup>
38 39 <ItemGroup>
@@ -40,6 +41,12
40 41 </ItemGroup>
41 42 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
42 43 <ItemGroup>
44 <ProjectReference Include="..\..\Implab\Implab.csproj">
45 <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project>
46 <Name>Implab</Name>
47 </ProjectReference>
48 </ItemGroup>
49 <ItemGroup>
43 50 <None Include="packages.config" />
44 51 </ItemGroup>
45 52 </Project> No newline at end of file
@@ -1,11 +1,49
1 1 using NUnit.Framework;
2 2 using System;
3 using Implab.Formats.JSON;
3 4
4 5 namespace Implab.Format.Test {
5 [TestFixture()]
6 [TestFixture]
6 7 public class JsonTests {
7 [Test()]
8 public void TestCase() {
8 [Test]
9 public void TestScannerValidTokens() {
10 var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:");
11
12 Tuple<JsonTokenType,object>[] expexted = new [] {
13 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
14 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
15 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ),
16 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
17 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ),
18 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
19 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ),
20 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
21 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ),
22 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
23 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ),
24 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
25 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ),
26 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
27 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ),
28 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
29 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ),
30 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ),
31 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ),
32 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ),
33 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ),
34 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" )
35 };
36
37 object value;
38 JsonTokenType tokenType;
39 for (var i = 0; i < expexted.Length; i++) {
40
41 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
42 Assert.AreEqual(expexted[i].Item1, tokenType);
43 Assert.AreEqual(expexted[i].Item2, value);
44 }
45
46 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
9 47 }
10 48 }
11 49 }
@@ -1,4 +1,4
1 1 <?xml version="1.0" encoding="utf-8"?>
2 2 <packages>
3 <package id="NUnit" version="3.0.1" targetFramework="net45" />
3 <package id="NUnit" version="2.6.4" targetFramework="net45" />
4 4 </packages> No newline at end of file
@@ -3,6 +3,9 using System;
3 3 using System.Collections.Generic;
4 4 using System.Linq;
5 5 using System.Diagnostics;
6 using System.IO;
7 using System.CodeDom.Compiler;
8 using System.CodeDom;
6 9
7 10 namespace Implab.Automaton {
8 11 public class DFATable : IDFATableBuilder {
@@ -103,6 +106,11 namespace Implab.Automaton {
103 106 return GetEnumerator();
104 107 }
105 108
109 public void AddSymbol(int symbol) {
110 Safe.ArgumentAssert(symbol >= 0, "symbol");
111 m_symbolCount = Math.Max(symbol + 1, m_symbolCount);
112 }
113
106 114 public int[,] CreateTransitionTable() {
107 115 var table = new int[StateCount,AlphabetSize];
108 116
@@ -162,7 +170,7 namespace Implab.Automaton {
162 170
163 171 var state = new HashSet<int>(
164 172 Enumerable
165 .Range(0, m_stateCount - 1)
173 .Range(0, m_stateCount)
166 174 .Where(i => !m_finalStates.Contains(i))
167 175 );
168 176
@@ -182,10 +190,13 namespace Implab.Automaton {
182 190
183 191 for (int c = 0; c < m_symbolCount; c++) {
184 192 var stateX = new HashSet<int>();
185 foreach(var a in stateA.Where(rmap.ContainsKey))
186 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
193 //foreach(var a in stateA.Where(rmap.ContainsKey))
194 // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
187 195
188 foreach (var stateY in optimalStates.ToArray()) {
196 stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1));
197
198 var tmp = optimalStates.ToArray();
199 foreach (var stateY in tmp) {
189 200 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
190 201 var stateR1 = new HashSet<int>(stateY);
191 202 var stateR2 = new HashSet<int>(stateY);
@@ -245,11 +256,7 namespace Implab.Automaton {
245 256
246 257 foreach (var term in A) {
247 258 // ищем все переходы класса по символу term
248 var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray();
249
250 Debug.Assert(res.Length <= 1);
251
252 var s2 = res.Length > 0 ? res[0] : -1;
259 var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
253 260
254 261 HashSet<int> a2;
255 262 if (!classes.TryGetValue(s2, out a2)) {
@@ -283,6 +290,7 namespace Implab.Automaton {
283 290
284 291 // сохраняем DFAConst.UNCLASSIFIED_INPUT
285 292 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
293 optimalDFA.AddSymbol(cls);
286 294
287 295 foreach (var a in item)
288 296 alphabetMap[a] = cls;
@@ -298,19 +306,38 namespace Implab.Automaton {
298 306 optimalDFA.Add(t);
299 307 }
300 308
301 protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
309 protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
302 310 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
303 311 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
304 312
313 var data = new List<string>();
314
315 data.Add("digraph dfa {");
316
317 foreach (var final in m_finalStates)
318 data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
319
305 320 foreach(var t in m_transitions)
306 Console.WriteLine(
307 "[{0}] -{{{1}}}-> [{2}]{3}",
308 String.Join(",", stateAlphabet.GetSymbols(t.s1)),
309 String.Join("", inputAlphabet.GetSymbols(t.edge)),
310 String.Join(",", stateAlphabet.GetSymbols(t.s2)),
311 m_finalStates.Contains(t.s2) ? "$" : ""
312 );
321 data.Add(String.Format(
322 "{0} -> {2} [label={1}];",
323 String.Join("", stateAlphabet.GetSymbols(t.s1)),
324 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
325 String.Join("", stateAlphabet.GetSymbols(t.s2))
326 ));
327 data.Add("}");
328 return String.Join("\n", data);
313 329 }
314 330
331 static string ToLiteral(string input)
332 {
333 using (var writer = new StringWriter())
334 {
335 using (var provider = CodeDomProvider.CreateProvider("CSharp"))
336 {
337 provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
338 return writer.ToString();
315 339 }
316 340 }
341 }
342 }
343 }
@@ -10,6 +10,17 namespace Implab.Automaton {
10 10 void MarkFinalState(int state);
11 11
12 12 void SetInitialState(int s);
13
14 /// <summary>
15 /// Increases if needed the input alphabet size to hold the specified symbol.
16 /// </summary>
17 /// <remarks>
18 /// <code>
19 /// AlphabetSize = Math.Max(AlphabetSize, symbol + 1)
20 /// </code>
21 /// </remarks>
22 /// <param name="symbol">Symbol.</param>
23 void AddSymbol(int symbol);
13 24 }
14 25 }
15 26
@@ -67,6 +67,9 namespace Implab.Automaton.RegularExpres
67 67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
68 68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
69 69
70 var orig = ToString();
71 var opt = dfa.ToString();
72
70 73 return dfa;
71 74 }
72 75
@@ -78,6 +81,15 namespace Implab.Automaton.RegularExpres
78 81 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
79 82 }
80 83
84 public override string ToString() {
85 var states = new MapAlphabet<string>(false, null);
86
87 for (int i = 0; i < StateCount; i++)
88 states.DefineSymbol(string.Format("s{0}", i), i);
89
90 return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
91 }
92
81 93 }
82 94 }
83 95
@@ -7,7 +7,7 namespace Implab.Components {
7 7 /// </summary>
8 8 /// <remarks>
9 9 /// Usefull when dealing with memory-intensive objects which are frequently used.
10 /// This class is similar to <see cref="ObjectPool{T}"/> except is a singleton.
10 /// This class is similar to <see cref="ObjectPool{T}"/> except it is a singleton.
11 11 /// </remarks>
12 12 public class LazyAndWeak<T> where T : class {
13 13
@@ -44,6 +44,7 namespace Implab.Components {
44 44 } else {
45 45 lock (m_lock) {
46 46 // double check
47 weak = m_reference;
47 48 if (weak != null) {
48 49 value = weak.Target as T;
49 50 if (value != null)
@@ -108,7 +108,7 namespace Implab.Formats.JSON {
108 108 }
109 109
110 110 Token SymbolRangeToken(char start, char stop) {
111 return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x));
111 return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
112 112 }
113 113
114 114 protected override IndexedAlphabetBase<char> CreateAlphabet() {
@@ -4,22 +4,14 namespace Implab.Formats {
4 4 public class StringScanner: TextScanner {
5 5 const int CHUNK_SIZE = 1024;
6 6
7 readonly string m_text;
8 int m_pos;
9
10 public StringScanner(string text) : base(text.Length, text.Length < CHUNK_SIZE ? text.Length : CHUNK_SIZE) {
11 m_text = text;
12 Feed();
7 public StringScanner(string text) : base(null) {
8 Safe.ArgumentNotNull(text, "text");
9 var data = text.ToCharArray();
10 Feed(data, 0, data.Length);
13 11 }
14 12
15 13 protected override int Read(char[] buffer, int offset, int size) {
16 var actual = size + m_pos > m_text.Length ? m_text.Length - m_pos : size;
17
18 m_text.CopyTo(m_pos,buffer,offset, actual);
19
20 m_pos += actual;
21
22 return actual;
14 return 0;
23 15 }
24 16 }
25 17 }
@@ -53,29 +53,24 namespace Implab.Formats {
53 53 tag = null;
54 54
55 55 var maxSymbol = alphabet.Length - 1;
56
56 int next;
57 57 do {
58 58 // after the next chunk is read the offset in the buffer may change
59 59 int pos = m_bufferOffset + m_tokenLength;
60
60 next = state;
61 61 while (pos < m_bufferSize) {
62 62 var ch = m_buffer[pos];
63 63
64 try {
65 var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
64 next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
66 65
67 66 if (next == AutomatonConst.UNREACHABLE_STATE)
68 67 break;
69 68
70 69 state = next;
71 }catch {
72 throw;
73 }
74 70 pos++;
75 71 }
76
77 72 m_tokenLength = pos - m_bufferOffset;
78 } while (state != AutomatonConst.UNREACHABLE_STATE && Feed());
73 } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());
79 74
80 75 m_tokenOffset = m_bufferOffset;
81 76 m_bufferOffset += m_tokenLength;
@@ -150,7 +145,7 namespace Implab.Formats {
150 145 }
151 146
152 147 public void CopyTokenTo(char[] buffer, int offset) {
153 m_buffer.CopyTo(buffer, offset);
148 Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength);
154 149 }
155 150
156 151 public void CopyTokenTo(StringBuilder sb) {
General Comments 0
You need to be logged in to leave comments. Login now