##// END OF EJS Templates
fixed DFA optimization, JSON is fully functional
cin -
r183:4f82e0f161c3 ref20160224
parent child
Show More

The requested changes are too big and content was truncated. Show full diff

@@ -0,0 +1,4
1 <?xml version="1.0" encoding="utf-8"?>
2 <packages>
3 <package id="System.Text.Json" version="2.0.0.11" targetFramework="net45" />
4 </packages> No newline at end of file
1 NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
@@ -1,49 +1,87
1 1 using NUnit.Framework;
2 2 using System;
3 3 using Implab.Formats.JSON;
4 using Implab.Automaton;
4 5
5 6 namespace Implab.Format.Test {
6 7 [TestFixture]
7 8 public class JsonTests {
8 9 [Test]
9 10 public void TestScannerValidTokens() {
10 var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:");
11 using (var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) {
12
13 Tuple<JsonTokenType,object>[] expexted = new [] {
14 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
15 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
16 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d),
17 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
18 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d),
19 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
20 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d),
21 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
22 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d),
23 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
24 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d),
25 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
26 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d),
27 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
28 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text"),
29 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
30 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal"),
31 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " ["),
32 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]"),
33 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{"),
34 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}"),
35 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":")
36 };
11 37
12 Tuple<JsonTokenType,object>[] expexted = new [] {
13 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
14 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
15 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ),
16 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
17 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ),
18 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
19 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ),
20 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
21 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ),
22 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
23 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ),
24 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
25 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ),
26 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
27 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ),
28 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
29 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ),
30 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ),
31 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ),
32 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ),
33 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ),
34 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" )
38 object value;
39 JsonTokenType tokenType;
40 for (var i = 0; i < expexted.Length; i++) {
41
42 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
43 Assert.AreEqual(expexted[i].Item1, tokenType);
44 Assert.AreEqual(expexted[i].Item2, value);
45 }
46
47 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
48 }
49 }
50
51 [Test]
52 public void TestScannerBadTokens() {
53 var bad = new [] {
54 " 1",
55 " literal",
56 " \"",
57 "\"unclosed string",
58 "1.bad",
59 "001", // should be read as three numbers
60 "--10",
61 "+10",
62 "1.0.0",
63 "1e1.0",
64 "l1teral0",
65 ".123",
66 "-.123"
35 67 };
36 68
37 object value;
38 JsonTokenType tokenType;
39 for (var i = 0; i < expexted.Length; i++) {
40
41 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
42 Assert.AreEqual(expexted[i].Item1, tokenType);
43 Assert.AreEqual(expexted[i].Item2, value);
44 }
45
46 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
69 foreach (var json in bad)
70 using (var scanner = new JSONScanner(json)) {
71 try {
72 object value;
73 JsonTokenType token;
74 scanner.ReadToken(out value, out token);
75 if (!Object.Equals(value,json)) {
76 Console.WriteLine("'{0}' is read as {1}", json, value is String ? String.Format("'{0}'", value) : value );
77 continue;
78 }
79 Assert.Fail("Token '{0}' shouldn't pass", json);
80 } catch (ParserException e) {
81 Console.WriteLine(e.Message);
82 }
83 }
84
47 85 }
48 86 }
49 87 }
@@ -141,8 +141,8 namespace Implab.Automaton {
141 141 /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний.
142 142 /// </remarks>
143 143 /// <returns>The final states.</returns>
144 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
145 return new HashSet<int>[] { m_finalStates };
144 protected virtual IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
145 return new [] { new HashSet<int>(states) };
146 146 }
147 147
148 148 protected void Optimize(
@@ -163,10 +163,7 namespace Implab.Automaton {
163 163 var optimalStates = new HashSet<HashSet<int>>(setComparer);
164 164 var queue = new HashSet<HashSet<int>>(setComparer);
165 165
166 // получаем конечные состояния, сгруппированные по маркерам
167 optimalStates.UnionWith(
168 GroupFinalStates()
169 );
166 optimalStates.Add(new HashSet<int>(FinalStates));
170 167
171 168 var state = new HashSet<int>(
172 169 Enumerable
@@ -190,19 +187,19 namespace Implab.Automaton {
190 187
191 188 for (int c = 0; c < m_symbolCount; c++) {
192 189 var stateX = new HashSet<int>();
193 //foreach(var a in stateA.Where(rmap.ContainsKey))
194 // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
195
196 stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1));
190 foreach(var a in stateA.Where(rmap.ContainsKey))
191 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
197 192
198 193 var tmp = optimalStates.ToArray();
199 194 foreach (var stateY in tmp) {
200 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
201 var stateR1 = new HashSet<int>(stateY);
202 var stateR2 = new HashSet<int>(stateY);
195 var stateR1 = new HashSet<int>(stateY);
196 var stateR2 = new HashSet<int>(stateY);
203 197
204 stateR1.IntersectWith(stateX);
205 stateR2.ExceptWith(stateX);
198 stateR1.IntersectWith(stateX);
199 stateR2.ExceptWith(stateX);
200
201 if (stateR1.Count > 0 && stateR2.Count > 0) {
202
206 203
207 204 optimalStates.Remove(stateY);
208 205 optimalStates.Add(stateR1);
@@ -220,6 +217,14 namespace Implab.Automaton {
220 217 }
221 218 }
222 219
220 // дополнительно разбиваем конечные состояния
221 foreach (var final in optimalStates.Where(s => s.Overlaps(m_finalStates)).ToArray()) {
222 optimalStates.Remove(final);
223 foreach (var split in SplitFinalStates(final))
224 optimalStates.Add(split);
225 }
226
227
223 228 // карта получения оптимального состояния по соотвествующему ему простому состоянию
224 229 var nextState = 0;
225 230 foreach (var item in optimalStates) {
@@ -66,19 +66,15 namespace Implab.Automaton.RegularExpres
66 66 // skip all unclassified symbols
67 67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
68 68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
69
70 var orig = ToString();
71 var opt = dfa.ToString();
72
73 69 return dfa;
74 70 }
75 71
76 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
72 protected override IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
77 73 var arrayComparer = new CustomEqualityComparer<TTag[]>(
78 74 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
79 75 x => x.Sum(it => x.GetHashCode())
80 76 );
81 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
77 return states.GroupBy(x => m_tags[x] ?? new TTag[0], arrayComparer).Select(g => new HashSet<int>(g));
82 78 }
83 79
84 80 public override string ToString() {
@@ -17,6 +17,7 namespace Implab.Formats.JSON {
17 17 Literal,
18 18 NameSeparator,
19 19 ValueSeparator,
20 Whitespace,
20 21
21 22 StringBound,
22 23 EscapedChar,
@@ -73,7 +74,8 namespace Implab.Formats.JSON {
73 74 .Or(beginArray.Tag(TokenType.BeginArray))
74 75 .Or(endArray.Tag(TokenType.EndArray))
75 76 .Or(nameSep.Tag(TokenType.NameSeparator))
76 .Or(valueSep.Tag(TokenType.ValueSeparator));
77 .Or(valueSep.Tag(TokenType.ValueSeparator))
78 .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace));
77 79
78 80
79 81 var jsonStringExpression =
@@ -46,7 +46,7 namespace Implab.Formats.JSON {
46 46 /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks>
47 47 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
48 48 JSONGrammar.TokenType[] tag;
49 if (m_jsonContext.Execute(m_scanner, out tag)) {
49 while (m_jsonContext.Execute(m_scanner, out tag)) {
50 50 switch (tag[0]) {
51 51 case JSONGrammar.TokenType.StringBound:
52 52 tokenValue = ReadString();
@@ -56,6 +56,8 namespace Implab.Formats.JSON {
56 56 tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture);
57 57 tokenType = JsonTokenType.Number;
58 58 break;
59 case JSONGrammar.TokenType.Whitespace:
60 continue;
59 61 default:
60 62 tokenType = (JsonTokenType)tag[0];
61 63 tokenValue = m_scanner.GetTokenValue();
@@ -3,7 +3,7 using System.IO;
3 3
4 4 namespace Implab.Formats {
5 5 public class ReaderScanner: TextScanner {
6 const int CHUNK_SIZE = 1024;
6 const int CHUNK_SIZE = 1024*4;
7 7 const int BUFFER_MAX = CHUNK_SIZE*1024;
8 8
9 9 readonly TextReader m_reader;
@@ -32,6 +32,9
32 32 </PropertyGroup>
33 33 <ItemGroup>
34 34 <Reference Include="System" />
35 <Reference Include="System.Text.Json">
36 <HintPath>..\packages\System.Text.Json.2.0.0.11\lib\net40\System.Text.Json.dll</HintPath>
37 </Reference>
35 38 </ItemGroup>
36 39 <ItemGroup>
37 40 <Compile Include="Program.cs" />
@@ -44,4 +47,7
44 47 <Name>Implab</Name>
45 48 </ProjectReference>
46 49 </ItemGroup>
50 <ItemGroup>
51 <None Include="packages.config" />
52 </ItemGroup>
47 53 </Project> No newline at end of file
@@ -1,6 +1,9
1 1 using System;
2 2 using Implab;
3 3 using System.Threading.Tasks;
4 using Implab.Formats.JSON;
5 using System.IO;
6 using System.Text.Json;
4 7
5 8 namespace MonoPlay {
6 9 class MainClass {
@@ -9,28 +12,33 namespace MonoPlay {
9 12 public static void Main(string[] args) {
10 13 if (args == null)
11 14 throw new ArgumentNullException("args");
12
13 var t1 = Environment.TickCount;
14
15 DoWork().GetAwaiter().GetResult();
15 int t1, t2;
16 16
17 var t2 = Environment.TickCount;
18 Console.WriteLine("done: {0} ms, {1:.00} Mb, {2} GC", t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0) );
17 for (int i = 0; i < 2; i++) {
18 t1 = Environment.TickCount;
19 int elements =0;
20 using (var reader = new JSONParser(File.OpenText("/home/sergey/temp/citylots.json"))) {
21 while (reader.Read())
22 elements++;
23 }
19 24
20 }
25 t2 = Environment.TickCount;
26 Console.WriteLine("attempt {0} done: {1} ms, {2:.00} Mb, {3} GC, Elements: {4}",i+1, t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0), elements );
27 }
21 28
22 static IPromise<int> DoItem(int x) {
23 //return Promise<int>.FromResult(x + 1);
24 var p = new Promise<int>();
25 p.Resolve(x+1);
26 return p;
27 }
29 Console.WriteLine("Syste.Text.Json");
30 var paraser = new JsonParser();
31 for (int i = 0; i < 2; i++) {
32 t1 = Environment.TickCount;
33 using (var reader = File.OpenText("/home/sergey/temp/citylots.json")) {
34 paraser.Parse(reader);
35 }
28 36
29 static async Task<int> DoWork() {
30 var c = 0;
31 for (int i = 0; i < 10000000; i++)
32 c = await DoItem(c);
33 return c;
37 t2 = Environment.TickCount;
38 Console.WriteLine("attempt {0} done: {1} ms, {2:.00} Mb, {3} GC, ",i+1, t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0));
39 }
40
41
34 42 }
35 43
36 44 }
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
General Comments 0
You need to be logged in to leave comments. Login now