##// END OF EJS Templates
fixed DFA optimization, JSON is fully functional
cin -
r183:4f82e0f161c3 ref20160224
parent child
Show More

The requested changes are too big and content was truncated. Show full diff

@@ -0,0 +1,4
1 <?xml version="1.0" encoding="utf-8"?>
2 <packages>
3 <package id="System.Text.Json" version="2.0.0.11" targetFramework="net45" />
4 </packages> No newline at end of file
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
@@ -1,49 +1,87
1 using NUnit.Framework;
1 using NUnit.Framework;
2 using System;
2 using System;
3 using Implab.Formats.JSON;
3 using Implab.Formats.JSON;
4 using Implab.Automaton;
4
5
5 namespace Implab.Format.Test {
6 namespace Implab.Format.Test {
6 [TestFixture]
7 [TestFixture]
7 public class JsonTests {
8 public class JsonTests {
8 [Test]
9 [Test]
9 public void TestScannerValidTokens() {
10 public void TestScannerValidTokens() {
10 var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:");
11 using (var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) {
12
13 Tuple<JsonTokenType,object>[] expexted = new [] {
14 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
15 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
16 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d),
17 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
18 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d),
19 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
20 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d),
21 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
22 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d),
23 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
24 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d),
25 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
26 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d),
27 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
28 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text"),
29 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "),
30 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal"),
31 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " ["),
32 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]"),
33 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{"),
34 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}"),
35 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":")
36 };
11
37
12 Tuple<JsonTokenType,object>[] expexted = new [] {
38 object value;
13 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
39 JsonTokenType tokenType;
14 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
40 for (var i = 0; i < expexted.Length; i++) {
15 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ),
41
16 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
42 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
17 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ),
43 Assert.AreEqual(expexted[i].Item1, tokenType);
18 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
44 Assert.AreEqual(expexted[i].Item2, value);
19 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ),
45 }
20 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
46
21 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ),
47 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
22 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
48 }
23 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ),
49 }
24 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
50
25 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ),
51 [Test]
26 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
52 public void TestScannerBadTokens() {
27 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ),
53 var bad = new [] {
28 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
54 " 1",
29 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ),
55 " literal",
30 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ),
56 " \"",
31 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ),
57 "\"unclosed string",
32 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ),
58 "1.bad",
33 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ),
59 "001", // should be read as three numbers
34 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" )
60 "--10",
61 "+10",
62 "1.0.0",
63 "1e1.0",
64 "l1teral0",
65 ".123",
66 "-.123"
35 };
67 };
36
68
37 object value;
69 foreach (var json in bad)
38 JsonTokenType tokenType;
70 using (var scanner = new JSONScanner(json)) {
39 for (var i = 0; i < expexted.Length; i++) {
71 try {
40
72 object value;
41 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
73 JsonTokenType token;
42 Assert.AreEqual(expexted[i].Item1, tokenType);
74 scanner.ReadToken(out value, out token);
43 Assert.AreEqual(expexted[i].Item2, value);
75 if (!Object.Equals(value,json)) {
44 }
76 Console.WriteLine("'{0}' is read as {1}", json, value is String ? String.Format("'{0}'", value) : value );
45
77 continue;
46 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
78 }
79 Assert.Fail("Token '{0}' shouldn't pass", json);
80 } catch (ParserException e) {
81 Console.WriteLine(e.Message);
82 }
83 }
84
47 }
85 }
48 }
86 }
49 }
87 }
@@ -141,8 +141,8 namespace Implab.Automaton {
141 /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний.
141 /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний.
142 /// </remarks>
142 /// </remarks>
143 /// <returns>The final states.</returns>
143 /// <returns>The final states.</returns>
144 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
144 protected virtual IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
145 return new HashSet<int>[] { m_finalStates };
145 return new [] { new HashSet<int>(states) };
146 }
146 }
147
147
148 protected void Optimize(
148 protected void Optimize(
@@ -163,10 +163,7 namespace Implab.Automaton {
163 var optimalStates = new HashSet<HashSet<int>>(setComparer);
163 var optimalStates = new HashSet<HashSet<int>>(setComparer);
164 var queue = new HashSet<HashSet<int>>(setComparer);
164 var queue = new HashSet<HashSet<int>>(setComparer);
165
165
166 // получаем конечные состояния, сгруппированные по маркерам
166 optimalStates.Add(new HashSet<int>(FinalStates));
167 optimalStates.UnionWith(
168 GroupFinalStates()
169 );
170
167
171 var state = new HashSet<int>(
168 var state = new HashSet<int>(
172 Enumerable
169 Enumerable
@@ -190,19 +187,19 namespace Implab.Automaton {
190
187
191 for (int c = 0; c < m_symbolCount; c++) {
188 for (int c = 0; c < m_symbolCount; c++) {
192 var stateX = new HashSet<int>();
189 var stateX = new HashSet<int>();
193 //foreach(var a in stateA.Where(rmap.ContainsKey))
190 foreach(var a in stateA.Where(rmap.ContainsKey))
194 // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
191 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
195
196 stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1));
197
192
198 var tmp = optimalStates.ToArray();
193 var tmp = optimalStates.ToArray();
199 foreach (var stateY in tmp) {
194 foreach (var stateY in tmp) {
200 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
195 var stateR1 = new HashSet<int>(stateY);
201 var stateR1 = new HashSet<int>(stateY);
196 var stateR2 = new HashSet<int>(stateY);
202 var stateR2 = new HashSet<int>(stateY);
203
197
204 stateR1.IntersectWith(stateX);
198 stateR1.IntersectWith(stateX);
205 stateR2.ExceptWith(stateX);
199 stateR2.ExceptWith(stateX);
200
201 if (stateR1.Count > 0 && stateR2.Count > 0) {
202
206
203
207 optimalStates.Remove(stateY);
204 optimalStates.Remove(stateY);
208 optimalStates.Add(stateR1);
205 optimalStates.Add(stateR1);
@@ -220,6 +217,14 namespace Implab.Automaton {
220 }
217 }
221 }
218 }
222
219
220 // дополнительно разбиваем конечные состояния
221 foreach (var final in optimalStates.Where(s => s.Overlaps(m_finalStates)).ToArray()) {
222 optimalStates.Remove(final);
223 foreach (var split in SplitFinalStates(final))
224 optimalStates.Add(split);
225 }
226
227
223 // карта получения оптимального состояния по соотвествующему ему простому состоянию
228 // карта получения оптимального состояния по соотвествующему ему простому состоянию
224 var nextState = 0;
229 var nextState = 0;
225 foreach (var item in optimalStates) {
230 foreach (var item in optimalStates) {
@@ -66,19 +66,15 namespace Implab.Automaton.RegularExpres
66 // skip all unclassified symbols
66 // skip all unclassified symbols
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
69
70 var orig = ToString();
71 var opt = dfa.ToString();
72
73 return dfa;
69 return dfa;
74 }
70 }
75
71
76 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
72 protected override IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
77 var arrayComparer = new CustomEqualityComparer<TTag[]>(
73 var arrayComparer = new CustomEqualityComparer<TTag[]>(
78 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
74 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
79 x => x.Sum(it => x.GetHashCode())
75 x => x.Sum(it => x.GetHashCode())
80 );
76 );
81 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
77 return states.GroupBy(x => m_tags[x] ?? new TTag[0], arrayComparer).Select(g => new HashSet<int>(g));
82 }
78 }
83
79
84 public override string ToString() {
80 public override string ToString() {
@@ -17,6 +17,7 namespace Implab.Formats.JSON {
17 Literal,
17 Literal,
18 NameSeparator,
18 NameSeparator,
19 ValueSeparator,
19 ValueSeparator,
20 Whitespace,
20
21
21 StringBound,
22 StringBound,
22 EscapedChar,
23 EscapedChar,
@@ -73,7 +74,8 namespace Implab.Formats.JSON {
73 .Or(beginArray.Tag(TokenType.BeginArray))
74 .Or(beginArray.Tag(TokenType.BeginArray))
74 .Or(endArray.Tag(TokenType.EndArray))
75 .Or(endArray.Tag(TokenType.EndArray))
75 .Or(nameSep.Tag(TokenType.NameSeparator))
76 .Or(nameSep.Tag(TokenType.NameSeparator))
76 .Or(valueSep.Tag(TokenType.ValueSeparator));
77 .Or(valueSep.Tag(TokenType.ValueSeparator))
78 .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace));
77
79
78
80
79 var jsonStringExpression =
81 var jsonStringExpression =
@@ -46,7 +46,7 namespace Implab.Formats.JSON {
46 /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks>
46 /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks>
47 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
47 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
48 JSONGrammar.TokenType[] tag;
48 JSONGrammar.TokenType[] tag;
49 if (m_jsonContext.Execute(m_scanner, out tag)) {
49 while (m_jsonContext.Execute(m_scanner, out tag)) {
50 switch (tag[0]) {
50 switch (tag[0]) {
51 case JSONGrammar.TokenType.StringBound:
51 case JSONGrammar.TokenType.StringBound:
52 tokenValue = ReadString();
52 tokenValue = ReadString();
@@ -56,6 +56,8 namespace Implab.Formats.JSON {
56 tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture);
56 tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture);
57 tokenType = JsonTokenType.Number;
57 tokenType = JsonTokenType.Number;
58 break;
58 break;
59 case JSONGrammar.TokenType.Whitespace:
60 continue;
59 default:
61 default:
60 tokenType = (JsonTokenType)tag[0];
62 tokenType = (JsonTokenType)tag[0];
61 tokenValue = m_scanner.GetTokenValue();
63 tokenValue = m_scanner.GetTokenValue();
@@ -3,7 +3,7 using System.IO;
3
3
4 namespace Implab.Formats {
4 namespace Implab.Formats {
5 public class ReaderScanner: TextScanner {
5 public class ReaderScanner: TextScanner {
6 const int CHUNK_SIZE = 1024;
6 const int CHUNK_SIZE = 1024*4;
7 const int BUFFER_MAX = CHUNK_SIZE*1024;
7 const int BUFFER_MAX = CHUNK_SIZE*1024;
8
8
9 readonly TextReader m_reader;
9 readonly TextReader m_reader;
@@ -32,6 +32,9
32 </PropertyGroup>
32 </PropertyGroup>
33 <ItemGroup>
33 <ItemGroup>
34 <Reference Include="System" />
34 <Reference Include="System" />
35 <Reference Include="System.Text.Json">
36 <HintPath>..\packages\System.Text.Json.2.0.0.11\lib\net40\System.Text.Json.dll</HintPath>
37 </Reference>
35 </ItemGroup>
38 </ItemGroup>
36 <ItemGroup>
39 <ItemGroup>
37 <Compile Include="Program.cs" />
40 <Compile Include="Program.cs" />
@@ -44,4 +47,7
44 <Name>Implab</Name>
47 <Name>Implab</Name>
45 </ProjectReference>
48 </ProjectReference>
46 </ItemGroup>
49 </ItemGroup>
50 <ItemGroup>
51 <None Include="packages.config" />
52 </ItemGroup>
47 </Project> No newline at end of file
53 </Project>
@@ -1,6 +1,9
1 using System;
1 using System;
2 using Implab;
2 using Implab;
3 using System.Threading.Tasks;
3 using System.Threading.Tasks;
4 using Implab.Formats.JSON;
5 using System.IO;
6 using System.Text.Json;
4
7
5 namespace MonoPlay {
8 namespace MonoPlay {
6 class MainClass {
9 class MainClass {
@@ -9,28 +12,33 namespace MonoPlay {
9 public static void Main(string[] args) {
12 public static void Main(string[] args) {
10 if (args == null)
13 if (args == null)
11 throw new ArgumentNullException("args");
14 throw new ArgumentNullException("args");
12
15 int t1, t2;
13 var t1 = Environment.TickCount;
14
15 DoWork().GetAwaiter().GetResult();
16
16
17 var t2 = Environment.TickCount;
17 for (int i = 0; i < 2; i++) {
18 Console.WriteLine("done: {0} ms, {1:.00} Mb, {2} GC", t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0) );
18 t1 = Environment.TickCount;
19 int elements =0;
20 using (var reader = new JSONParser(File.OpenText("/home/sergey/temp/citylots.json"))) {
21 while (reader.Read())
22 elements++;
23 }
19
24
20 }
25 t2 = Environment.TickCount;
26 Console.WriteLine("attempt {0} done: {1} ms, {2:.00} Mb, {3} GC, Elements: {4}",i+1, t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0), elements );
27 }
21
28
22 static IPromise<int> DoItem(int x) {
29 Console.WriteLine("Syste.Text.Json");
23 //return Promise<int>.FromResult(x + 1);
30 var paraser = new JsonParser();
24 var p = new Promise<int>();
31 for (int i = 0; i < 2; i++) {
25 p.Resolve(x+1);
32 t1 = Environment.TickCount;
26 return p;
33 using (var reader = File.OpenText("/home/sergey/temp/citylots.json")) {
27 }
34 paraser.Parse(reader);
35 }
28
36
29 static async Task<int> DoWork() {
37 t2 = Environment.TickCount;
30 var c = 0;
38 Console.WriteLine("attempt {0} done: {1} ms, {2:.00} Mb, {3} GC, ",i+1, t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0));
31 for (int i = 0; i < 10000000; i++)
39 }
32 c = await DoItem(c);
40
33 return c;
41
34 }
42 }
35
43
36 }
44 }
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
General Comments 0
You need to be logged in to leave comments. Login now