##// END OF EJS Templates
JsonReader optimizations
cin -
r236:302ca905c19e v2
parent child
Show More
@@ -9,8 +9,8 using System.IO;
9 9
10 10 namespace Implab.Format.Test {
11 11 [TestFixture]
12 public class JsonTests {
13
12 public class JsonTests {
13
14 14 [Test]
15 15 public void TestScannerValidTokens() {
16 16 using (var scanner = JsonStringScanner.Create(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) {
@@ -114,10 +114,36 namespace Implab.Format.Test {
114 114 DumpJsonParse("[{\"info\": [7,8,9]}]");
115 115 DumpJsonFlatParse("[1,2,\"\",[3,4],{\"info\": [5,6]},{\"num\": [7,8,null]}, null,[null]]");
116 116 }
117
117
118 [Test]
119 public void JsonBenchmark() {
120 var t = Environment.TickCount;
121 using (var reader = new JsonXmlReader(JsonReader.Create("e:\\citylots.json"), new JsonXmlReaderOptions { NamespaceUri = "XmlReaderSimpleTest", RootName = "data" })) {
122 while (reader.Read()) {
123 }
124 }
125 Console.WriteLine($"JsonXmlReader: {Environment.TickCount - t} ms");
126
127 t = Environment.TickCount;
128 using(var reader = JsonReader.Create("e:\\citylots.json")) {
129 while(reader.Read()) {
130 }
131 }
132
133 Console.WriteLine($"JsonReader: {Environment.TickCount - t} ms");
134
135 t = Environment.TickCount;
136 using (var reader = XmlReader.Create("file:///e:\\citylots.xml")) {
137 while (reader.Read()) {
138 }
139 }
140
141 Console.WriteLine($"XmlReader: {Environment.TickCount - t} ms");
142 }
143
118 144 void AssertRead(XmlReader reader, XmlNodeType expected) {
119 145 Assert.IsTrue(reader.Read());
120 Console.WriteLine($"{new string(' ', reader.Depth*2)}{reader}");
146 Console.WriteLine($"{new string(' ', reader.Depth * 2)}{reader}");
121 147 Assert.AreEqual(expected, reader.NodeType);
122 148 }
123 149
@@ -78,103 +78,13 namespace Implab.Playground {
78 78
79 79 static void Main(string[] args) {
80 80
81 //var queue = new ConcurrentQueue<int>();
82 var queue = new AsyncQueue<int>();
83 //var queue = new SimpleAsyncQueue<int>();
84
85 const int wBatch = 32;
86 const long wCount = 1000000;
87 const long total = wBatch * wCount * 3;
88
89 long r1 = 0, r2 = 0, r3 = 0;
90 const int rBatch = 1000;
91 long read = 0;
92
93 var t1 = Environment.TickCount;
94
95 AsyncPool.RunThread(
96 () => {
97 var buffer = new int[wBatch];
98 for (int i = 0; i < wBatch; i++)
99 buffer[i] = 1;
100
101 for (int i = 0; i < wCount; i++)
102 EnqueueRange(queue, buffer, 0, wBatch);
103 Console.WriteLine("done writer #1: {0} ms", Environment.TickCount - t1);
104 },
105 () => {
106 var buffer = new int[wBatch];
107 for (int i = 0; i < wBatch; i++)
108 buffer[i] = 1;
109
110 for (int i = 0; i < wCount; i++)
111 EnqueueRange(queue, buffer, 0, wBatch);
112 Console.WriteLine("done writer #2: {0} ms", Environment.TickCount - t1);
113 },
114 () => {
115 var buffer = new int[wBatch];
116 for (int i = 0; i < wBatch; i++)
117 buffer[i] = 1;
118
119 for (int i = 0; i < wCount; i++)
120 EnqueueRange(queue, buffer, 0, wBatch);
121 Console.WriteLine("done writer #3: {0} ms", Environment.TickCount - t1);
122 },
123 () => {
124 var buffer = new int[rBatch];
125
126 while (read < total) {
127 int actual;
128 if (TryDequeueRange(queue, buffer, 0, rBatch, out actual)) {
129 for (int i = 0; i < actual; i++)
130 r1 += buffer[i];
131 Interlocked.Add(ref read, actual);
132 }
133 }
134
135 Console.WriteLine("done reader #1: {0} ms", Environment.TickCount - t1);
136 }/*,
137 () => {
138 var buffer = new int[rBatch];
139
140 while (read < total) {
141 int actual;
142 if (TryDequeueRange(queue, buffer, 0, rBatch, out actual)) {
143 for (int i = 0; i < actual; i++)
144 r2 += buffer[i];
145 Interlocked.Add(ref read, actual);
146 }
147 }
148
149 Console.WriteLine("done reader #2: {0} ms", Environment.TickCount - t1);
150 }*//*,
151 () => {
152 var buffer = new int[rBatch];
153
154 while (read < total) {
155 int actual;
156 if (TryDequeueRange(queue, buffer, 0, rBatch, out actual)) {
157 for (int i = 0; i < actual; i++)
158 r3 += buffer[i];
159 Interlocked.Add(ref read, actual);
160 }
161 }
162
163 Console.WriteLine("done reader #3: {0} ms", Environment.TickCount - t1);
164 }*/
165 )
166 .PromiseAll()
167 .Join();
168
169
170 Console.WriteLine(
171 "done: {0} ms, summ#1: {1}, summ#2: {2}, total: {3}, count: {4}",
172 Environment.TickCount - t1,
173 r1,
174 r2,
175 r1 + r2 + r3,
176 total
177 );
81 var t = Environment.TickCount;
82 using (var reader = JsonReader.Create("e:\\citylots.json")) {
83 while (reader.Read()) {
84 }
85 }
86
87 Console.WriteLine($"JsonReader: {Environment.TickCount - t} ms");
178 88
179 89 Console.WriteLine("done");
180 90 }
@@ -1,9 +1,9
1 1
2 2 namespace Implab.Automaton {
3 3 public static class AutomatonConst {
4 public const int UNREACHABLE_STATE = -1;
4 public const int UnreachableState = -1;
5 5
6 public const int UNCLASSIFIED_INPUT = 0;
6 public const int UnclassifiedInput = 0;
7 7 }
8 8 }
9 9
@@ -116,7 +116,7 namespace Implab.Automaton {
116 116
117 117 for (int i = 0; i < StateCount; i++)
118 118 for (int j = 0; j < AlphabetSize; j++)
119 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
119 table[i, j] = AutomatonConst.UnreachableState;
120 120
121 121 foreach (var t in this)
122 122 table[t.s1,t.edge] = (byte)t.s2;
@@ -290,11 +290,11 namespace Implab.Automaton {
290 290
291 291 var nextCls = 0;
292 292 foreach (var item in minClasses) {
293 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
293 if (nextCls == AutomatonConst.UnclassifiedInput)
294 294 nextCls++;
295 295
296 296 // preserve DFAConst.UNCLASSIFIED_INPUT
297 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
297 var cls = item.Contains(AutomatonConst.UnclassifiedInput) ? AutomatonConst.UnclassifiedInput : nextCls++;
298 298 optimalDFA.AddSymbol(cls);
299 299
300 300 foreach (var a in item)
@@ -326,7 +326,7 namespace Implab.Automaton {
326 326 data.Add(String.Format(
327 327 "{0} -> {2} [label={1}];",
328 328 String.Join("", stateAlphabet.GetSymbols(t.s1)),
329 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
329 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UnclassifiedInput ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
330 330 String.Join("", stateAlphabet.GetSymbols(t.s2))
331 331 ));
332 332 data.Add("}");
@@ -54,7 +54,7 namespace Implab.Automaton {
54 54 return cls;
55 55 if (!m_supportUnclassified)
56 56 throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet");
57 return AutomatonConst.UNCLASSIFIED_INPUT;
57 return AutomatonConst.UnclassifiedInput;
58 58 }
59 59
60 60 public int Count {
@@ -129,7 +129,7 namespace Implab.Automaton.RegularExpres
129 129 if (m_root == null)
130 130 m_root = token;
131 131 m_idx++;
132 m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
132 m_indexes[m_idx] = AutomatonConst.UnclassifiedInput;
133 133 m_firstpos = new HashSet<int>(new[] { m_idx });
134 134 m_lastpos = new HashSet<int>(new[] { m_idx });
135 135 Followpos(m_idx);
@@ -4,7 +4,7 using Implab.Automaton;
4 4 using System;
5 5
6 6 namespace Implab.Formats {
7 public class CharAlphabet: IndexedAlphabetBase<char> {
7 public class CharAlphabet : IndexedAlphabetBase<char> {
8 8
9 9 public override int GetSymbolIndex(char symbol) {
10 10 return symbol;
@@ -25,7 +25,7 namespace Implab.Formats {
25 25 }
26 26
27 27 public bool Contains(char symbol) {
28 return symbol >= m_min && symbol <= m_max && m_map[symbol-m_min] != AutomatonConst.UNCLASSIFIED_INPUT;
28 return symbol >= m_min && symbol <= m_max && m_map[symbol-m_min] != AutomatonConst.UnclassifiedInput;
29 29 }
30 30
31 31 public IEnumerable<char> GetSymbols(int cls) {
@@ -36,7 +36,7 namespace Implab.Formats {
36 36
37 37 [MethodImpl(MethodImplOptions.AggressiveInlining)]
38 38 public int Translate(char symbol) {
39 return symbol >= m_min && symbol <= m_max ? m_map[symbol-m_min] : AutomatonConst.UNCLASSIFIED_INPUT;
39 return symbol >= m_min && symbol <= m_max ? m_map[symbol-m_min] : AutomatonConst.UnclassifiedInput;
40 40 }
41 41 }
42 42 }
@@ -16,7 +16,7 namespace Implab.Formats {
16 16 }
17 17
18 18 protected SymbolToken UnclassifiedToken() {
19 return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT);
19 return new SymbolToken(AutomatonConst.UnclassifiedInput);
20 20 }
21 21
22 22 protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
@@ -42,7 +42,7 namespace Implab.Formats {
42 42
43 43 int TranslateOrAdd(TSymbol ch) {
44 44 var t = AlphabetBuilder.Translate(ch);
45 if (t == AutomatonConst.UNCLASSIFIED_INPUT)
45 if (t == AutomatonConst.UnclassifiedInput)
46 46 t = AlphabetBuilder.DefineSymbol(ch);
47 47 return t;
48 48 }
@@ -53,7 +53,7 namespace Implab.Formats {
53 53
54 54 int TranslateOrDie(TSymbol ch) {
55 55 var t = AlphabetBuilder.Translate(ch);
56 if (t == AutomatonConst.UNCLASSIFIED_INPUT)
56 if (t == AutomatonConst.UnclassifiedInput)
57 57 throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
58 58 return t;
59 59 }
@@ -69,7 +69,7 namespace Implab.Formats {
69 69
70 70 while(offset < max) {
71 71 next = m_dfa[next, m_alphabet.Translate(data[offset])];
72 if (next == AutomatonConst.UNREACHABLE_STATE) {
72 if (next == AutomatonConst.UnreachableState) {
73 73 // scanner stops on the next position after last recognized symbol
74 74 m_position = offset;
75 75 return false;
@@ -31,8 +31,8 namespace Implab.Formats.Json {
31 31 get { return _instance.Value; }
32 32 }
33 33
34 readonly InputScanner<TokenType> m_jsonExpression;
35 readonly InputScanner<TokenType> m_stringExpression;
34 readonly FastInputScanner<TokenType> m_jsonExpression;
35 readonly FastInputScanner<TokenType> m_stringExpression;
36 36 readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();
37 37
38 38 public CharAlphabet DefaultAlphabet { get { return m_defaultAlphabet; } }
@@ -87,15 +87,15 namespace Implab.Formats.Json {
87 87 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
88 88
89 89
90 m_jsonExpression = BuildScanner(jsonExpression);
91 m_stringExpression = BuildScanner(jsonStringExpression);
90 m_jsonExpression = BuildFastScanner(jsonExpression);
91 m_stringExpression = BuildFastScanner(jsonStringExpression);
92 92 }
93 93
94 public static InputScanner<TokenType> CreateJsonExpressionScanner() {
94 public static FastInputScanner<TokenType> CreateJsonExpressionScanner() {
95 95 return Instance.m_jsonExpression.Clone();
96 96 }
97 97
98 public static InputScanner<TokenType> CreateStringExpressionScanner() {
98 public static FastInputScanner<TokenType> CreateStringExpressionScanner() {
99 99 return Instance.m_stringExpression.Clone();
100 100 }
101 101
@@ -109,7 +109,7 namespace Implab.Formats.Json {
109 109 return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
110 110 }
111 111
112 public InputScanner<TokenType> BuildScanner(Token regexp) {
112 public FastInputScanner<TokenType> BuildFastScanner(Token regexp) {
113 113 var dfa = new RegularDFA<char, TokenType>(AlphabetBuilder);
114 114
115 115 var visitor = new RegularExpressionVisitor<TokenType>(dfa);
@@ -122,12 +122,12 namespace Implab.Formats.Json {
122 122 var ab = new CharAlphabet();
123 123 var optimal = dfa.Optimize(ab);
124 124
125 return new InputScanner<TokenType>(
125 return new FastInputScanner<TokenType>(
126 126 optimal.CreateTransitionTable(),
127 127 optimal.CreateFinalStateTable(),
128 128 NormalizeTags(optimal.CreateTagTable()),
129 129 optimal.InitialState,
130 ab.CreateCharMap()
130 ab.GetTranslationMap()
131 131 );
132 132 }
133 133
@@ -48,7 +48,7 namespace Implab.Formats.Json {
48 48
49 49 public bool Move(JsonTokenType token) {
50 50 var next = m_dfa[m_state, (int)token];
51 if (next == AutomatonConst.UNREACHABLE_STATE)
51 if (next == AutomatonConst.UnreachableState)
52 52 return false;
53 53 m_state = next;
54 54 return true;
@@ -116,7 +116,7 namespace Implab.Formats.Json {
116 116 MemberContext m_memberContext = MemberContext.MemberValue;
117 117
118 118 JsonElementType m_elementType;
119 object m_elementValue;
119 string m_elementValue;
120 120 string m_memberName = String.Empty;
121 121
122 122 Stack<ParserContext> m_stack = new Stack<ParserContext>();
@@ -152,7 +152,7 namespace Implab.Formats.Json {
152 152 /// <summary>
153 153 /// The element value. Only for elements of type <see cref="JsonElementType.Value"/>; <c>null</c> for all others.
154 154 /// </summary>
155 public object ElementValue {
155 public string ElementValue {
156 156 get { return m_elementValue; }
157 157 }
158 158
@@ -213,11 +213,11 namespace Implab.Formats.Json {
213 213 return true;
214 214 case JsonTokenType.Number:
215 215 m_elementType = JsonElementType.Value;
216 m_elementValue = double.Parse(tokenValue, CultureInfo.InvariantCulture);
216 m_elementValue = tokenValue;
217 217 return true;
218 218 case JsonTokenType.Literal:
219 219 m_elementType = JsonElementType.Value;
220 m_elementValue = ParseLiteral(tokenValue);
220 m_elementValue = tokenValue == "null" ? null : tokenValue;
221 221 return true;
222 222 case JsonTokenType.NameSeparator:
223 223 m_memberContext = MemberContext.MemberValue;
@@ -10,8 +10,8 namespace Implab.Formats.Json {
10 10 /// A scanner (lexer) that splits a character stream into JSON tokens.
11 11 /// </summary>
12 12 public abstract class JsonScanner : Disposable {
13 readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
14 readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();
13 readonly FastInputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
14 readonly FastInputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();
15 15
16 16 readonly char[] m_unescapeBuf = new char[4];
17 17 readonly char[] m_buffer;
@@ -25,7 +25,7 namespace Implab.Formats.Json {
25 25 m_length = length;
26 26 }
27 27
28 bool ReadChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
28 bool ReadChunk(FastInputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
29 29 scanner.ResetState();
30 30
31 31 while(scanner.Scan(m_buffer, m_pos, m_length)) {
@@ -71,7 +71,7 namespace Implab.Formats.Json {
71 71 return true;
72 72 }
73 73
74 bool ReadStringChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
74 bool ReadStringChunk(FastInputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
75 75 scanner.ResetState();
76 76
77 77 while (scanner.Scan(m_buffer, m_pos, m_length)) {
@@ -107,7 +107,7 namespace Implab.Formats.Json {
107 107
108 108 // scanner stops at scannerPos
109 109 if (!scanner.IsFinal)
110 throw new ParserException($"Unexpected character '{m_buffer[scannerPos + 1]}'");
110 throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");
111 111
112 112 if (scannerPos != m_pos) {
113 113 m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
@@ -55,6 +55,7
55 55 <Compile Include="Diagnostics\TraceEventType.cs" />
56 56 <Compile Include="Diagnostics\TraceSourceAttribute.cs" />
57 57 <Compile Include="Formats\CharMap.cs" />
58 <Compile Include="Formats\FastInpurScanner.cs" />
58 59 <Compile Include="Formats\InputScanner.cs" />
59 60 <Compile Include="Formats\Json\JsonStringScanner.cs" />
60 61 <Compile Include="Formats\Json\JsonTextScanner.cs" />
@@ -39,7 +39,7 namespace Implab.Xml {
39 39 int m_xmlDepth;
40 40
41 41 XmlSimpleAttribute[] m_attributes;
42 object m_value;
42 string m_value;
43 43 bool m_isEmpty;
44 44
45 45 XmlNodeType m_nodeType = XmlNodeType.None;
@@ -158,29 +158,13 namespace Implab.Xml {
158 158
159 159 public override string Value {
160 160 get {
161 return ConvertValueToString(m_value);
161 return m_value;
162 162 }
163 163 }
164
165 static string ConvertValueToString(object value) {
166 if (value == null)
167 return string.Empty;
168
169 switch (Convert.GetTypeCode(value)) {
170 case TypeCode.Double:
171 return ((double)value).ToString(CultureInfo.InvariantCulture);
172 case TypeCode.String:
173 return (string)value;
174 case TypeCode.Boolean:
175 return (bool)value ? "true" : "false";
176 default:
177 return value.ToString();
178 }
179 }
180
164
181 165 public override string GetAttribute(int i) {
182 166 Safe.ArgumentInRange(i, 0, AttributeCount - 1, nameof(i));
183 return ConvertValueToString(m_attributes[i].Value);
167 return m_attributes[i].Value;
184 168 }
185 169
186 170 public override string GetAttribute(string name) {
@@ -188,7 +172,7 namespace Implab.Xml {
188 172 return null;
189 173 var qName = m_context.Resolve(name);
190 174 var attr = Array.Find(m_attributes, x => x.QName == qName);
191 var value = ConvertValueToString(attr?.Value);
175 var value = attr?.Value;
192 176 return value == string.Empty ? null : value;
193 177 }
194 178
@@ -197,7 +181,7 namespace Implab.Xml {
197 181 return null;
198 182 var qName = new XmlQualifiedName(name, namespaceURI);
199 183 var attr = Array.Find(m_attributes, x => x.QName == qName);
200 var value = ConvertValueToString(attr?.Value);
184 var value = attr?.Value;
201 185 return value == string.Empty ? null : value;
202 186 }
203 187
@@ -319,7 +303,7 namespace Implab.Xml {
319 303 }
320 304 }
321 305
322 void ValueNode(object value) {
306 void ValueNode(string value) {
323 307 if (!IsSibling()) // the node is nested
324 308 m_xmlDepth++;
325 309
@@ -344,11 +328,11 namespace Implab.Xml {
344 328 if (attr.QName.Name == "xmlns") {
345 329 if (context == m_context)
346 330 context = new XmlNameContext(m_context, m_xmlDepth);
347 context.DefinePrefix(ConvertValueToString(attr.Value), string.Empty);
331 context.DefinePrefix(attr.Value, string.Empty);
348 332 } else if (attr.Prefix == m_xmlnsPrefix) {
349 333 if (context == m_context)
350 334 context = new XmlNameContext(m_context, m_xmlDepth);
351 context.DefinePrefix(ConvertValueToString(attr.Value), attr.QName.Name);
335 context.DefinePrefix(attr.Value, attr.QName.Name);
352 336 } else {
353 337 string attrPrefix;
354 338 if (string.IsNullOrEmpty(attr.QName.Namespace))
@@ -516,7 +500,7 namespace Implab.Xml {
516 500 m_jsonValueName,
517 501 m_jsonNamespace,
518 502 new[] {
519 new XmlSimpleAttribute("nil", m_xsiNamespace, m_xsiPrefix, true)
503 new XmlSimpleAttribute("nil", m_xsiNamespace, m_xsiPrefix, "true")
520 504 },
521 505 true
522 506 );
@@ -607,7 +591,7 namespace Implab.Xml {
607 591 public override string ToString() {
608 592 switch (NodeType) {
609 593 case XmlNodeType.Element:
610 return $"<{Name} {string.Join(" ", (m_attributes ?? new XmlSimpleAttribute[0]).Select(x => $"{x.Prefix}{(string.IsNullOrEmpty(x.Prefix) ? "" : ":")}{x.QName.Name}='{ConvertValueToString(x.Value)}'"))} {(IsEmptyElement ? "/" : "")}>";
594 return $"<{Name} {string.Join(" ", (m_attributes ?? new XmlSimpleAttribute[0]).Select(x => $"{x.Prefix}{(string.IsNullOrEmpty(x.Prefix) ? "" : ":")}{x.QName.Name}='{x.Value}'"))} {(IsEmptyElement ? "/" : "")}>";
611 595 case XmlNodeType.Attribute:
612 596 return $"@{Name}";
613 597 case XmlNodeType.Text:
@@ -7,7 +7,7 using System.Xml;
7 7
8 8 namespace Implab.Xml {
9 9 public class XmlSimpleAttribute {
10 public XmlSimpleAttribute(string name, string ns, string prefix, object value) {
10 public XmlSimpleAttribute(string name, string ns, string prefix, string value) {
11 11 QName = new XmlQualifiedName(name, ns);
12 12 Prefix = prefix;
13 13 Value = value;
@@ -17,6 +17,6 namespace Implab.Xml {
17 17
18 18 public string Prefix { get; set; }
19 19
20 public object Value { get; set; }
20 public string Value { get; set; }
21 21 }
22 22 }
General Comments 3
Under Review
author

Auto status change to "Under Review"

Approved
author

ok, latest stable version should be in default

You need to be logged in to leave comments. Login now