The requested changes are too big and content was truncated. Show full diff
| @@ -0,0 +1,4 | |||||
|  | 1 | <?xml version="1.0" encoding="utf-8"?> | |||
|  | 2 | <packages> | |||
|  | 3 | <package id="System.Text.Json" version="2.0.0.11" targetFramework="net45" /> | |||
|  | 4 | </packages> No newline at end of file | |||
| 1 | NO CONTENT: new file 100644, binary diff hidden |  | NO CONTENT: new file 100644, binary diff hidden | 
| 1 | NO CONTENT: new file 100644, binary diff hidden |  | NO CONTENT: new file 100644, binary diff hidden | 
| 1 | NO CONTENT: new file 100644 |  | NO CONTENT: new file 100644 | ||
| The requested commit or file is too big and content was truncated. Show full diff | |||||
| 1 | NO CONTENT: new file 100644 |  | NO CONTENT: new file 100644 | ||
| The requested commit or file is too big and content was truncated. Show full diff | |||||
| 1 | NO CONTENT: new file 100644, binary diff hidden |  | NO CONTENT: new file 100644, binary diff hidden | 
| 1 | NO CONTENT: new file 100644, binary diff hidden |  | NO CONTENT: new file 100644, binary diff hidden | 
| @@ -1,50 +1,88 | |||||
| 1 | using NUnit.Framework; |  | 1 | using NUnit.Framework; | |
| 2 | using System; |  | 2 | using System; | |
| 3 | using Implab.Formats.JSON; |  | 3 | using Implab.Formats.JSON; | |
|  | 4 | using Implab.Automaton; | |||
| 4 |  | 5 | |||
| 5 | namespace Implab.Format.Test { |  | 6 | namespace Implab.Format.Test { | |
| 6 | [TestFixture] |  | 7 | [TestFixture] | |
| 7 | public class JsonTests { |  | 8 | public class JsonTests { | |
| 8 | [Test] |  | 9 | [Test] | |
| 9 | public void TestScannerValidTokens() { |  | 10 | public void TestScannerValidTokens() { | |
| 10 | var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:") |  | 11 | using (var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) { | |
|  | 12 | ||||
|  | 13 | Tuple<JsonTokenType,object>[] expexted = new [] { | |||
|  | 14 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d), | |||
|  | 15 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), | |||
|  | 16 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d), | |||
|  | 17 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), | |||
|  | 18 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d), | |||
|  | 19 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), | |||
|  | 20 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d), | |||
|  | 21 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), | |||
|  | 22 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d), | |||
|  | 23 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), | |||
|  | 24 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d), | |||
|  | 25 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), | |||
|  | 26 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d), | |||
|  | 27 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), | |||
|  | 28 | new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text"), | |||
|  | 29 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), | |||
|  | 30 | new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal"), | |||
|  | 31 | new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " ["), | |||
|  | 32 | new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]"), | |||
|  | 33 | new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{"), | |||
|  | 34 | new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}"), | |||
|  | 35 | new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":") | |||
|  | 36 | }; | |||
| 11 |  | 37 | |||
| 12 | Tuple<JsonTokenType,object>[] expexted = new [] { |  | 38 | object value; | |
| 13 |  |  | 39 | JsonTokenType tokenType; | |
| 14 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), |  | 40 | for (var i = 0; i < expexted.Length; i++) { | |
| 15 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ), |  | 41 | ||
| 16 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), |  | 42 | Assert.IsTrue(scanner.ReadToken(out value, out tokenType)); | |
| 17 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ), |  | 43 | Assert.AreEqual(expexted[i].Item1, tokenType); | |
| 18 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), |  | 44 | Assert.AreEqual(expexted[i].Item2, value); | |
| 19 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ), |  | 45 | } | |
| 20 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), |  | 46 | ||
| 21 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ), |  | 47 | Assert.IsFalse(scanner.ReadToken(out value, out tokenType)); | |
| 22 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), |  | 48 | } | |
| 23 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ), |  | 49 | } | |
| 24 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), |  | 50 | ||
| 25 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ), |  | 51 | [Test] | |
| 26 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), |  | 52 | public void TestScannerBadTokens() { | |
| 27 | new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ), |  | 53 | var bad = new [] { | |
| 28 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), |  | 54 | " 1", | |
| 29 | new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ), |  | 55 | " literal", | |
| 30 | new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ), |  | 56 | " \"", | |
| 31 | new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ), |  | 57 | "\"unclosed string", | |
| 32 | new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ), |  | 58 | "1.bad", | |
| 33 | new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ), |  | 59 | "001", // should be read as three numbers | |
| 34 | new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" ) |  | 60 | "--10", | |
|  | 61 | "+10", | |||
|  | 62 | "1.0.0", | |||
|  | 63 | "1e1.0", | |||
|  | 64 | "l1teral0", | |||
|  | 65 | ".123", | |||
|  | 66 | "-.123" | |||
| 35 | }; |  | 67 | }; | |
| 36 |  | 68 | |||
| 37 | object value; |  | 69 | foreach (var json in bad) | |
| 38 | JsonTokenType tokenType; |  | 70 | using (var scanner = new JSONScanner(json)) { | |
| 39 | for (var i = 0; i < expexted.Length; i++) { |  | 71 | try { | |
| 40 |  | 72 | object value; | ||
| 41 | Assert.IsTrue(scanner.ReadToken(out value, out tokenType)); |  | 73 | JsonTokenType token; | |
| 42 | Assert.AreEqual(expexted[i].Item1, tokenType); |  | 74 | scanner.ReadToken(out value, out token); | |
| 43 | Assert.AreEqual(expexted[i].Item2, value); |  | 75 | if (!Object.Equals(value,json)) { | |
| 44 | } |  | 76 | Console.WriteLine("'{0}' is read as {1}", json, value is String ? String.Format("'{0}'", value) : value ); | |
| 45 |  | 77 | continue; | ||
| 46 | Assert.IsFalse(scanner.ReadToken(out value, out tokenType)); |  | 78 | } | |
|  | 79 | Assert.Fail("Token '{0}' shouldn't pass", json); | |||
|  | 80 | } catch (ParserException e) { | |||
|  | 81 | Console.WriteLine(e.Message); | |||
|  | 82 | } | |||
|  | 83 | } | |||
|  | 84 | ||||
| 47 | } |  | 85 | } | |
| 48 | } |  | 86 | } | |
| 49 | } |  | 87 | } | |
| 50 |  | 88 | |||
| @@ -1,343 +1,348 | |||||
| 1 | using Implab; |  | 1 | using Implab; | |
| 2 | using System; |  | 2 | using System; | |
| 3 | using System.Collections.Generic; |  | 3 | using System.Collections.Generic; | |
| 4 | using System.Linq; |  | 4 | using System.Linq; | |
| 5 | using System.Diagnostics; |  | 5 | using System.Diagnostics; | |
| 6 | using System.IO; |  | 6 | using System.IO; | |
| 7 | using System.CodeDom.Compiler; |  | 7 | using System.CodeDom.Compiler; | |
| 8 | using System.CodeDom; |  | 8 | using System.CodeDom; | |
| 9 |  | 9 | |||
| 10 | namespace Implab.Automaton { |  | 10 | namespace Implab.Automaton { | |
| 11 | public class DFATable : IDFATableBuilder { |  | 11 | public class DFATable : IDFATableBuilder { | |
| 12 | int m_stateCount; |  | 12 | int m_stateCount; | |
| 13 | int m_symbolCount; |  | 13 | int m_symbolCount; | |
| 14 | int m_initialState; |  | 14 | int m_initialState; | |
| 15 |  | 15 | |||
| 16 | readonly HashSet<int> m_finalStates = new HashSet<int>(); |  | 16 | readonly HashSet<int> m_finalStates = new HashSet<int>(); | |
| 17 | readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>(); |  | 17 | readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>(); | |
| 18 |  | 18 | |||
| 19 |  | 19 | |||
| 20 | #region IDFADefinition implementation |  | 20 | #region IDFADefinition implementation | |
| 21 |  | 21 | |||
| 22 | public bool IsFinalState(int s) { |  | 22 | public bool IsFinalState(int s) { | |
| 23 | Safe.ArgumentInRange(s, 0, m_stateCount, "s"); |  | 23 | Safe.ArgumentInRange(s, 0, m_stateCount, "s"); | |
| 24 |  | 24 | |||
| 25 | return m_finalStates.Contains(s); |  | 25 | return m_finalStates.Contains(s); | |
| 26 | } |  | 26 | } | |
| 27 |  | 27 | |||
| 28 | public IEnumerable<int> FinalStates { |  | 28 | public IEnumerable<int> FinalStates { | |
| 29 | get { |  | 29 | get { | |
| 30 | return m_finalStates; |  | 30 | return m_finalStates; | |
| 31 | } |  | 31 | } | |
| 32 | } |  | 32 | } | |
| 33 |  | 33 | |||
| 34 | public int StateCount { |  | 34 | public int StateCount { | |
| 35 | get { return m_stateCount; } |  | 35 | get { return m_stateCount; } | |
| 36 | } |  | 36 | } | |
| 37 |  | 37 | |||
| 38 | public int AlphabetSize { |  | 38 | public int AlphabetSize { | |
| 39 | get { return m_symbolCount; } |  | 39 | get { return m_symbolCount; } | |
| 40 | } |  | 40 | } | |
| 41 |  | 41 | |||
| 42 | public int InitialState { |  | 42 | public int InitialState { | |
| 43 | get { return m_initialState; } |  | 43 | get { return m_initialState; } | |
| 44 | } |  | 44 | } | |
| 45 |  | 45 | |||
| 46 | #endregion |  | 46 | #endregion | |
| 47 |  | 47 | |||
| 48 | public void SetInitialState(int s) { |  | 48 | public void SetInitialState(int s) { | |
| 49 | Safe.ArgumentAssert(s >= 0, "s"); |  | 49 | Safe.ArgumentAssert(s >= 0, "s"); | |
| 50 | m_stateCount = Math.Max(m_stateCount, s + 1); |  | 50 | m_stateCount = Math.Max(m_stateCount, s + 1); | |
| 51 | m_initialState = s; |  | 51 | m_initialState = s; | |
| 52 | } |  | 52 | } | |
| 53 |  | 53 | |||
| 54 | public void MarkFinalState(int state) { |  | 54 | public void MarkFinalState(int state) { | |
| 55 | m_stateCount = Math.Max(m_stateCount, state + 1); |  | 55 | m_stateCount = Math.Max(m_stateCount, state + 1); | |
| 56 | m_finalStates.Add(state); |  | 56 | m_finalStates.Add(state); | |
| 57 | } |  | 57 | } | |
| 58 |  | 58 | |||
| 59 | public void Add(AutomatonTransition item) { |  | 59 | public void Add(AutomatonTransition item) { | |
| 60 | Safe.ArgumentAssert(item.s1 >= 0, "item"); |  | 60 | Safe.ArgumentAssert(item.s1 >= 0, "item"); | |
| 61 | Safe.ArgumentAssert(item.s2 >= 0, "item"); |  | 61 | Safe.ArgumentAssert(item.s2 >= 0, "item"); | |
| 62 | Safe.ArgumentAssert(item.edge >= 0, "item"); |  | 62 | Safe.ArgumentAssert(item.edge >= 0, "item"); | |
| 63 |  | 63 | |||
| 64 | m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1); |  | 64 | m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1); | |
| 65 | m_symbolCount = Math.Max(m_symbolCount, item.edge + 1); |  | 65 | m_symbolCount = Math.Max(m_symbolCount, item.edge + 1); | |
| 66 |  | 66 | |||
| 67 | m_transitions.Add(item); |  | 67 | m_transitions.Add(item); | |
| 68 | } |  | 68 | } | |
| 69 |  | 69 | |||
| 70 | public void Clear() { |  | 70 | public void Clear() { | |
| 71 | m_stateCount = 0; |  | 71 | m_stateCount = 0; | |
| 72 | m_symbolCount = 0; |  | 72 | m_symbolCount = 0; | |
| 73 | m_finalStates.Clear(); |  | 73 | m_finalStates.Clear(); | |
| 74 | m_transitions.Clear(); |  | 74 | m_transitions.Clear(); | |
| 75 | } |  | 75 | } | |
| 76 |  | 76 | |||
| 77 | public bool Contains(AutomatonTransition item) { |  | 77 | public bool Contains(AutomatonTransition item) { | |
| 78 | return m_transitions.Contains(item); |  | 78 | return m_transitions.Contains(item); | |
| 79 | } |  | 79 | } | |
| 80 |  | 80 | |||
| 81 | public void CopyTo(AutomatonTransition[] array, int arrayIndex) { |  | 81 | public void CopyTo(AutomatonTransition[] array, int arrayIndex) { | |
| 82 | m_transitions.CopyTo(array, arrayIndex); |  | 82 | m_transitions.CopyTo(array, arrayIndex); | |
| 83 | } |  | 83 | } | |
| 84 |  | 84 | |||
| 85 | public bool Remove(AutomatonTransition item) { |  | 85 | public bool Remove(AutomatonTransition item) { | |
| 86 | return m_transitions.Remove(item); |  | 86 | return m_transitions.Remove(item); | |
| 87 | } |  | 87 | } | |
| 88 |  | 88 | |||
| 89 | public int Count { |  | 89 | public int Count { | |
| 90 | get { |  | 90 | get { | |
| 91 | return m_transitions.Count; |  | 91 | return m_transitions.Count; | |
| 92 | } |  | 92 | } | |
| 93 | } |  | 93 | } | |
| 94 |  | 94 | |||
| 95 | public bool IsReadOnly { |  | 95 | public bool IsReadOnly { | |
| 96 | get { |  | 96 | get { | |
| 97 | return false; |  | 97 | return false; | |
| 98 | } |  | 98 | } | |
| 99 | } |  | 99 | } | |
| 100 |  | 100 | |||
| 101 | public IEnumerator<AutomatonTransition> GetEnumerator() { |  | 101 | public IEnumerator<AutomatonTransition> GetEnumerator() { | |
| 102 | return m_transitions.GetEnumerator(); |  | 102 | return m_transitions.GetEnumerator(); | |
| 103 | } |  | 103 | } | |
| 104 |  | 104 | |||
| 105 | System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { |  | 105 | System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { | |
| 106 | return GetEnumerator(); |  | 106 | return GetEnumerator(); | |
| 107 | } |  | 107 | } | |
| 108 |  | 108 | |||
| 109 | public void AddSymbol(int symbol) { |  | 109 | public void AddSymbol(int symbol) { | |
| 110 | Safe.ArgumentAssert(symbol >= 0, "symbol"); |  | 110 | Safe.ArgumentAssert(symbol >= 0, "symbol"); | |
| 111 | m_symbolCount = Math.Max(symbol + 1, m_symbolCount); |  | 111 | m_symbolCount = Math.Max(symbol + 1, m_symbolCount); | |
| 112 | } |  | 112 | } | |
| 113 |  | 113 | |||
| 114 | public int[,] CreateTransitionTable() { |  | 114 | public int[,] CreateTransitionTable() { | |
| 115 | var table = new int[StateCount,AlphabetSize]; |  | 115 | var table = new int[StateCount,AlphabetSize]; | |
| 116 |  | 116 | |||
| 117 | for (int i = 0; i < StateCount; i++) |  | 117 | for (int i = 0; i < StateCount; i++) | |
| 118 | for (int j = 0; j < AlphabetSize; j++) |  | 118 | for (int j = 0; j < AlphabetSize; j++) | |
| 119 | table[i, j] = AutomatonConst.UNREACHABLE_STATE; |  | 119 | table[i, j] = AutomatonConst.UNREACHABLE_STATE; | |
| 120 |  | 120 | |||
| 121 | foreach (var t in this) |  | 121 | foreach (var t in this) | |
| 122 | table[t.s1,t.edge] = t.s2; |  | 122 | table[t.s1,t.edge] = t.s2; | |
| 123 |  | 123 | |||
| 124 | return table; |  | 124 | return table; | |
| 125 | } |  | 125 | } | |
| 126 |  | 126 | |||
| 127 | public bool[] CreateFinalStateTable() { |  | 127 | public bool[] CreateFinalStateTable() { | |
| 128 | var table = new bool[StateCount]; |  | 128 | var table = new bool[StateCount]; | |
| 129 |  | 129 | |||
| 130 | foreach (var s in FinalStates) |  | 130 | foreach (var s in FinalStates) | |
| 131 | table[s] = true; |  | 131 | table[s] = true; | |
| 132 |  | 132 | |||
| 133 | return table; |  | 133 | return table; | |
| 134 | } |  | 134 | } | |
| 135 |  | 135 | |||
| 136 | /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary> |  | 136 | /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary> | |
| 137 | /// <remarks> |  | 137 | /// <remarks> | |
| 138 | /// В процессе построения минимального автомата требуется разделить множество состояний, |  | 138 | /// В процессе построения минимального автомата требуется разделить множество состояний, | |
| 139 | /// на два подмножества - конечные состояния и все остальные, после чего эти подмножества |  | 139 | /// на два подмножества - конечные состояния и все остальные, после чего эти подмножества | |
| 140 | /// будут резделены на более мелкие. Иногда требуется гарантировать различия конечных сосотяний, |  | 140 | /// будут резделены на более мелкие. Иногда требуется гарантировать различия конечных сосотяний, | |
| 141 | /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний. |  | 141 | /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний. | |
| 142 | /// </remarks> |  | 142 | /// </remarks> | |
| 143 | /// <returns>The final states.</returns> |  | 143 | /// <returns>The final states.</returns> | |
| 144 | protected virtual IEnumerable<HashSet<int>> |  | 144 | protected virtual IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) { | |
| 145 | return new HashSet<int> |  | 145 | return new [] { new HashSet<int>(states) }; | |
| 146 | } |  | 146 | } | |
| 147 |  | 147 | |||
| 148 | protected void Optimize( |  | 148 | protected void Optimize( | |
| 149 | IDFATableBuilder optimalDFA, |  | 149 | IDFATableBuilder optimalDFA, | |
| 150 | IDictionary<int,int> alphabetMap, |  | 150 | IDictionary<int,int> alphabetMap, | |
| 151 | IDictionary<int,int> stateMap |  | 151 | IDictionary<int,int> stateMap | |
| 152 | ) { |  | 152 | ) { | |
| 153 | Safe.ArgumentNotNull(optimalDFA, "dfa"); |  | 153 | Safe.ArgumentNotNull(optimalDFA, "dfa"); | |
| 154 | Safe.ArgumentNotNull(alphabetMap, "alphabetMap"); |  | 154 | Safe.ArgumentNotNull(alphabetMap, "alphabetMap"); | |
| 155 | Safe.ArgumentNotNull(stateMap, "stateMap"); |  | 155 | Safe.ArgumentNotNull(stateMap, "stateMap"); | |
| 156 |  | 156 | |||
| 157 |  | 157 | |||
| 158 | var setComparer = new CustomEqualityComparer<HashSet<int>>( |  | 158 | var setComparer = new CustomEqualityComparer<HashSet<int>>( | |
| 159 | (x, y) => x.SetEquals(y), |  | 159 | (x, y) => x.SetEquals(y), | |
| 160 | s => s.Sum(x => x.GetHashCode()) |  | 160 | s => s.Sum(x => x.GetHashCode()) | |
| 161 | ); |  | 161 | ); | |
| 162 |  | 162 | |||
| 163 | var optimalStates = new HashSet<HashSet<int>>(setComparer); |  | 163 | var optimalStates = new HashSet<HashSet<int>>(setComparer); | |
| 164 | var queue = new HashSet<HashSet<int>>(setComparer); |  | 164 | var queue = new HashSet<HashSet<int>>(setComparer); | |
| 165 |  | 165 | |||
| 166 | // получаем конечные состояния, сгруппированные по маркерам |  | 166 | optimalStates.Add(new HashSet<int>(FinalStates)); | |
| 167 | optimalStates.UnionWith( |  | |||
| 168 | GroupFinalStates() |  | |||
| 169 | ); |  | |||
| 170 |  | 167 | |||
| 171 | var state = new HashSet<int>( |  | 168 | var state = new HashSet<int>( | |
| 172 | Enumerable |  | 169 | Enumerable | |
| 173 | .Range(0, m_stateCount) |  | 170 | .Range(0, m_stateCount) | |
| 174 | .Where(i => !m_finalStates.Contains(i)) |  | 171 | .Where(i => !m_finalStates.Contains(i)) | |
| 175 | ); |  | 172 | ); | |
| 176 |  | 173 | |||
| 177 | optimalStates.Add(state); |  | 174 | optimalStates.Add(state); | |
| 178 | queue.Add(state); |  | 175 | queue.Add(state); | |
| 179 |  | 176 | |||
| 180 | var rmap = m_transitions |  | 177 | var rmap = m_transitions | |
| 181 | .GroupBy(t => t.s2) |  | 178 | .GroupBy(t => t.s2) | |
| 182 | .ToDictionary( |  | 179 | .ToDictionary( | |
| 183 | g => g.Key, // s2 |  | 180 | g => g.Key, // s2 | |
| 184 | g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key) |  | 181 | g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key) | |
| 185 | ); |  | 182 | ); | |
| 186 |  | 183 | |||
| 187 | while (queue.Count > 0) { |  | 184 | while (queue.Count > 0) { | |
| 188 | var stateA = queue.First(); |  | 185 | var stateA = queue.First(); | |
| 189 | queue.Remove(stateA); |  | 186 | queue.Remove(stateA); | |
| 190 |  | 187 | |||
| 191 | for (int c = 0; c < m_symbolCount; c++) { |  | 188 | for (int c = 0; c < m_symbolCount; c++) { | |
| 192 | var stateX = new HashSet<int>(); |  | 189 | var stateX = new HashSet<int>(); | |
| 193 |  |  | 190 | foreach(var a in stateA.Where(rmap.ContainsKey)) | |
| 194 |  |  | 191 | stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a' | |
| 195 |  | ||||
| 196 | stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1)); |  | |||
| 197 |  | 192 | |||
| 198 | var tmp = optimalStates.ToArray(); |  | 193 | var tmp = optimalStates.ToArray(); | |
| 199 | foreach (var stateY in tmp) { |  | 194 | foreach (var stateY in tmp) { | |
| 200 | if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { |  | 195 | var stateR1 = new HashSet<int>(stateY); | |
| 201 |  |  | 196 | var stateR2 = new HashSet<int>(stateY); | |
| 202 | var stateR2 = new HashSet<int>(stateY); |  | |||
| 203 |  | 197 | |||
| 204 |  |  | 198 | stateR1.IntersectWith(stateX); | |
| 205 |  |  | 199 | stateR2.ExceptWith(stateX); | |
|  | 200 | ||||
|  | 201 | if (stateR1.Count > 0 && stateR2.Count > 0) { | |||
|  | 202 | ||||
| 206 |  | 203 | |||
| 207 | optimalStates.Remove(stateY); |  | 204 | optimalStates.Remove(stateY); | |
| 208 | optimalStates.Add(stateR1); |  | 205 | optimalStates.Add(stateR1); | |
| 209 | optimalStates.Add(stateR2); |  | 206 | optimalStates.Add(stateR2); | |
| 210 |  | 207 | |||
| 211 | if (queue.Contains(stateY)) { |  | 208 | if (queue.Contains(stateY)) { | |
| 212 | queue.Remove(stateY); |  | 209 | queue.Remove(stateY); | |
| 213 | queue.Add(stateR1); |  | 210 | queue.Add(stateR1); | |
| 214 | queue.Add(stateR2); |  | 211 | queue.Add(stateR2); | |
| 215 | } else { |  | 212 | } else { | |
| 216 | queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); |  | 213 | queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); | |
| 217 | } |  | 214 | } | |
| 218 | } |  | 215 | } | |
| 219 | } |  | 216 | } | |
| 220 | } |  | 217 | } | |
| 221 | } |  | 218 | } | |
| 222 |  | 219 | |||
|  | 220 | // дополнительно разбиваем конечные состояния | |||
|  | 221 | foreach (var final in optimalStates.Where(s => s.Overlaps(m_finalStates)).ToArray()) { | |||
|  | 222 | optimalStates.Remove(final); | |||
|  | 223 | foreach (var split in SplitFinalStates(final)) | |||
|  | 224 | optimalStates.Add(split); | |||
|  | 225 | } | |||
|  | 226 | ||||
|  | 227 | ||||
| 223 | // карта получения оптимального состояния по соотвествующему ему простому состоянию |  | 228 | // карта получения оптимального состояния по соотвествующему ему простому состоянию | |
| 224 | var nextState = 0; |  | 229 | var nextState = 0; | |
| 225 | foreach (var item in optimalStates) { |  | 230 | foreach (var item in optimalStates) { | |
| 226 | var id = nextState++; |  | 231 | var id = nextState++; | |
| 227 | foreach (var s in item) |  | 232 | foreach (var s in item) | |
| 228 | stateMap[s] = id; |  | 233 | stateMap[s] = id; | |
| 229 | } |  | 234 | } | |
| 230 |  | 235 | |||
| 231 | // получаем минимальный алфавит |  | 236 | // получаем минимальный алфавит | |
| 232 | // входные символы не различимы, если Move(s,a1) == Move(s,a2), для любого s |  | 237 | // входные символы не различимы, если Move(s,a1) == Move(s,a2), для любого s | |
| 233 | // для этого используем алгоритм кластеризации, сначала |  | 238 | // для этого используем алгоритм кластеризации, сначала | |
| 234 | // считаем, что все символы не различимы |  | 239 | // считаем, что все символы не различимы | |
| 235 |  | 240 | |||
| 236 | var minClasses = new HashSet<HashSet<int>>(setComparer); |  | 241 | var minClasses = new HashSet<HashSet<int>>(setComparer); | |
| 237 | var alphaQueue = new Queue<HashSet<int>>(); |  | 242 | var alphaQueue = new Queue<HashSet<int>>(); | |
| 238 | alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize))); |  | 243 | alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize))); | |
| 239 |  | 244 | |||
| 240 | // для всех состояний, будем проверять каждый класс на различимость, |  | 245 | // для всех состояний, будем проверять каждый класс на различимость, | |
| 241 | // т.е. символы различимы, если они приводят к разным состояниям |  | 246 | // т.е. символы различимы, если они приводят к разным состояниям | |
| 242 | for (int s = 0 ; s < optimalStates.Count; s++) { |  | 247 | for (int s = 0 ; s < optimalStates.Count; s++) { | |
| 243 | var newQueue = new Queue<HashSet<int>>(); |  | 248 | var newQueue = new Queue<HashSet<int>>(); | |
| 244 |  | 249 | |||
| 245 | foreach (var A in alphaQueue) { |  | 250 | foreach (var A in alphaQueue) { | |
| 246 | // классы из одного символа делить бесполезно, переводим их сразу в |  | 251 | // классы из одного символа делить бесполезно, переводим их сразу в | |
| 247 | // результирующий алфавит |  | 252 | // результирующий алфавит | |
| 248 | if (A.Count == 1) { |  | 253 | if (A.Count == 1) { | |
| 249 | minClasses.Add(A); |  | 254 | minClasses.Add(A); | |
| 250 | continue; |  | 255 | continue; | |
| 251 | } |  | 256 | } | |
| 252 |  | 257 | |||
| 253 | // различаем классы символов, которые переводят в различные оптимальные состояния |  | 258 | // различаем классы символов, которые переводят в различные оптимальные состояния | |
| 254 | // optimalState -> alphaClass |  | 259 | // optimalState -> alphaClass | |
| 255 | var classes = new Dictionary<int, HashSet<int>>(); |  | 260 | var classes = new Dictionary<int, HashSet<int>>(); | |
| 256 |  | 261 | |||
| 257 | foreach (var term in A) { |  | 262 | foreach (var term in A) { | |
| 258 | // ищем все переходы класса по символу term |  | 263 | // ищем все переходы класса по символу term | |
| 259 | var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First(); |  | 264 | var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First(); | |
| 260 |  | 265 | |||
| 261 | HashSet<int> a2; |  | 266 | HashSet<int> a2; | |
| 262 | if (!classes.TryGetValue(s2, out a2)) { |  | 267 | if (!classes.TryGetValue(s2, out a2)) { | |
| 263 | a2 = new HashSet<int>(); |  | 268 | a2 = new HashSet<int>(); | |
| 264 | newQueue.Enqueue(a2); |  | 269 | newQueue.Enqueue(a2); | |
| 265 | classes[s2] = a2; |  | 270 | classes[s2] = a2; | |
| 266 | } |  | 271 | } | |
| 267 | a2.Add(term); |  | 272 | a2.Add(term); | |
| 268 | } |  | 273 | } | |
| 269 | } |  | 274 | } | |
| 270 |  | 275 | |||
| 271 | if (newQueue.Count == 0) |  | 276 | if (newQueue.Count == 0) | |
| 272 | break; |  | 277 | break; | |
| 273 | alphaQueue = newQueue; |  | 278 | alphaQueue = newQueue; | |
| 274 | } |  | 279 | } | |
| 275 |  | 280 | |||
| 276 | // после окончания работы алгоритма в очереди останутся минимальные различимые классы |  | 281 | // после окончания работы алгоритма в очереди останутся минимальные различимые классы | |
| 277 | // входных символов |  | 282 | // входных символов | |
| 278 | foreach (var A in alphaQueue) |  | 283 | foreach (var A in alphaQueue) | |
| 279 | minClasses.Add(A); |  | 284 | minClasses.Add(A); | |
| 280 |  | 285 | |||
| 281 | // построение отображения алфавитов входных символов. |  | 286 | // построение отображения алфавитов входных символов. | |
| 282 | // поскольку символ DFAConst.UNCLASSIFIED_INPUT может иметь |  | 287 | // поскольку символ DFAConst.UNCLASSIFIED_INPUT может иметь | |
| 283 | // специальное значение, тогда сохраним минимальный класс, |  | 288 | // специальное значение, тогда сохраним минимальный класс, | |
| 284 | // содержащий этот символ на томже месте. |  | 289 | // содержащий этот символ на томже месте. | |
| 285 |  | 290 | |||
| 286 | var nextCls = 0; |  | 291 | var nextCls = 0; | |
| 287 | foreach (var item in minClasses) { |  | 292 | foreach (var item in minClasses) { | |
| 288 | if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT) |  | 293 | if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT) | |
| 289 | nextCls++; |  | 294 | nextCls++; | |
| 290 |  | 295 | |||
| 291 | // сохраняем DFAConst.UNCLASSIFIED_INPUT |  | 296 | // сохраняем DFAConst.UNCLASSIFIED_INPUT | |
| 292 | var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++; |  | 297 | var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++; | |
| 293 | optimalDFA.AddSymbol(cls); |  | 298 | optimalDFA.AddSymbol(cls); | |
| 294 |  | 299 | |||
| 295 | foreach (var a in item) |  | 300 | foreach (var a in item) | |
| 296 | alphabetMap[a] = cls; |  | 301 | alphabetMap[a] = cls; | |
| 297 | } |  | 302 | } | |
| 298 |  | 303 | |||
| 299 | // построение автомата |  | 304 | // построение автомата | |
| 300 | optimalDFA.SetInitialState(stateMap[m_initialState]); |  | 305 | optimalDFA.SetInitialState(stateMap[m_initialState]); | |
| 301 |  | 306 | |||
| 302 | foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct()) |  | 307 | foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct()) | |
| 303 | optimalDFA.MarkFinalState(sf); |  | 308 | optimalDFA.MarkFinalState(sf); | |
| 304 |  | 309 | |||
| 305 | foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct()) |  | 310 | foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct()) | |
| 306 | optimalDFA.Add(t); |  | 311 | optimalDFA.Add(t); | |
| 307 | } |  | 312 | } | |
| 308 |  | 313 | |||
| 309 | protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { |  | 314 | protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { | |
| 310 | Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); |  | 315 | Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); | |
| 311 | Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); |  | 316 | Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); | |
| 312 |  | 317 | |||
| 313 | var data = new List<string>(); |  | 318 | var data = new List<string>(); | |
| 314 |  | 319 | |||
| 315 | data.Add("digraph dfa {"); |  | 320 | data.Add("digraph dfa {"); | |
| 316 |  | 321 | |||
| 317 | foreach (var final in m_finalStates) |  | 322 | foreach (var final in m_finalStates) | |
| 318 | data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final)))); |  | 323 | data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final)))); | |
| 319 |  | 324 | |||
| 320 | foreach (var t in m_transitions) |  | 325 | foreach (var t in m_transitions) | |
| 321 | data.Add(String.Format( |  | 326 | data.Add(String.Format( | |
| 322 | "{0} -> {2} [label={1}];", |  | 327 | "{0} -> {2} [label={1}];", | |
| 323 | String.Join("", stateAlphabet.GetSymbols(t.s1)), |  | 328 | String.Join("", stateAlphabet.GetSymbols(t.s1)), | |
| 324 | ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))), |  | 329 | ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))), | |
| 325 | String.Join("", stateAlphabet.GetSymbols(t.s2)) |  | 330 | String.Join("", stateAlphabet.GetSymbols(t.s2)) | |
| 326 | )); |  | 331 | )); | |
| 327 | data.Add("}"); |  | 332 | data.Add("}"); | |
| 328 | return String.Join("\n", data); |  | 333 | return String.Join("\n", data); | |
| 329 | } |  | 334 | } | |
| 330 |  | 335 | |||
| 331 | static string ToLiteral(string input) |  | 336 | static string ToLiteral(string input) | |
| 332 | { |  | 337 | { | |
| 333 | using (var writer = new StringWriter()) |  | 338 | using (var writer = new StringWriter()) | |
| 334 | { |  | 339 | { | |
| 335 | using (var provider = CodeDomProvider.CreateProvider("CSharp")) |  | 340 | using (var provider = CodeDomProvider.CreateProvider("CSharp")) | |
| 336 | { |  | 341 | { | |
| 337 | provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null); |  | 342 | provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null); | |
| 338 | return writer.ToString(); |  | 343 | return writer.ToString(); | |
| 339 | } |  | 344 | } | |
| 340 | } |  | 345 | } | |
| 341 | } |  | 346 | } | |
| 342 | } |  | 347 | } | |
| 343 | } |  | 348 | } | |
| @@ -1,95 +1,91 | |||||
| 1 | using System.Collections.Generic; |  | 1 | using System.Collections.Generic; | |
| 2 | using System.Linq; |  | 2 | using System.Linq; | |
| 3 |  | 3 | |||
| 4 | namespace Implab.Automaton.RegularExpressions { |  | 4 | namespace Implab.Automaton.RegularExpressions { | |
| 5 | public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { |  | 5 | public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { | |
| 6 |  | 6 | |||
| 7 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); |  | 7 | readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); | |
| 8 | readonly IAlphabet<TInput> m_alphabet; |  | 8 | readonly IAlphabet<TInput> m_alphabet; | |
| 9 |  | 9 | |||
| 10 | public RegularDFA(IAlphabet<TInput> alphabet) { |  | 10 | public RegularDFA(IAlphabet<TInput> alphabet) { | |
| 11 | Safe.ArgumentNotNull(alphabet, "aplhabet"); |  | 11 | Safe.ArgumentNotNull(alphabet, "aplhabet"); | |
| 12 |  | 12 | |||
| 13 | m_alphabet = alphabet; |  | 13 | m_alphabet = alphabet; | |
| 14 | } |  | 14 | } | |
| 15 |  | 15 | |||
| 16 |  | 16 | |||
| 17 | public IAlphabet<TInput> InputAlphabet { |  | 17 | public IAlphabet<TInput> InputAlphabet { | |
| 18 | get { |  | 18 | get { | |
| 19 | return m_alphabet; |  | 19 | return m_alphabet; | |
| 20 | } |  | 20 | } | |
| 21 | } |  | 21 | } | |
| 22 |  | 22 | |||
| 23 | public void MarkFinalState(int s, TTag[] tags) { |  | 23 | public void MarkFinalState(int s, TTag[] tags) { | |
| 24 | MarkFinalState(s); |  | 24 | MarkFinalState(s); | |
| 25 | SetStateTag(s, tags); |  | 25 | SetStateTag(s, tags); | |
| 26 | } |  | 26 | } | |
| 27 |  | 27 | |||
| 28 | public void SetStateTag(int s, TTag[] tags) { |  | 28 | public void SetStateTag(int s, TTag[] tags) { | |
| 29 | Safe.ArgumentNotNull(tags, "tags"); |  | 29 | Safe.ArgumentNotNull(tags, "tags"); | |
| 30 | m_tags[s] = tags; |  | 30 | m_tags[s] = tags; | |
| 31 | } |  | 31 | } | |
| 32 |  | 32 | |||
| 33 | public TTag[] GetStateTag(int s) { |  | 33 | public TTag[] GetStateTag(int s) { | |
| 34 | TTag[] tags; |  | 34 | TTag[] tags; | |
| 35 | return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; |  | 35 | return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; | |
| 36 | } |  | 36 | } | |
| 37 |  | 37 | |||
| 38 | public TTag[][] CreateTagTable() { |  | 38 | public TTag[][] CreateTagTable() { | |
| 39 | var table = new TTag[StateCount][]; |  | 39 | var table = new TTag[StateCount][]; | |
| 40 |  | 40 | |||
| 41 | foreach (var pair in m_tags) |  | 41 | foreach (var pair in m_tags) | |
| 42 | table[pair.Key] = pair.Value; |  | 42 | table[pair.Key] = pair.Value; | |
| 43 |  | 43 | |||
| 44 | return table; |  | 44 | return table; | |
| 45 | } |  | 45 | } | |
| 46 |  | 46 | |||
| 47 | /// <summary> |  | 47 | /// <summary> | |
| 48 | /// Optimize the specified alphabet. |  | 48 | /// Optimize the specified alphabet. | |
| 49 | /// </summary> |  | 49 | /// </summary> | |
| 50 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> |  | 50 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> | |
| 51 | public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { |  | 51 | public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { | |
| 52 | Safe.ArgumentNotNull(alphabet, "alphabet"); |  | 52 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |
| 53 |  | 53 | |||
| 54 | var dfa = new RegularDFA<TInput, TTag>(alphabet); |  | 54 | var dfa = new RegularDFA<TInput, TTag>(alphabet); | |
| 55 |  | 55 | |||
| 56 | var alphaMap = new Dictionary<int,int>(); |  | 56 | var alphaMap = new Dictionary<int,int>(); | |
| 57 | var stateMap = new Dictionary<int,int>(); |  | 57 | var stateMap = new Dictionary<int,int>(); | |
| 58 |  | 58 | |||
| 59 | Optimize(dfa, alphaMap, stateMap); |  | 59 | Optimize(dfa, alphaMap, stateMap); | |
| 60 |  | 60 | |||
| 61 | // mark tags in the new DFA |  | 61 | // mark tags in the new DFA | |
| 62 | foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value )) |  | 62 | foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value )) | |
| 63 | dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray()); |  | 63 | dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray()); | |
| 64 |  | 64 | |||
| 65 | // make the alphabet for the new DFA |  | 65 | // make the alphabet for the new DFA | |
| 66 | // skip all unclassified symbols |  | 66 | // skip all unclassified symbols | |
| 67 | foreach (var pair in alphaMap.Where(x => x.Value != 0)) |  | 67 | foreach (var pair in alphaMap.Where(x => x.Value != 0)) | |
| 68 | alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value); |  | 68 | alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value); | |
| 69 |  | ||||
| 70 | var orig = ToString(); |  | |||
| 71 | var opt = dfa.ToString(); |  | |||
| 72 |  | ||||
| 73 | return dfa; |  | 69 | return dfa; | |
| 74 | } |  | 70 | } | |
| 75 |  | 71 | |||
| 76 | protected override IEnumerable<HashSet<int>> |  | 72 | protected override IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) { | |
| 77 | var arrayComparer = new CustomEqualityComparer<TTag[]>( |  | 73 | var arrayComparer = new CustomEqualityComparer<TTag[]>( | |
| 78 | (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)), |  | 74 | (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)), | |
| 79 | x => x.Sum(it => x.GetHashCode()) |  | 75 | x => x.Sum(it => x.GetHashCode()) | |
| 80 | ); |  | 76 | ); | |
| 81 | return |  | 77 | return states.GroupBy(x => m_tags[x] ?? new TTag[0], arrayComparer).Select(g => new HashSet<int>(g)); | |
| 82 | } |  | 78 | } | |
| 83 |  | 79 | |||
| 84 | public override string ToString() { |  | 80 | public override string ToString() { | |
| 85 | var states = new MapAlphabet<string>(false, null); |  | 81 | var states = new MapAlphabet<string>(false, null); | |
| 86 |  | 82 | |||
| 87 | for (int i = 0; i < StateCount; i++) |  | 83 | for (int i = 0; i < StateCount; i++) | |
| 88 | states.DefineSymbol(string.Format("s{0}", i), i); |  | 84 | states.DefineSymbol(string.Format("s{0}", i), i); | |
| 89 |  | 85 | |||
| 90 | return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize); |  | 86 | return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize); | |
| 91 | } |  | 87 | } | |
| 92 |  | 88 | |||
| 93 | } |  | 89 | } | |
| 94 | } |  | 90 | } | |
| 95 |  | 91 | |||
| @@ -1,119 +1,121 | |||||
| 1 | using System.Linq; |  | 1 | using System.Linq; | |
| 2 | using Implab.Automaton.RegularExpressions; |  | 2 | using Implab.Automaton.RegularExpressions; | |
| 3 | using System; |  | 3 | using System; | |
| 4 | using Implab.Automaton; |  | 4 | using Implab.Automaton; | |
| 5 | using Implab.Components; |  | 5 | using Implab.Components; | |
| 6 |  | 6 | |||
| 7 | namespace Implab.Formats.JSON { |  | 7 | namespace Implab.Formats.JSON { | |
| 8 | class JSONGrammar : Grammar<char> { |  | 8 | class JSONGrammar : Grammar<char> { | |
| 9 | public enum TokenType { |  | 9 | public enum TokenType { | |
| 10 | None, |  | 10 | None, | |
| 11 | BeginObject, |  | 11 | BeginObject, | |
| 12 | EndObject, |  | 12 | EndObject, | |
| 13 | BeginArray, |  | 13 | BeginArray, | |
| 14 | EndArray, |  | 14 | EndArray, | |
| 15 | String, |  | 15 | String, | |
| 16 | Number, |  | 16 | Number, | |
| 17 | Literal, |  | 17 | Literal, | |
| 18 | NameSeparator, |  | 18 | NameSeparator, | |
| 19 | ValueSeparator, |  | 19 | ValueSeparator, | |
|  | 20 | Whitespace, | |||
| 20 |  | 21 | |||
| 21 | StringBound, |  | 22 | StringBound, | |
| 22 | EscapedChar, |  | 23 | EscapedChar, | |
| 23 | UnescapedChar, |  | 24 | UnescapedChar, | |
| 24 | EscapedUnicode |  | 25 | EscapedUnicode | |
| 25 | } |  | 26 | } | |
| 26 |  | 27 | |||
| 27 | static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar()); |  | 28 | static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar()); | |
| 28 |  | 29 | |||
| 29 | public static JSONGrammar Instance { |  | 30 | public static JSONGrammar Instance { | |
| 30 | get { return _instance.Value; } |  | 31 | get { return _instance.Value; } | |
| 31 | } |  | 32 | } | |
| 32 |  | 33 | |||
| 33 | readonly ScannerContext<TokenType> m_jsonExpression; |  | 34 | readonly ScannerContext<TokenType> m_jsonExpression; | |
| 34 | readonly ScannerContext<TokenType> m_stringExpression; |  | 35 | readonly ScannerContext<TokenType> m_stringExpression; | |
| 35 | readonly CharAlphabet m_defaultAlphabet = new CharAlphabet(); |  | 36 | readonly CharAlphabet m_defaultAlphabet = new CharAlphabet(); | |
| 36 |  | 37 | |||
| 37 | public JSONGrammar() { |  | 38 | public JSONGrammar() { | |
| 38 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); |  | 39 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); | |
| 39 | var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9')); |  | 40 | var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9')); | |
| 40 | var digit9 = SymbolRangeToken('1', '9'); |  | 41 | var digit9 = SymbolRangeToken('1', '9'); | |
| 41 | var zero = SymbolToken('0'); |  | 42 | var zero = SymbolToken('0'); | |
| 42 | var digit = zero.Or(digit9); |  | 43 | var digit = zero.Or(digit9); | |
| 43 | var dot = SymbolToken('.'); |  | 44 | var dot = SymbolToken('.'); | |
| 44 | var minus = SymbolToken('-'); |  | 45 | var minus = SymbolToken('-'); | |
| 45 | var sign = SymbolSetToken('-', '+'); |  | 46 | var sign = SymbolSetToken('-', '+'); | |
| 46 | var expSign = SymbolSetToken('e', 'E'); |  | 47 | var expSign = SymbolSetToken('e', 'E'); | |
| 47 | var letters = SymbolRangeToken('a', 'z'); |  | 48 | var letters = SymbolRangeToken('a', 'z'); | |
| 48 | var integer = zero.Or(digit9.Cat(digit.EClosure())); |  | 49 | var integer = zero.Or(digit9.Cat(digit.EClosure())); | |
| 49 | var frac = dot.Cat(digit.Closure()); |  | 50 | var frac = dot.Cat(digit.Closure()); | |
| 50 | var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure()); |  | 51 | var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure()); | |
| 51 | var quote = SymbolToken('"'); |  | 52 | var quote = SymbolToken('"'); | |
| 52 | var backSlash = SymbolToken('\\'); |  | 53 | var backSlash = SymbolToken('\\'); | |
| 53 | var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r'); |  | 54 | var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r'); | |
| 54 | var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4)); |  | 55 | var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4)); | |
| 55 | var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure(); |  | 56 | var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure(); | |
| 56 | var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace); |  | 57 | var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace); | |
| 57 | var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace); |  | 58 | var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace); | |
| 58 | var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace); |  | 59 | var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace); | |
| 59 | var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace); |  | 60 | var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace); | |
| 60 | var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace); |  | 61 | var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace); | |
| 61 | var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace); |  | 62 | var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace); | |
| 62 |  | 63 | |||
| 63 | var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional()); |  | 64 | var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional()); | |
| 64 | var literal = letters.Closure(); |  | 65 | var literal = letters.Closure(); | |
| 65 | var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x)); |  | 66 | var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x)); | |
| 66 |  | 67 | |||
| 67 | var jsonExpression = |  | 68 | var jsonExpression = | |
| 68 | number.Tag(TokenType.Number) |  | 69 | number.Tag(TokenType.Number) | |
| 69 | .Or(literal.Tag(TokenType.Literal)) |  | 70 | .Or(literal.Tag(TokenType.Literal)) | |
| 70 | .Or(quote.Tag(TokenType.StringBound)) |  | 71 | .Or(quote.Tag(TokenType.StringBound)) | |
| 71 | .Or(beginObject.Tag(TokenType.BeginObject)) |  | 72 | .Or(beginObject.Tag(TokenType.BeginObject)) | |
| 72 | .Or(endObject.Tag(TokenType.EndObject)) |  | 73 | .Or(endObject.Tag(TokenType.EndObject)) | |
| 73 | .Or(beginArray.Tag(TokenType.BeginArray)) |  | 74 | .Or(beginArray.Tag(TokenType.BeginArray)) | |
| 74 | .Or(endArray.Tag(TokenType.EndArray)) |  | 75 | .Or(endArray.Tag(TokenType.EndArray)) | |
| 75 | .Or(nameSep.Tag(TokenType.NameSeparator)) |  | 76 | .Or(nameSep.Tag(TokenType.NameSeparator)) | |
| 76 | .Or(valueSep.Tag(TokenType.ValueSeparator)) |  | 77 | .Or(valueSep.Tag(TokenType.ValueSeparator)) | |
|  | 78 | .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace)); | |||
| 77 |  | 79 | |||
| 78 |  | 80 | |||
| 79 | var jsonStringExpression = |  | 81 | var jsonStringExpression = | |
| 80 | quote.Tag(TokenType.StringBound) |  | 82 | quote.Tag(TokenType.StringBound) | |
| 81 | .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar)) |  | 83 | .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar)) | |
| 82 | .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode)) |  | 84 | .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode)) | |
| 83 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); |  | 85 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); | |
| 84 |  | 86 | |||
| 85 |  | 87 | |||
| 86 | m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression); |  | 88 | m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression); | |
| 87 | m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression); |  | 89 | m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression); | |
| 88 |  | 90 | |||
| 89 |  | 91 | |||
| 90 | } |  | 92 | } | |
| 91 |  | 93 | |||
| 92 | protected override IAlphabetBuilder<char> AlphabetBuilder { |  | 94 | protected override IAlphabetBuilder<char> AlphabetBuilder { | |
| 93 | get { |  | 95 | get { | |
| 94 | return m_defaultAlphabet; |  | 96 | return m_defaultAlphabet; | |
| 95 | } |  | 97 | } | |
| 96 | } |  | 98 | } | |
| 97 |  | 99 | |||
| 98 | public ScannerContext<TokenType> JsonExpression { |  | 100 | public ScannerContext<TokenType> JsonExpression { | |
| 99 | get { |  | 101 | get { | |
| 100 | return m_jsonExpression; |  | 102 | return m_jsonExpression; | |
| 101 | } |  | 103 | } | |
| 102 | } |  | 104 | } | |
| 103 |  | 105 | |||
| 104 | public ScannerContext<TokenType> JsonStringExpression { |  | 106 | public ScannerContext<TokenType> JsonStringExpression { | |
| 105 | get { |  | 107 | get { | |
| 106 | return m_stringExpression; |  | 108 | return m_stringExpression; | |
| 107 | } |  | 109 | } | |
| 108 | } |  | 110 | } | |
| 109 |  | 111 | |||
| 110 | Token SymbolRangeToken(char start, char stop) { |  | 112 | Token SymbolRangeToken(char start, char stop) { | |
| 111 | return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x)); |  | 113 | return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x)); | |
| 112 | } |  | 114 | } | |
| 113 |  | 115 | |||
| 114 | protected override IndexedAlphabetBase<char> CreateAlphabet() { |  | 116 | protected override IndexedAlphabetBase<char> CreateAlphabet() { | |
| 115 | return new CharAlphabet(); |  | 117 | return new CharAlphabet(); | |
| 116 | } |  | 118 | } | |
| 117 |  | 119 | |||
| 118 | } |  | 120 | } | |
| 119 | } |  | 121 | } | |
| @@ -1,107 +1,109 | |||||
| 1 | using System; |  | 1 | using System; | |
| 2 | using System.Globalization; |  | 2 | using System.Globalization; | |
| 3 | using Implab.Automaton; |  | 3 | using Implab.Automaton; | |
| 4 | using System.Text; |  | 4 | using System.Text; | |
| 5 | using Implab.Components; |  | 5 | using Implab.Components; | |
| 6 | using System.IO; |  | 6 | using System.IO; | |
| 7 |  | 7 | |||
| 8 | namespace Implab.Formats.JSON { |  | 8 | namespace Implab.Formats.JSON { | |
| 9 | /// <summary> |  | 9 | /// <summary> | |
| 10 | /// Сканнер (лексер), разбивающий поток символов на токены JSON. |  | 10 | /// Сканнер (лексер), разбивающий поток символов на токены JSON. | |
| 11 | /// </summary> |  | 11 | /// </summary> | |
| 12 | public class JSONScanner : Disposable { |  | 12 | public class JSONScanner : Disposable { | |
| 13 | readonly StringBuilder m_builder = new StringBuilder(); |  | 13 | readonly StringBuilder m_builder = new StringBuilder(); | |
| 14 |  | 14 | |||
| 15 | readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonExpression; |  | 15 | readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonExpression; | |
| 16 | readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringExpression; |  | 16 | readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringExpression; | |
| 17 |  | 17 | |||
| 18 |  | 18 | |||
| 19 | readonly TextScanner m_scanner; |  | 19 | readonly TextScanner m_scanner; | |
| 20 |  | 20 | |||
| 21 | /// <summary> |  | 21 | /// <summary> | |
| 22 | /// Создает новый экземпляр сканнера |  | 22 | /// Создает новый экземпляр сканнера | |
| 23 | /// </summary> |  | 23 | /// </summary> | |
| 24 | public JSONScanner(string text) { |  | 24 | public JSONScanner(string text) { | |
| 25 | Safe.ArgumentNotEmpty(text, "text"); |  | 25 | Safe.ArgumentNotEmpty(text, "text"); | |
| 26 |  | 26 | |||
| 27 | m_scanner = new StringScanner(text); |  | 27 | m_scanner = new StringScanner(text); | |
| 28 | } |  | 28 | } | |
| 29 |  | 29 | |||
| 30 | public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { |  | 30 | public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { | |
| 31 | Safe.ArgumentNotNull(reader, "reader"); |  | 31 | Safe.ArgumentNotNull(reader, "reader"); | |
| 32 |  | 32 | |||
| 33 | m_scanner = new ReaderScanner(reader, bufferMax, chunkSize); |  | 33 | m_scanner = new ReaderScanner(reader, bufferMax, chunkSize); | |
| 34 | } |  | 34 | } | |
| 35 |  | 35 | |||
| 36 | public JSONScanner(TextReader reader) : this(reader, 1024*1024, 1024){ |  | 36 | public JSONScanner(TextReader reader) : this(reader, 1024*1024, 1024){ | |
| 37 | } |  | 37 | } | |
| 38 |  | 38 | |||
| 39 | /// <summary> |  | 39 | /// <summary> | |
| 40 | /// Читает следующий лексический элемент из входных данных. |  | 40 | /// Читает следующий лексический элемент из входных данных. | |
| 41 | /// </summary> |  | 41 | /// </summary> | |
| 42 | /// <param name="tokenValue">Возвращает значение прочитанного токена.</param> |  | 42 | /// <param name="tokenValue">Возвращает значение прочитанного токена.</param> | |
| 43 | /// <param name="tokenType">Возвращает тип прочитанного токена.</param> |  | 43 | /// <param name="tokenType">Возвращает тип прочитанного токена.</param> | |
| 44 | /// <returns><c>true</c> - чтение произведено успешно. <c>false</c> - достигнут конец входных данных</returns> |  | 44 | /// <returns><c>true</c> - чтение произведено успешно. <c>false</c> - достигнут конец входных данных</returns> | |
| 45 | /// <remarks>В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е. |  | 45 | /// <remarks>В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е. | |
| 46 | /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> |  | 46 | /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> | |
| 47 | public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { |  | 47 | public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { | |
| 48 | JSONGrammar.TokenType[] tag; |  | 48 | JSONGrammar.TokenType[] tag; | |
| 49 |  |  | 49 | while (m_jsonContext.Execute(m_scanner, out tag)) { | |
| 50 | switch (tag[0]) { |  | 50 | switch (tag[0]) { | |
| 51 | case JSONGrammar.TokenType.StringBound: |  | 51 | case JSONGrammar.TokenType.StringBound: | |
| 52 | tokenValue = ReadString(); |  | 52 | tokenValue = ReadString(); | |
| 53 | tokenType = JsonTokenType.String; |  | 53 | tokenType = JsonTokenType.String; | |
| 54 | break; |  | 54 | break; | |
| 55 | case JSONGrammar.TokenType.Number: |  | 55 | case JSONGrammar.TokenType.Number: | |
| 56 | tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture); |  | 56 | tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture); | |
| 57 | tokenType = JsonTokenType.Number; |  | 57 | tokenType = JsonTokenType.Number; | |
| 58 | break; |  | 58 | break; | |
|  | 59 | case JSONGrammar.TokenType.Whitespace: | |||
|  | 60 | continue; | |||
| 59 | default: |  | 61 | default: | |
| 60 | tokenType = (JsonTokenType)tag[0]; |  | 62 | tokenType = (JsonTokenType)tag[0]; | |
| 61 | tokenValue = m_scanner.GetTokenValue(); |  | 63 | tokenValue = m_scanner.GetTokenValue(); | |
| 62 | break; |  | 64 | break; | |
| 63 | } |  | 65 | } | |
| 64 | return true; |  | 66 | return true; | |
| 65 | } |  | 67 | } | |
| 66 | tokenValue = null; |  | 68 | tokenValue = null; | |
| 67 | tokenType = JsonTokenType.None; |  | 69 | tokenType = JsonTokenType.None; | |
| 68 | return false; |  | 70 | return false; | |
| 69 | } |  | 71 | } | |
| 70 |  | 72 | |||
| 71 | string ReadString() { |  | 73 | string ReadString() { | |
| 72 | int pos = 0; |  | 74 | int pos = 0; | |
| 73 | var buf = new char[6]; // the buffer for unescaping chars |  | 75 | var buf = new char[6]; // the buffer for unescaping chars | |
| 74 |  | 76 | |||
| 75 | JSONGrammar.TokenType[] tag; |  | 77 | JSONGrammar.TokenType[] tag; | |
| 76 | m_builder.Clear(); |  | 78 | m_builder.Clear(); | |
| 77 |  | 79 | |||
| 78 | while (m_stringContext.Execute(m_scanner, out tag)) { |  | 80 | while (m_stringContext.Execute(m_scanner, out tag)) { | |
| 79 | switch (tag[0]) { |  | 81 | switch (tag[0]) { | |
| 80 | case JSONGrammar.TokenType.StringBound: |  | 82 | case JSONGrammar.TokenType.StringBound: | |
| 81 | return m_builder.ToString(); |  | 83 | return m_builder.ToString(); | |
| 82 | case JSONGrammar.TokenType.UnescapedChar: |  | 84 | case JSONGrammar.TokenType.UnescapedChar: | |
| 83 | m_scanner.CopyTokenTo(m_builder); |  | 85 | m_scanner.CopyTokenTo(m_builder); | |
| 84 | break; |  | 86 | break; | |
| 85 | case JSONGrammar.TokenType.EscapedUnicode: // \xXXXX - unicode escape sequence |  | 87 | case JSONGrammar.TokenType.EscapedUnicode: // \xXXXX - unicode escape sequence | |
| 86 | m_scanner.CopyTokenTo(buf, 0); |  | 88 | m_scanner.CopyTokenTo(buf, 0); | |
| 87 | m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2)); |  | 89 | m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2)); | |
| 88 | pos++; |  | 90 | pos++; | |
| 89 | break; |  | 91 | break; | |
| 90 | case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence |  | 92 | case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence | |
| 91 | m_scanner.CopyTokenTo(buf, 0); |  | 93 | m_scanner.CopyTokenTo(buf, 0); | |
| 92 | m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); |  | 94 | m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); | |
| 93 | break; |  | 95 | break; | |
| 94 | } |  | 96 | } | |
| 95 |  | 97 | |||
| 96 | } |  | 98 | } | |
| 97 |  | 99 | |||
| 98 | throw new ParserException("Unexpected end of data"); |  | 100 | throw new ParserException("Unexpected end of data"); | |
| 99 | } |  | 101 | } | |
| 100 |  | 102 | |||
| 101 | protected override void Dispose(bool disposing) { |  | 103 | protected override void Dispose(bool disposing) { | |
| 102 | if (disposing) |  | 104 | if (disposing) | |
| 103 | Safe.Dispose(m_scanner); |  | 105 | Safe.Dispose(m_scanner); | |
| 104 | base.Dispose(disposing); |  | 106 | base.Dispose(disposing); | |
| 105 | } |  | 107 | } | |
| 106 | } |  | 108 | } | |
| 107 | } |  | 109 | } | |
| @@ -1,30 +1,30 | |||||
| 1 | using System; |  | 1 | using System; | |
| 2 | using System.IO; |  | 2 | using System.IO; | |
| 3 |  | 3 | |||
| 4 | namespace Implab.Formats { |  | 4 | namespace Implab.Formats { | |
| 5 | public class ReaderScanner: TextScanner { |  | 5 | public class ReaderScanner: TextScanner { | |
| 6 | const int CHUNK_SIZE = 1024; |  | 6 | const int CHUNK_SIZE = 1024*4; | |
| 7 | const int BUFFER_MAX = CHUNK_SIZE*1024; |  | 7 | const int BUFFER_MAX = CHUNK_SIZE*1024; | |
| 8 |  | 8 | |||
| 9 | readonly TextReader m_reader; |  | 9 | readonly TextReader m_reader; | |
| 10 |  | 10 | |||
| 11 | public ReaderScanner(TextReader reader, int limit, int chunk) : base(limit, chunk) { |  | 11 | public ReaderScanner(TextReader reader, int limit, int chunk) : base(limit, chunk) { | |
| 12 | Safe.ArgumentNotNull(reader, "reader"); |  | 12 | Safe.ArgumentNotNull(reader, "reader"); | |
| 13 | m_reader = reader; |  | 13 | m_reader = reader; | |
| 14 | } |  | 14 | } | |
| 15 |  | 15 | |||
| 16 | public ReaderScanner(TextReader reader) : this(reader, BUFFER_MAX, CHUNK_SIZE) { |  | 16 | public ReaderScanner(TextReader reader) : this(reader, BUFFER_MAX, CHUNK_SIZE) { | |
| 17 | } |  | 17 | } | |
| 18 |  | 18 | |||
| 19 | protected override int Read(char[] buffer, int offset, int size) { |  | 19 | protected override int Read(char[] buffer, int offset, int size) { | |
| 20 | return m_reader.Read(buffer, offset, size); |  | 20 | return m_reader.Read(buffer, offset, size); | |
| 21 | } |  | 21 | } | |
| 22 |  | 22 | |||
| 23 | protected override void Dispose(bool disposing) { |  | 23 | protected override void Dispose(bool disposing) { | |
| 24 | if (disposing) |  | 24 | if (disposing) | |
| 25 | Safe.Dispose(m_reader); |  | 25 | Safe.Dispose(m_reader); | |
| 26 | base.Dispose(disposing); |  | 26 | base.Dispose(disposing); | |
| 27 | } |  | 27 | } | |
| 28 | } |  | 28 | } | |
| 29 | } |  | 29 | } | |
| 30 |  | 30 | |||
| @@ -1,47 +1,53 | |||||
| 1 | <?xml version="1.0" encoding="utf-8"?> |  | 1 | <?xml version="1.0" encoding="utf-8"?> | |
| 2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |  | 2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | |
| 3 | <PropertyGroup> |  | 3 | <PropertyGroup> | |
| 4 | <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> |  | 4 | <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> | |
| 5 | <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> |  | 5 | <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> | |
| 6 | <ProductVersion>8.0.30703</ProductVersion> |  | 6 | <ProductVersion>8.0.30703</ProductVersion> | |
| 7 | <SchemaVersion>2.0</SchemaVersion> |  | 7 | <SchemaVersion>2.0</SchemaVersion> | |
| 8 | <ProjectGuid>{15DD7123-D504-4627-8B4F-D00C7F04D033}</ProjectGuid> |  | 8 | <ProjectGuid>{15DD7123-D504-4627-8B4F-D00C7F04D033}</ProjectGuid> | |
| 9 | <OutputType>Exe</OutputType> |  | 9 | <OutputType>Exe</OutputType> | |
| 10 | <RootNamespace>MonoPlay</RootNamespace> |  | 10 | <RootNamespace>MonoPlay</RootNamespace> | |
| 11 | <AssemblyName>MonoPlay</AssemblyName> |  | 11 | <AssemblyName>MonoPlay</AssemblyName> | |
| 12 | <TargetFrameworkVersion>v4.5</TargetFrameworkVersion> |  | 12 | <TargetFrameworkVersion>v4.5</TargetFrameworkVersion> | |
| 13 | <ReleaseVersion>0.2</ReleaseVersion> |  | 13 | <ReleaseVersion>0.2</ReleaseVersion> | |
| 14 | </PropertyGroup> |  | 14 | </PropertyGroup> | |
| 15 | <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> |  | 15 | <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> | |
| 16 | <DebugSymbols>true</DebugSymbols> |  | 16 | <DebugSymbols>true</DebugSymbols> | |
| 17 | <DebugType>full</DebugType> |  | 17 | <DebugType>full</DebugType> | |
| 18 | <Optimize>false</Optimize> |  | 18 | <Optimize>false</Optimize> | |
| 19 | <OutputPath>bin\Debug</OutputPath> |  | 19 | <OutputPath>bin\Debug</OutputPath> | |
| 20 | <DefineConstants>DEBUG;TRACE;</DefineConstants> |  | 20 | <DefineConstants>DEBUG;TRACE;</DefineConstants> | |
| 21 | <ErrorReport>prompt</ErrorReport> |  | 21 | <ErrorReport>prompt</ErrorReport> | |
| 22 | <WarningLevel>4</WarningLevel> |  | 22 | <WarningLevel>4</WarningLevel> | |
| 23 | <ConsolePause>false</ConsolePause> |  | 23 | <ConsolePause>false</ConsolePause> | |
| 24 | </PropertyGroup> |  | 24 | </PropertyGroup> | |
| 25 | <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> |  | 25 | <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> | |
| 26 | <DebugType>full</DebugType> |  | 26 | <DebugType>full</DebugType> | |
| 27 | <Optimize>true</Optimize> |  | 27 | <Optimize>true</Optimize> | |
| 28 | <OutputPath>bin\Release</OutputPath> |  | 28 | <OutputPath>bin\Release</OutputPath> | |
| 29 | <ErrorReport>prompt</ErrorReport> |  | 29 | <ErrorReport>prompt</ErrorReport> | |
| 30 | <WarningLevel>4</WarningLevel> |  | 30 | <WarningLevel>4</WarningLevel> | |
| 31 | <ConsolePause>false</ConsolePause> |  | 31 | <ConsolePause>false</ConsolePause> | |
| 32 | </PropertyGroup> |  | 32 | </PropertyGroup> | |
| 33 | <ItemGroup> |  | 33 | <ItemGroup> | |
| 34 | <Reference Include="System" /> |  | 34 | <Reference Include="System" /> | |
|  | 35 | <Reference Include="System.Text.Json"> | |||
|  | 36 | <HintPath>..\packages\System.Text.Json.2.0.0.11\lib\net40\System.Text.Json.dll</HintPath> | |||
|  | 37 | </Reference> | |||
| 35 | </ItemGroup> |  | 38 | </ItemGroup> | |
| 36 | <ItemGroup> |  | 39 | <ItemGroup> | |
| 37 | <Compile Include="Program.cs" /> |  | 40 | <Compile Include="Program.cs" /> | |
| 38 | <Compile Include="Properties\AssemblyInfo.cs" /> |  | 41 | <Compile Include="Properties\AssemblyInfo.cs" /> | |
| 39 | </ItemGroup> |  | 42 | </ItemGroup> | |
| 40 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |  | 43 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | |
| 41 | <ItemGroup> |  | 44 | <ItemGroup> | |
| 42 | <ProjectReference Include="..\Implab\Implab.csproj"> |  | 45 | <ProjectReference Include="..\Implab\Implab.csproj"> | |
| 43 | <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project> |  | 46 | <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project> | |
| 44 | <Name>Implab</Name> |  | 47 | <Name>Implab</Name> | |
| 45 | </ProjectReference> |  | 48 | </ProjectReference> | |
| 46 | </ItemGroup> |  | 49 | </ItemGroup> | |
|  | 50 | <ItemGroup> | |||
|  | 51 | <None Include="packages.config" /> | |||
|  | 52 | </ItemGroup> | |||
| 47 | </Project> No newline at end of file |  | 53 | </Project> | |
| @@ -1,37 +1,45 | |||||
| 1 | using System; |  | 1 | using System; | |
| 2 | using Implab; |  | 2 | using Implab; | |
| 3 | using System.Threading.Tasks; |  | 3 | using System.Threading.Tasks; | |
|  | 4 | using Implab.Formats.JSON; | |||
|  | 5 | using System.IO; | |||
|  | 6 | using System.Text.Json; | |||
| 4 |  | 7 | |||
| 5 | namespace MonoPlay { |  | 8 | namespace MonoPlay { | |
| 6 | class MainClass { |  | 9 | class MainClass { | |
| 7 |  | 10 | |||
| 8 |  | 11 | |||
| 9 | public static void Main(string[] args) { |  | 12 | public static void Main(string[] args) { | |
| 10 | if (args == null) |  | 13 | if (args == null) | |
| 11 | throw new ArgumentNullException("args"); |  | 14 | throw new ArgumentNullException("args"); | |
| 12 |  | 15 | int t1, t2; | ||
| 13 | var t1 = Environment.TickCount; |  | |||
| 14 |  | ||||
| 15 | DoWork().GetAwaiter().GetResult(); |  | |||
| 16 |  | 16 | |||
| 17 | var t2 = Environment.TickCount; |  | 17 | for (int i = 0; i < 2; i++) { | |
| 18 | Console.WriteLine("done: {0} ms, {1:.00} Mb, {2} GC", t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0) ); |  | 18 | t1 = Environment.TickCount; | |
|  | 19 | int elements =0; | |||
|  | 20 | using (var reader = new JSONParser(File.OpenText("/home/sergey/temp/citylots.json"))) { | |||
|  | 21 | while (reader.Read()) | |||
|  | 22 | elements++; | |||
|  | 23 | } | |||
| 19 |  | 24 | |||
| 20 | } |  | 25 | t2 = Environment.TickCount; | |
|  | 26 | Console.WriteLine("attempt {0} done: {1} ms, {2:.00} Mb, {3} GC, Elements: {4}",i+1, t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0), elements ); | |||
|  | 27 | } | |||
| 21 |  | 28 | |||
| 22 | static IPromise<int> DoItem(int x) { |  | 29 | Console.WriteLine("Syste.Text.Json"); | |
| 23 | //return Promise<int>.FromResult(x + 1); |  | 30 | var paraser = new JsonParser(); | |
| 24 | var p = new Promise<int>(); |  | 31 | for (int i = 0; i < 2; i++) { | |
| 25 | p.Resolve(x+1); |  | 32 | t1 = Environment.TickCount; | |
| 26 | return p; |  | 33 | using (var reader = File.OpenText("/home/sergey/temp/citylots.json")) { | |
| 27 | } |  | 34 | paraser.Parse(reader); | |
|  | 35 | } | |||
| 28 |  | 36 | |||
| 29 | static async Task<int> DoWork() { |  | 37 | t2 = Environment.TickCount; | |
| 30 | var c = 0; |  | 38 | Console.WriteLine("attempt {0} done: {1} ms, {2:.00} Mb, {3} GC, ",i+1, t2 - t1, GC.GetTotalMemory(false) / (1024*1024), GC.CollectionCount(0)); | |
| 31 | for (int i = 0; i < 10000000; i++) |  | 39 | } | |
| 32 | c = await DoItem(c); |  | 40 | ||
| 33 | return c; |  | 41 | ||
| 34 | } |  | 42 | } | |
| 35 |  | 43 | |||
| 36 | } |  | 44 | } | |
| 37 | } |  | 45 | } | |
| 1 | NO CONTENT: modified file |  | NO CONTENT: modified file | ||
| The requested commit or file is too big and content was truncated. Show full diff | |||||
        
        General Comments 0
    
    
  
  
                      You need to be logged in to leave comments.
                      Login now
                    
                