##// END OF EJS Templates
pretty print DFA, the minimization is still buggy
cin -
r182:76e8f2ba12b8 ref20160224
parent child
Show More
@@ -1,45 +1,52
1 <?xml version="1.0" encoding="utf-8"?>
1 <?xml version="1.0" encoding="utf-8"?>
2 <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
2 <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 <PropertyGroup>
3 <PropertyGroup>
4 <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
4 <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
5 <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
5 <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
6 <ProductVersion>8.0.30703</ProductVersion>
6 <ProductVersion>8.0.30703</ProductVersion>
7 <SchemaVersion>2.0</SchemaVersion>
7 <SchemaVersion>2.0</SchemaVersion>
8 <ProjectGuid>{4D364996-7ECD-4193-8F90-F223FFEA49DA}</ProjectGuid>
8 <ProjectGuid>{4D364996-7ECD-4193-8F90-F223FFEA49DA}</ProjectGuid>
9 <OutputType>Library</OutputType>
9 <OutputType>Library</OutputType>
10 <RootNamespace>Implab.Format.Test</RootNamespace>
10 <RootNamespace>Implab.Format.Test</RootNamespace>
11 <AssemblyName>Implab.Format.Test</AssemblyName>
11 <AssemblyName>Implab.Format.Test</AssemblyName>
12 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
12 <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
13 <ReleaseVersion>0.2</ReleaseVersion>
13 </PropertyGroup>
14 </PropertyGroup>
14 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
15 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
15 <DebugSymbols>true</DebugSymbols>
16 <DebugSymbols>true</DebugSymbols>
16 <DebugType>full</DebugType>
17 <DebugType>full</DebugType>
17 <Optimize>false</Optimize>
18 <Optimize>false</Optimize>
18 <OutputPath>bin\Debug</OutputPath>
19 <OutputPath>bin\Debug</OutputPath>
19 <DefineConstants>DEBUG;</DefineConstants>
20 <DefineConstants>DEBUG;</DefineConstants>
20 <ErrorReport>prompt</ErrorReport>
21 <ErrorReport>prompt</ErrorReport>
21 <WarningLevel>4</WarningLevel>
22 <WarningLevel>4</WarningLevel>
22 <ConsolePause>false</ConsolePause>
23 <ConsolePause>false</ConsolePause>
23 </PropertyGroup>
24 </PropertyGroup>
24 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
25 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
25 <DebugType>full</DebugType>
26 <DebugType>full</DebugType>
26 <Optimize>true</Optimize>
27 <Optimize>true</Optimize>
27 <OutputPath>bin\Release</OutputPath>
28 <OutputPath>bin\Release</OutputPath>
28 <ErrorReport>prompt</ErrorReport>
29 <ErrorReport>prompt</ErrorReport>
29 <WarningLevel>4</WarningLevel>
30 <WarningLevel>4</WarningLevel>
30 <ConsolePause>false</ConsolePause>
31 <ConsolePause>false</ConsolePause>
31 </PropertyGroup>
32 </PropertyGroup>
32 <ItemGroup>
33 <ItemGroup>
33 <Reference Include="System" />
34 <Reference Include="System" />
34 <Reference Include="nunit.framework">
35 <Reference Include="nunit.framework">
35 <HintPath>..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll</HintPath>
36 <HintPath>..\..\packages\NUnit.2.6.4\lib\nunit.framework.dll</HintPath>
36 </Reference>
37 </Reference>
37 </ItemGroup>
38 </ItemGroup>
38 <ItemGroup>
39 <ItemGroup>
39 <Compile Include="JsonTests.cs" />
40 <Compile Include="JsonTests.cs" />
40 </ItemGroup>
41 </ItemGroup>
41 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
42 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
42 <ItemGroup>
43 <ItemGroup>
44 <ProjectReference Include="..\..\Implab\Implab.csproj">
45 <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project>
46 <Name>Implab</Name>
47 </ProjectReference>
48 </ItemGroup>
49 <ItemGroup>
43 <None Include="packages.config" />
50 <None Include="packages.config" />
44 </ItemGroup>
51 </ItemGroup>
45 </Project> No newline at end of file
52 </Project>
@@ -1,12 +1,50
1 using NUnit.Framework;
1 using NUnit.Framework;
2 using System;
2 using System;
3 using Implab.Formats.JSON;
3
4
4 namespace Implab.Format.Test {
5 namespace Implab.Format.Test {
5 [TestFixture()]
6 [TestFixture]
6 public class JsonTests {
7 public class JsonTests {
7 [Test()]
8 [Test]
8 public void TestCase() {
9 public void TestScannerValidTokens() {
10 var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:");
11
12 Tuple<JsonTokenType,object>[] expexted = new [] {
13 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
14 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
15 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ),
16 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
17 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ),
18 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
19 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ),
20 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
21 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ),
22 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
23 new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ),
24 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
25 new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ),
26 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
27 new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ),
28 new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
29 new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ),
30 new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ),
31 new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ),
32 new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ),
33 new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ),
34 new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" )
35 };
36
37 object value;
38 JsonTokenType tokenType;
39 for (var i = 0; i < expexted.Length; i++) {
40
41 Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
42 Assert.AreEqual(expexted[i].Item1, tokenType);
43 Assert.AreEqual(expexted[i].Item2, value);
44 }
45
46 Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
9 }
47 }
10 }
48 }
11 }
49 }
12
50
@@ -1,4 +1,4
1 ο»Ώ<?xml version="1.0" encoding="utf-8"?>
1 ο»Ώ<?xml version="1.0" encoding="utf-8"?>
2 <packages>
2 <packages>
3 <package id="NUnit" version="3.0.1" targetFramework="net45" />
3 <package id="NUnit" version="2.6.4" targetFramework="net45" />
4 </packages> No newline at end of file
4 </packages>
@@ -1,316 +1,343
1 using Implab;
1 using Implab;
2 using System;
2 using System;
3 using System.Collections.Generic;
3 using System.Collections.Generic;
4 using System.Linq;
4 using System.Linq;
5 using System.Diagnostics;
5 using System.Diagnostics;
6 using System.IO;
7 using System.CodeDom.Compiler;
8 using System.CodeDom;
6
9
7 namespace Implab.Automaton {
10 namespace Implab.Automaton {
8 public class DFATable : IDFATableBuilder {
11 public class DFATable : IDFATableBuilder {
9 int m_stateCount;
12 int m_stateCount;
10 int m_symbolCount;
13 int m_symbolCount;
11 int m_initialState;
14 int m_initialState;
12
15
13 readonly HashSet<int> m_finalStates = new HashSet<int>();
16 readonly HashSet<int> m_finalStates = new HashSet<int>();
14 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
17 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
15
18
16
19
17 #region IDFADefinition implementation
20 #region IDFADefinition implementation
18
21
19 public bool IsFinalState(int s) {
22 public bool IsFinalState(int s) {
20 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
23 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
21
24
22 return m_finalStates.Contains(s);
25 return m_finalStates.Contains(s);
23 }
26 }
24
27
25 public IEnumerable<int> FinalStates {
28 public IEnumerable<int> FinalStates {
26 get {
29 get {
27 return m_finalStates;
30 return m_finalStates;
28 }
31 }
29 }
32 }
30
33
31 public int StateCount {
34 public int StateCount {
32 get { return m_stateCount; }
35 get { return m_stateCount; }
33 }
36 }
34
37
35 public int AlphabetSize {
38 public int AlphabetSize {
36 get { return m_symbolCount; }
39 get { return m_symbolCount; }
37 }
40 }
38
41
39 public int InitialState {
42 public int InitialState {
40 get { return m_initialState; }
43 get { return m_initialState; }
41 }
44 }
42
45
43 #endregion
46 #endregion
44
47
45 public void SetInitialState(int s) {
48 public void SetInitialState(int s) {
46 Safe.ArgumentAssert(s >= 0, "s");
49 Safe.ArgumentAssert(s >= 0, "s");
47 m_stateCount = Math.Max(m_stateCount, s + 1);
50 m_stateCount = Math.Max(m_stateCount, s + 1);
48 m_initialState = s;
51 m_initialState = s;
49 }
52 }
50
53
51 public void MarkFinalState(int state) {
54 public void MarkFinalState(int state) {
52 m_stateCount = Math.Max(m_stateCount, state + 1);
55 m_stateCount = Math.Max(m_stateCount, state + 1);
53 m_finalStates.Add(state);
56 m_finalStates.Add(state);
54 }
57 }
55
58
56 public void Add(AutomatonTransition item) {
59 public void Add(AutomatonTransition item) {
57 Safe.ArgumentAssert(item.s1 >= 0, "item");
60 Safe.ArgumentAssert(item.s1 >= 0, "item");
58 Safe.ArgumentAssert(item.s2 >= 0, "item");
61 Safe.ArgumentAssert(item.s2 >= 0, "item");
59 Safe.ArgumentAssert(item.edge >= 0, "item");
62 Safe.ArgumentAssert(item.edge >= 0, "item");
60
63
61 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
64 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
62 m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
65 m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
63
66
64 m_transitions.Add(item);
67 m_transitions.Add(item);
65 }
68 }
66
69
67 public void Clear() {
70 public void Clear() {
68 m_stateCount = 0;
71 m_stateCount = 0;
69 m_symbolCount = 0;
72 m_symbolCount = 0;
70 m_finalStates.Clear();
73 m_finalStates.Clear();
71 m_transitions.Clear();
74 m_transitions.Clear();
72 }
75 }
73
76
74 public bool Contains(AutomatonTransition item) {
77 public bool Contains(AutomatonTransition item) {
75 return m_transitions.Contains(item);
78 return m_transitions.Contains(item);
76 }
79 }
77
80
78 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
81 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
79 m_transitions.CopyTo(array, arrayIndex);
82 m_transitions.CopyTo(array, arrayIndex);
80 }
83 }
81
84
82 public bool Remove(AutomatonTransition item) {
85 public bool Remove(AutomatonTransition item) {
83 return m_transitions.Remove(item);
86 return m_transitions.Remove(item);
84 }
87 }
85
88
86 public int Count {
89 public int Count {
87 get {
90 get {
88 return m_transitions.Count;
91 return m_transitions.Count;
89 }
92 }
90 }
93 }
91
94
92 public bool IsReadOnly {
95 public bool IsReadOnly {
93 get {
96 get {
94 return false;
97 return false;
95 }
98 }
96 }
99 }
97
100
98 public IEnumerator<AutomatonTransition> GetEnumerator() {
101 public IEnumerator<AutomatonTransition> GetEnumerator() {
99 return m_transitions.GetEnumerator();
102 return m_transitions.GetEnumerator();
100 }
103 }
101
104
102 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
105 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
103 return GetEnumerator();
106 return GetEnumerator();
104 }
107 }
105
108
109 public void AddSymbol(int symbol) {
110 Safe.ArgumentAssert(symbol >= 0, "symbol");
111 m_symbolCount = Math.Max(symbol + 1, m_symbolCount);
112 }
113
106 public int[,] CreateTransitionTable() {
114 public int[,] CreateTransitionTable() {
107 var table = new int[StateCount,AlphabetSize];
115 var table = new int[StateCount,AlphabetSize];
108
116
109 for (int i = 0; i < StateCount; i++)
117 for (int i = 0; i < StateCount; i++)
110 for (int j = 0; j < AlphabetSize; j++)
118 for (int j = 0; j < AlphabetSize; j++)
111 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
119 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
112
120
113 foreach (var t in this)
121 foreach (var t in this)
114 table[t.s1,t.edge] = t.s2;
122 table[t.s1,t.edge] = t.s2;
115
123
116 return table;
124 return table;
117 }
125 }
118
126
119 public bool[] CreateFinalStateTable() {
127 public bool[] CreateFinalStateTable() {
120 var table = new bool[StateCount];
128 var table = new bool[StateCount];
121
129
122 foreach (var s in FinalStates)
130 foreach (var s in FinalStates)
123 table[s] = true;
131 table[s] = true;
124
132
125 return table;
133 return table;
126 }
134 }
127
135
128 /// <summary>Π€ΠΎΡ€ΠΌΠΈΡ€ΡƒΠ΅Ρ‚ мноТСства ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний ΠΏΠ΅Ρ€Π΅Π΄ Π½Π°Ρ‡Π°Π»ΠΎΠΌ Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° ΠΌΠΈΠ½ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</summary>
136 /// <summary>Π€ΠΎΡ€ΠΌΠΈΡ€ΡƒΠ΅Ρ‚ мноТСства ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний ΠΏΠ΅Ρ€Π΅Π΄ Π½Π°Ρ‡Π°Π»ΠΎΠΌ Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° ΠΌΠΈΠ½ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</summary>
129 /// <remarks>
137 /// <remarks>
130 /// Π’ процСссС построСния минимального Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π° трСбуСтся Ρ€Π°Π·Π΄Π΅Π»ΠΈΡ‚ΡŒ мноТСство состояний,
138 /// Π’ процСссС построСния минимального Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π° трСбуСтся Ρ€Π°Π·Π΄Π΅Π»ΠΈΡ‚ΡŒ мноТСство состояний,
131 /// Π½Π° Π΄Π²Π° подмноТСства - ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния ΠΈ всС ΠΎΡΡ‚Π°Π»ΡŒΠ½Ρ‹Π΅, послС Ρ‡Π΅Π³ΠΎ эти подмноТСства
139 /// Π½Π° Π΄Π²Π° подмноТСства - ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния ΠΈ всС ΠΎΡΡ‚Π°Π»ΡŒΠ½Ρ‹Π΅, послС Ρ‡Π΅Π³ΠΎ эти подмноТСства
132 /// Π±ΡƒΠ΄ΡƒΡ‚ Ρ€Π΅Π·Π΄Π΅Π»Π΅Π½Ρ‹ Π½Π° Π±ΠΎΠ»Π΅Π΅ ΠΌΠ΅Π»ΠΊΠΈΠ΅. Иногда трСбуСтся Π³Π°Ρ€Π°Π½Ρ‚ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ различия ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… сосотяний,
140 /// Π±ΡƒΠ΄ΡƒΡ‚ Ρ€Π΅Π·Π΄Π΅Π»Π΅Π½Ρ‹ Π½Π° Π±ΠΎΠ»Π΅Π΅ ΠΌΠ΅Π»ΠΊΠΈΠ΅. Иногда трСбуСтся Π³Π°Ρ€Π°Π½Ρ‚ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ различия ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… сосотяний,
133 /// для этого Π½Π΅ΠΎΠ±Ρ…ΠΎΠ΄ΠΈΠΌΠΎ ΠΏΠ΅Ρ€Π΅ΠΎΠΏΡ€Π΅Π΄Π΅Π»ΠΈΡ‚ΡŒ Π΄Π°Π½Π½Ρ†ΡŽ Ρ„ΡƒΠΊΠ½Ρ†ΠΈΡŽ, для получСния мноТСств ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний.
141 /// для этого Π½Π΅ΠΎΠ±Ρ…ΠΎΠ΄ΠΈΠΌΠΎ ΠΏΠ΅Ρ€Π΅ΠΎΠΏΡ€Π΅Π΄Π΅Π»ΠΈΡ‚ΡŒ Π΄Π°Π½Π½Ρ†ΡŽ Ρ„ΡƒΠΊΠ½Ρ†ΠΈΡŽ, для получСния мноТСств ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Ρ… состояний.
134 /// </remarks>
142 /// </remarks>
135 /// <returns>The final states.</returns>
143 /// <returns>The final states.</returns>
136 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
144 protected virtual IEnumerable<HashSet<int>> GroupFinalStates() {
137 return new HashSet<int>[] { m_finalStates };
145 return new HashSet<int>[] { m_finalStates };
138 }
146 }
139
147
140 protected void Optimize(
148 protected void Optimize(
141 IDFATableBuilder optimalDFA,
149 IDFATableBuilder optimalDFA,
142 IDictionary<int,int> alphabetMap,
150 IDictionary<int,int> alphabetMap,
143 IDictionary<int,int> stateMap
151 IDictionary<int,int> stateMap
144 ) {
152 ) {
145 Safe.ArgumentNotNull(optimalDFA, "dfa");
153 Safe.ArgumentNotNull(optimalDFA, "dfa");
146 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
154 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
147 Safe.ArgumentNotNull(stateMap, "stateMap");
155 Safe.ArgumentNotNull(stateMap, "stateMap");
148
156
149
157
150 var setComparer = new CustomEqualityComparer<HashSet<int>>(
158 var setComparer = new CustomEqualityComparer<HashSet<int>>(
151 (x, y) => x.SetEquals(y),
159 (x, y) => x.SetEquals(y),
152 s => s.Sum(x => x.GetHashCode())
160 s => s.Sum(x => x.GetHashCode())
153 );
161 );
154
162
155 var optimalStates = new HashSet<HashSet<int>>(setComparer);
163 var optimalStates = new HashSet<HashSet<int>>(setComparer);
156 var queue = new HashSet<HashSet<int>>(setComparer);
164 var queue = new HashSet<HashSet<int>>(setComparer);
157
165
158 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния, сгруппированныС ΠΏΠΎ ΠΌΠ°Ρ€ΠΊΠ΅Ρ€Π°ΠΌ
166 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΊΠΎΠ½Π΅Ρ‡Π½Ρ‹Π΅ состояния, сгруппированныС ΠΏΠΎ ΠΌΠ°Ρ€ΠΊΠ΅Ρ€Π°ΠΌ
159 optimalStates.UnionWith(
167 optimalStates.UnionWith(
160 GroupFinalStates()
168 GroupFinalStates()
161 );
169 );
162
170
163 var state = new HashSet<int>(
171 var state = new HashSet<int>(
164 Enumerable
172 Enumerable
165 .Range(0, m_stateCount - 1)
173 .Range(0, m_stateCount)
166 .Where(i => !m_finalStates.Contains(i))
174 .Where(i => !m_finalStates.Contains(i))
167 );
175 );
168
176
169 optimalStates.Add(state);
177 optimalStates.Add(state);
170 queue.Add(state);
178 queue.Add(state);
171
179
172 var rmap = m_transitions
180 var rmap = m_transitions
173 .GroupBy(t => t.s2)
181 .GroupBy(t => t.s2)
174 .ToDictionary(
182 .ToDictionary(
175 g => g.Key, // s2
183 g => g.Key, // s2
176 g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
184 g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
177 );
185 );
178
186
179 while (queue.Count > 0) {
187 while (queue.Count > 0) {
180 var stateA = queue.First();
188 var stateA = queue.First();
181 queue.Remove(stateA);
189 queue.Remove(stateA);
182
190
183 for (int c = 0; c < m_symbolCount; c++) {
191 for (int c = 0; c < m_symbolCount; c++) {
184 var stateX = new HashSet<int>();
192 var stateX = new HashSet<int>();
185 foreach(var a in stateA.Where(rmap.ContainsKey))
193 //foreach(var a in stateA.Where(rmap.ContainsKey))
186 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
194 // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
187
195
188 foreach (var stateY in optimalStates.ToArray()) {
196 stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1));
197
198 var tmp = optimalStates.ToArray();
199 foreach (var stateY in tmp) {
189 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
200 if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
190 var stateR1 = new HashSet<int>(stateY);
201 var stateR1 = new HashSet<int>(stateY);
191 var stateR2 = new HashSet<int>(stateY);
202 var stateR2 = new HashSet<int>(stateY);
192
203
193 stateR1.IntersectWith(stateX);
204 stateR1.IntersectWith(stateX);
194 stateR2.ExceptWith(stateX);
205 stateR2.ExceptWith(stateX);
195
206
196 optimalStates.Remove(stateY);
207 optimalStates.Remove(stateY);
197 optimalStates.Add(stateR1);
208 optimalStates.Add(stateR1);
198 optimalStates.Add(stateR2);
209 optimalStates.Add(stateR2);
199
210
200 if (queue.Contains(stateY)) {
211 if (queue.Contains(stateY)) {
201 queue.Remove(stateY);
212 queue.Remove(stateY);
202 queue.Add(stateR1);
213 queue.Add(stateR1);
203 queue.Add(stateR2);
214 queue.Add(stateR2);
204 } else {
215 } else {
205 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
216 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
206 }
217 }
207 }
218 }
208 }
219 }
209 }
220 }
210 }
221 }
211
222
212 // ΠΊΠ°Ρ€Ρ‚Π° получСния ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½ΠΎΠ³ΠΎ состояния ΠΏΠΎ ΡΠΎΠΎΡ‚Π²Π΅ΡΡ‚Π²ΡƒΡŽΡ‰Π΅ΠΌΡƒ Π΅ΠΌΡƒ простому ΡΠΎΡΡ‚ΠΎΡΠ½ΠΈΡŽ
223 // ΠΊΠ°Ρ€Ρ‚Π° получСния ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½ΠΎΠ³ΠΎ состояния ΠΏΠΎ ΡΠΎΠΎΡ‚Π²Π΅ΡΡ‚Π²ΡƒΡŽΡ‰Π΅ΠΌΡƒ Π΅ΠΌΡƒ простому ΡΠΎΡΡ‚ΠΎΡΠ½ΠΈΡŽ
213 var nextState = 0;
224 var nextState = 0;
214 foreach (var item in optimalStates) {
225 foreach (var item in optimalStates) {
215 var id = nextState++;
226 var id = nextState++;
216 foreach (var s in item)
227 foreach (var s in item)
217 stateMap[s] = id;
228 stateMap[s] = id;
218 }
229 }
219
230
220 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
231 // ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅ΠΌ ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
221 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Π΅ символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли Move(s,a1) == Move(s,a2), для любого s
232 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Π΅ символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли Move(s,a1) == Move(s,a2), для любого s
222 // для этого ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌ кластСризации, сначала
233 // для этого ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌ кластСризации, сначала
223 // считаСм, Ρ‡Ρ‚ΠΎ всС символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹
234 // считаСм, Ρ‡Ρ‚ΠΎ всС символы Π½Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹
224
235
225 var minClasses = new HashSet<HashSet<int>>(setComparer);
236 var minClasses = new HashSet<HashSet<int>>(setComparer);
226 var alphaQueue = new Queue<HashSet<int>>();
237 var alphaQueue = new Queue<HashSet<int>>();
227 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
238 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
228
239
229 // для всСх состояний, Π±ΡƒΠ΄Π΅ΠΌ ΠΏΡ€ΠΎΠ²Π΅Ρ€ΡΡ‚ΡŒ ΠΊΠ°ΠΆΠ΄Ρ‹ΠΉ класс Π½Π° Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΠΎΡΡ‚ΡŒ,
240 // для всСх состояний, Π±ΡƒΠ΄Π΅ΠΌ ΠΏΡ€ΠΎΠ²Π΅Ρ€ΡΡ‚ΡŒ ΠΊΠ°ΠΆΠ΄Ρ‹ΠΉ класс Π½Π° Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΠΎΡΡ‚ΡŒ,
230 // Ρ‚.Π΅. символы Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли ΠΎΠ½ΠΈ приводят ΠΊ Ρ€Π°Π·Π½Ρ‹ΠΌ состояниям
241 // Ρ‚.Π΅. символы Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹, Ссли ΠΎΠ½ΠΈ приводят ΠΊ Ρ€Π°Π·Π½Ρ‹ΠΌ состояниям
231 for (int s = 0 ; s < optimalStates.Count; s++) {
242 for (int s = 0 ; s < optimalStates.Count; s++) {
232 var newQueue = new Queue<HashSet<int>>();
243 var newQueue = new Queue<HashSet<int>>();
233
244
234 foreach (var A in alphaQueue) {
245 foreach (var A in alphaQueue) {
235 // классы ΠΈΠ· ΠΎΠ΄Π½ΠΎΠ³ΠΎ символа Π΄Π΅Π»ΠΈΡ‚ΡŒ бСсполСзно, ΠΏΠ΅Ρ€Π΅Π²ΠΎΠ΄ΠΈΠΌ ΠΈΡ… сразу Π²
246 // классы ΠΈΠ· ΠΎΠ΄Π½ΠΎΠ³ΠΎ символа Π΄Π΅Π»ΠΈΡ‚ΡŒ бСсполСзно, ΠΏΠ΅Ρ€Π΅Π²ΠΎΠ΄ΠΈΠΌ ΠΈΡ… сразу Π²
236 // Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚ΠΈΡ€ΡƒΡŽΡ‰ΠΈΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
247 // Ρ€Π΅Π·ΡƒΠ»ΡŒΡ‚ΠΈΡ€ΡƒΡŽΡ‰ΠΈΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚
237 if (A.Count == 1) {
248 if (A.Count == 1) {
238 minClasses.Add(A);
249 minClasses.Add(A);
239 continue;
250 continue;
240 }
251 }
241
252
242 // Ρ€Π°Π·Π»ΠΈΡ‡Π°Π΅ΠΌ классы символов, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ пСрСводят Π² Ρ€Π°Π·Π»ΠΈΡ‡Π½Ρ‹Π΅ ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ состояния
253 // Ρ€Π°Π·Π»ΠΈΡ‡Π°Π΅ΠΌ классы символов, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ пСрСводят Π² Ρ€Π°Π·Π»ΠΈΡ‡Π½Ρ‹Π΅ ΠΎΠΏΡ‚ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ состояния
243 // optimalState -> alphaClass
254 // optimalState -> alphaClass
244 var classes = new Dictionary<int, HashSet<int>>();
255 var classes = new Dictionary<int, HashSet<int>>();
245
256
246 foreach (var term in A) {
257 foreach (var term in A) {
247 // ΠΈΡ‰Π΅ΠΌ всС ΠΏΠ΅Ρ€Π΅Ρ…ΠΎΠ΄Ρ‹ класса ΠΏΠΎ символу term
258 // ΠΈΡ‰Π΅ΠΌ всС ΠΏΠ΅Ρ€Π΅Ρ…ΠΎΠ΄Ρ‹ класса ΠΏΠΎ символу term
248 var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray();
259 var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
249
250 Debug.Assert(res.Length <= 1);
251
252 var s2 = res.Length > 0 ? res[0] : -1;
253
260
254 HashSet<int> a2;
261 HashSet<int> a2;
255 if (!classes.TryGetValue(s2, out a2)) {
262 if (!classes.TryGetValue(s2, out a2)) {
256 a2 = new HashSet<int>();
263 a2 = new HashSet<int>();
257 newQueue.Enqueue(a2);
264 newQueue.Enqueue(a2);
258 classes[s2] = a2;
265 classes[s2] = a2;
259 }
266 }
260 a2.Add(term);
267 a2.Add(term);
261 }
268 }
262 }
269 }
263
270
264 if (newQueue.Count == 0)
271 if (newQueue.Count == 0)
265 break;
272 break;
266 alphaQueue = newQueue;
273 alphaQueue = newQueue;
267 }
274 }
268
275
269 // послС окончания Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° Π² ΠΎΡ‡Π΅Ρ€Π΅Π΄ΠΈ останутся ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹Π΅ классы
276 // послС окончания Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° Π² ΠΎΡ‡Π΅Ρ€Π΅Π΄ΠΈ останутся ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹Π΅ Ρ€Π°Π·Π»ΠΈΡ‡ΠΈΠΌΡ‹Π΅ классы
270 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов
277 // Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов
271 foreach (var A in alphaQueue)
278 foreach (var A in alphaQueue)
272 minClasses.Add(A);
279 minClasses.Add(A);
273
280
274 // построСниС отобраТСния Π°Π»Ρ„Π°Π²ΠΈΡ‚ΠΎΠ² Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов.
281 // построСниС отобраТСния Π°Π»Ρ„Π°Π²ΠΈΡ‚ΠΎΠ² Π²Ρ…ΠΎΠ΄Π½Ρ‹Ρ… символов.
275 // ΠΏΠΎΡΠΊΠΎΠ»ΡŒΠΊΡƒ символ DFAConst.UNCLASSIFIED_INPUT ΠΌΠΎΠΆΠ΅Ρ‚ ΠΈΠΌΠ΅Ρ‚ΡŒ
282 // ΠΏΠΎΡΠΊΠΎΠ»ΡŒΠΊΡƒ символ DFAConst.UNCLASSIFIED_INPUT ΠΌΠΎΠΆΠ΅Ρ‚ ΠΈΠΌΠ΅Ρ‚ΡŒ
276 // ΡΠΏΠ΅Ρ†ΠΈΠ°Π»ΡŒΠ½ΠΎΠ΅ Π·Π½Π°Ρ‡Π΅Π½ΠΈΠ΅, Ρ‚ΠΎΠ³Π΄Π° сохраним ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ класс,
283 // ΡΠΏΠ΅Ρ†ΠΈΠ°Π»ΡŒΠ½ΠΎΠ΅ Π·Π½Π°Ρ‡Π΅Π½ΠΈΠ΅, Ρ‚ΠΎΠ³Π΄Π° сохраним ΠΌΠΈΠ½ΠΈΠΌΠ°Π»ΡŒΠ½Ρ‹ΠΉ класс,
277 // содСрТащий этот символ Π½Π° Ρ‚ΠΎΠΌΠΆΠ΅ мСстС.
284 // содСрТащий этот символ Π½Π° Ρ‚ΠΎΠΌΠΆΠ΅ мСстС.
278
285
279 var nextCls = 0;
286 var nextCls = 0;
280 foreach (var item in minClasses) {
287 foreach (var item in minClasses) {
281 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
288 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
282 nextCls++;
289 nextCls++;
283
290
284 // сохраняСм DFAConst.UNCLASSIFIED_INPUT
291 // сохраняСм DFAConst.UNCLASSIFIED_INPUT
285 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
292 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
293 optimalDFA.AddSymbol(cls);
286
294
287 foreach (var a in item)
295 foreach (var a in item)
288 alphabetMap[a] = cls;
296 alphabetMap[a] = cls;
289 }
297 }
290
298
291 // построСниС Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π°
299 // построСниС Π°Π²Ρ‚ΠΎΠΌΠ°Ρ‚Π°
292 optimalDFA.SetInitialState(stateMap[m_initialState]);
300 optimalDFA.SetInitialState(stateMap[m_initialState]);
293
301
294 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
302 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
295 optimalDFA.MarkFinalState(sf);
303 optimalDFA.MarkFinalState(sf);
296
304
297 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
305 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
298 optimalDFA.Add(t);
306 optimalDFA.Add(t);
299 }
307 }
300
308
301 protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
309 protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
302 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
310 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
303 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
311 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
304
312
313 var data = new List<string>();
314
315 data.Add("digraph dfa {");
316
317 foreach (var final in m_finalStates)
318 data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
319
305 foreach(var t in m_transitions)
320 foreach (var t in m_transitions)
306 Console.WriteLine(
321 data.Add(String.Format(
307 "[{0}] -{{{1}}}-> [{2}]{3}",
322 "{0} -> {2} [label={1}];",
308 String.Join(",", stateAlphabet.GetSymbols(t.s1)),
323 String.Join("", stateAlphabet.GetSymbols(t.s1)),
309 String.Join("", inputAlphabet.GetSymbols(t.edge)),
324 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
310 String.Join(",", stateAlphabet.GetSymbols(t.s2)),
325 String.Join("", stateAlphabet.GetSymbols(t.s2))
311 m_finalStates.Contains(t.s2) ? "$" : ""
326 ));
312 );
327 data.Add("}");
328 return String.Join("\n", data);
313 }
329 }
314
330
331 static string ToLiteral(string input)
332 {
333 using (var writer = new StringWriter())
334 {
335 using (var provider = CodeDomProvider.CreateProvider("CSharp"))
336 {
337 provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
338 return writer.ToString();
315 }
339 }
316 }
340 }
341 }
342 }
343 }
@@ -1,15 +1,26
1 using System;
1 using System;
2 using System.Collections.Generic;
2 using System.Collections.Generic;
3
3
4 namespace Implab.Automaton {
4 namespace Implab.Automaton {
5 public interface IDFATableBuilder : IDFATable, ICollection<AutomatonTransition> {
5 public interface IDFATableBuilder : IDFATable, ICollection<AutomatonTransition> {
6 /// <summary>
6 /// <summary>
7 /// Marks the state as final.
7 /// Marks the state as final.
8 /// </summary>
8 /// </summary>
9 /// <param name="state">State.</param>
9 /// <param name="state">State.</param>
10 void MarkFinalState(int state);
10 void MarkFinalState(int state);
11
11
12 void SetInitialState(int s);
12 void SetInitialState(int s);
13
14 /// <summary>
15 /// Increases if needed the input alphabet size to hold the specified symbol.
16 /// </summary>
17 /// <remarks>
18 /// <code>
19 /// AlphabetSize = Math.Max(AlphabetSize, symbol + 1)
20 /// </code>
21 /// </remarks>
22 /// <param name="symbol">Symbol.</param>
23 void AddSymbol(int symbol);
13 }
24 }
14 }
25 }
15
26
@@ -1,83 +1,95
1 using System.Collections.Generic;
1 using System.Collections.Generic;
2 using System.Linq;
2 using System.Linq;
3
3
4 namespace Implab.Automaton.RegularExpressions {
4 namespace Implab.Automaton.RegularExpressions {
5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
6
6
7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
8 readonly IAlphabet<TInput> m_alphabet;
8 readonly IAlphabet<TInput> m_alphabet;
9
9
10 public RegularDFA(IAlphabet<TInput> alphabet) {
10 public RegularDFA(IAlphabet<TInput> alphabet) {
11 Safe.ArgumentNotNull(alphabet, "aplhabet");
11 Safe.ArgumentNotNull(alphabet, "aplhabet");
12
12
13 m_alphabet = alphabet;
13 m_alphabet = alphabet;
14 }
14 }
15
15
16
16
17 public IAlphabet<TInput> InputAlphabet {
17 public IAlphabet<TInput> InputAlphabet {
18 get {
18 get {
19 return m_alphabet;
19 return m_alphabet;
20 }
20 }
21 }
21 }
22
22
23 public void MarkFinalState(int s, TTag[] tags) {
23 public void MarkFinalState(int s, TTag[] tags) {
24 MarkFinalState(s);
24 MarkFinalState(s);
25 SetStateTag(s, tags);
25 SetStateTag(s, tags);
26 }
26 }
27
27
28 public void SetStateTag(int s, TTag[] tags) {
28 public void SetStateTag(int s, TTag[] tags) {
29 Safe.ArgumentNotNull(tags, "tags");
29 Safe.ArgumentNotNull(tags, "tags");
30 m_tags[s] = tags;
30 m_tags[s] = tags;
31 }
31 }
32
32
33 public TTag[] GetStateTag(int s) {
33 public TTag[] GetStateTag(int s) {
34 TTag[] tags;
34 TTag[] tags;
35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
36 }
36 }
37
37
38 public TTag[][] CreateTagTable() {
38 public TTag[][] CreateTagTable() {
39 var table = new TTag[StateCount][];
39 var table = new TTag[StateCount][];
40
40
41 foreach (var pair in m_tags)
41 foreach (var pair in m_tags)
42 table[pair.Key] = pair.Value;
42 table[pair.Key] = pair.Value;
43
43
44 return table;
44 return table;
45 }
45 }
46
46
47 /// <summary>
47 /// <summary>
48 /// Optimize the specified alphabet.
48 /// Optimize the specified alphabet.
49 /// </summary>
49 /// </summary>
50 /// <param name="alphabet">ΠŸΡƒΡΡ‚ΠΎΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π±ΡƒΠ΄Π΅Ρ‚ Π·ΠΏΠΎΠ»Π½Π΅Π½ Π² процСссС ΠΎΠΏΡ‚ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</param>
50 /// <param name="alphabet">ΠŸΡƒΡΡ‚ΠΎΠΉ Π°Π»Ρ„Π°Π²ΠΈΡ‚, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π±ΡƒΠ΄Π΅Ρ‚ Π·ΠΏΠΎΠ»Π½Π΅Π½ Π² процСссС ΠΎΠΏΡ‚ΠΈΠΌΠΈΠ·Π°Ρ†ΠΈΠΈ.</param>
51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
52 Safe.ArgumentNotNull(alphabet, "alphabet");
52 Safe.ArgumentNotNull(alphabet, "alphabet");
53
53
54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
55
55
56 var alphaMap = new Dictionary<int,int>();
56 var alphaMap = new Dictionary<int,int>();
57 var stateMap = new Dictionary<int,int>();
57 var stateMap = new Dictionary<int,int>();
58
58
59 Optimize(dfa, alphaMap, stateMap);
59 Optimize(dfa, alphaMap, stateMap);
60
60
61 // mark tags in the new DFA
61 // mark tags in the new DFA
62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
64
64
65 // make the alphabet for the new DFA
65 // make the alphabet for the new DFA
66 // skip all unclassified symbols
66 // skip all unclassified symbols
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
69
69
70 var orig = ToString();
71 var opt = dfa.ToString();
72
70 return dfa;
73 return dfa;
71 }
74 }
72
75
73 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
76 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
74 var arrayComparer = new CustomEqualityComparer<TTag[]>(
77 var arrayComparer = new CustomEqualityComparer<TTag[]>(
75 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
78 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
76 x => x.Sum(it => x.GetHashCode())
79 x => x.Sum(it => x.GetHashCode())
77 );
80 );
78 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
81 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
79 }
82 }
80
83
84 public override string ToString() {
85 var states = new MapAlphabet<string>(false, null);
86
87 for (int i = 0; i < StateCount; i++)
88 states.DefineSymbol(string.Format("s{0}", i), i);
89
90 return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
91 }
92
81 }
93 }
82 }
94 }
83
95
@@ -1,63 +1,64
1 using System;
1 using System;
2 using System.Threading;
2 using System.Threading;
3
3
4 namespace Implab.Components {
4 namespace Implab.Components {
5 /// <summary>
5 /// <summary>
6 /// Creates an instace on-demand and allows it to be garbage collected.
6 /// Creates an instace on-demand and allows it to be garbage collected.
7 /// </summary>
7 /// </summary>
8 /// <remarks>
8 /// <remarks>
9 /// Usefull when dealing with memory-intensive objects which are frequently used.
9 /// Usefull when dealing with memory-intensive objects which are frequently used.
10 /// This class is similar to <see cref="ObjectPool{T}"/> except is a singleton.
10 /// This class is similar to <see cref="ObjectPool{T}"/> except it is a singleton.
11 /// </remarks>
11 /// </remarks>
12 public class LazyAndWeak<T> where T : class {
12 public class LazyAndWeak<T> where T : class {
13
13
14 readonly Func<T> m_factory;
14 readonly Func<T> m_factory;
15 readonly object m_lock;
15 readonly object m_lock;
16 WeakReference m_reference;
16 WeakReference m_reference;
17
17
18
18
19 public LazyAndWeak(Func<T> factory, bool useLock) {
19 public LazyAndWeak(Func<T> factory, bool useLock) {
20 Safe.ArgumentNotNull(factory, "factory");
20 Safe.ArgumentNotNull(factory, "factory");
21 m_factory = factory;
21 m_factory = factory;
22 m_lock = useLock ? new object() : null;
22 m_lock = useLock ? new object() : null;
23 }
23 }
24
24
25 public LazyAndWeak(Func<T> factory) : this(factory, false) {
25 public LazyAndWeak(Func<T> factory) : this(factory, false) {
26 }
26 }
27
27
28 public T Value {
28 public T Value {
29 get {
29 get {
30 while (true) {
30 while (true) {
31 var weak = m_reference;
31 var weak = m_reference;
32 T value;
32 T value;
33 if (weak != null) {
33 if (weak != null) {
34 value = weak.Target as T;
34 value = weak.Target as T;
35 if (value != null)
35 if (value != null)
36 return value;
36 return value;
37 }
37 }
38
38
39 if (m_lock == null) {
39 if (m_lock == null) {
40 value = m_factory();
40 value = m_factory();
41
41
42 if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak)
42 if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak)
43 return value;
43 return value;
44 } else {
44 } else {
45 lock (m_lock) {
45 lock (m_lock) {
46 // double check
46 // double check
47 weak = m_reference;
47 if (weak != null) {
48 if (weak != null) {
48 value = weak.Target as T;
49 value = weak.Target as T;
49 if (value != null)
50 if (value != null)
50 return value;
51 return value;
51 }
52 }
52 // we are safe to write
53 // we are safe to write
53 value = m_factory();
54 value = m_factory();
54 m_reference = new WeakReference(value);
55 m_reference = new WeakReference(value);
55 return value;
56 return value;
56 }
57 }
57 }
58 }
58 }
59 }
59 }
60 }
60 }
61 }
61 }
62 }
62 }
63 }
63
64
@@ -1,119 +1,119
1 using System.Linq;
1 using System.Linq;
2 using Implab.Automaton.RegularExpressions;
2 using Implab.Automaton.RegularExpressions;
3 using System;
3 using System;
4 using Implab.Automaton;
4 using Implab.Automaton;
5 using Implab.Components;
5 using Implab.Components;
6
6
7 namespace Implab.Formats.JSON {
7 namespace Implab.Formats.JSON {
8 class JSONGrammar : Grammar<char> {
8 class JSONGrammar : Grammar<char> {
9 public enum TokenType {
9 public enum TokenType {
10 None,
10 None,
11 BeginObject,
11 BeginObject,
12 EndObject,
12 EndObject,
13 BeginArray,
13 BeginArray,
14 EndArray,
14 EndArray,
15 String,
15 String,
16 Number,
16 Number,
17 Literal,
17 Literal,
18 NameSeparator,
18 NameSeparator,
19 ValueSeparator,
19 ValueSeparator,
20
20
21 StringBound,
21 StringBound,
22 EscapedChar,
22 EscapedChar,
23 UnescapedChar,
23 UnescapedChar,
24 EscapedUnicode
24 EscapedUnicode
25 }
25 }
26
26
27 static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());
27 static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());
28
28
29 public static JSONGrammar Instance {
29 public static JSONGrammar Instance {
30 get { return _instance.Value; }
30 get { return _instance.Value; }
31 }
31 }
32
32
33 readonly ScannerContext<TokenType> m_jsonExpression;
33 readonly ScannerContext<TokenType> m_jsonExpression;
34 readonly ScannerContext<TokenType> m_stringExpression;
34 readonly ScannerContext<TokenType> m_stringExpression;
35 readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();
35 readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();
36
36
37 public JSONGrammar() {
37 public JSONGrammar() {
38 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
38 DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
39 var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
39 var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
40 var digit9 = SymbolRangeToken('1', '9');
40 var digit9 = SymbolRangeToken('1', '9');
41 var zero = SymbolToken('0');
41 var zero = SymbolToken('0');
42 var digit = zero.Or(digit9);
42 var digit = zero.Or(digit9);
43 var dot = SymbolToken('.');
43 var dot = SymbolToken('.');
44 var minus = SymbolToken('-');
44 var minus = SymbolToken('-');
45 var sign = SymbolSetToken('-', '+');
45 var sign = SymbolSetToken('-', '+');
46 var expSign = SymbolSetToken('e', 'E');
46 var expSign = SymbolSetToken('e', 'E');
47 var letters = SymbolRangeToken('a', 'z');
47 var letters = SymbolRangeToken('a', 'z');
48 var integer = zero.Or(digit9.Cat(digit.EClosure()));
48 var integer = zero.Or(digit9.Cat(digit.EClosure()));
49 var frac = dot.Cat(digit.Closure());
49 var frac = dot.Cat(digit.Closure());
50 var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
50 var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
51 var quote = SymbolToken('"');
51 var quote = SymbolToken('"');
52 var backSlash = SymbolToken('\\');
52 var backSlash = SymbolToken('\\');
53 var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
53 var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
54 var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
54 var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
55 var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
55 var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
56 var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
56 var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
57 var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
57 var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
58 var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
58 var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
59 var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
59 var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
60 var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
60 var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
61 var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);
61 var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);
62
62
63 var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
63 var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
64 var literal = letters.Closure();
64 var literal = letters.Closure();
65 var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
65 var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
66
66
67 var jsonExpression =
67 var jsonExpression =
68 number.Tag(TokenType.Number)
68 number.Tag(TokenType.Number)
69 .Or(literal.Tag(TokenType.Literal))
69 .Or(literal.Tag(TokenType.Literal))
70 .Or(quote.Tag(TokenType.StringBound))
70 .Or(quote.Tag(TokenType.StringBound))
71 .Or(beginObject.Tag(TokenType.BeginObject))
71 .Or(beginObject.Tag(TokenType.BeginObject))
72 .Or(endObject.Tag(TokenType.EndObject))
72 .Or(endObject.Tag(TokenType.EndObject))
73 .Or(beginArray.Tag(TokenType.BeginArray))
73 .Or(beginArray.Tag(TokenType.BeginArray))
74 .Or(endArray.Tag(TokenType.EndArray))
74 .Or(endArray.Tag(TokenType.EndArray))
75 .Or(nameSep.Tag(TokenType.NameSeparator))
75 .Or(nameSep.Tag(TokenType.NameSeparator))
76 .Or(valueSep.Tag(TokenType.ValueSeparator));
76 .Or(valueSep.Tag(TokenType.ValueSeparator));
77
77
78
78
79 var jsonStringExpression =
79 var jsonStringExpression =
80 quote.Tag(TokenType.StringBound)
80 quote.Tag(TokenType.StringBound)
81 .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
81 .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
82 .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
82 .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
83 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
83 .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
84
84
85
85
86 m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
86 m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
87 m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
87 m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
88
88
89
89
90 }
90 }
91
91
92 protected override IAlphabetBuilder<char> AlphabetBuilder {
92 protected override IAlphabetBuilder<char> AlphabetBuilder {
93 get {
93 get {
94 return m_defaultAlphabet;
94 return m_defaultAlphabet;
95 }
95 }
96 }
96 }
97
97
98 public ScannerContext<TokenType> JsonExpression {
98 public ScannerContext<TokenType> JsonExpression {
99 get {
99 get {
100 return m_jsonExpression;
100 return m_jsonExpression;
101 }
101 }
102 }
102 }
103
103
104 public ScannerContext<TokenType> JsonStringExpression {
104 public ScannerContext<TokenType> JsonStringExpression {
105 get {
105 get {
106 return m_stringExpression;
106 return m_stringExpression;
107 }
107 }
108 }
108 }
109
109
110 Token SymbolRangeToken(char start, char stop) {
110 Token SymbolRangeToken(char start, char stop) {
111 return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x));
111 return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
112 }
112 }
113
113
114 protected override IndexedAlphabetBase<char> CreateAlphabet() {
114 protected override IndexedAlphabetBase<char> CreateAlphabet() {
115 return new CharAlphabet();
115 return new CharAlphabet();
116 }
116 }
117
117
118 }
118 }
119 }
119 }
@@ -1,26 +1,18
1 using System;
1 using System;
2
2
3 namespace Implab.Formats {
3 namespace Implab.Formats {
4 public class StringScanner: TextScanner {
4 public class StringScanner: TextScanner {
5 const int CHUNK_SIZE = 1024;
5 const int CHUNK_SIZE = 1024;
6
6
7 readonly string m_text;
7 public StringScanner(string text) : base(null) {
8 int m_pos;
8 Safe.ArgumentNotNull(text, "text");
9
9 var data = text.ToCharArray();
10 public StringScanner(string text) : base(text.Length, text.Length < CHUNK_SIZE ? text.Length : CHUNK_SIZE) {
10 Feed(data, 0, data.Length);
11 m_text = text;
12 Feed();
13 }
11 }
14
12
15 protected override int Read(char[] buffer, int offset, int size) {
13 protected override int Read(char[] buffer, int offset, int size) {
16 var actual = size + m_pos > m_text.Length ? m_text.Length - m_pos : size;
14 return 0;
17
18 m_text.CopyTo(m_pos,buffer,offset, actual);
19
20 m_pos += actual;
21
22 return actual;
23 }
15 }
24 }
16 }
25 }
17 }
26
18
@@ -1,162 +1,157
1 using System;
1 using System;
2 using Implab.Components;
2 using Implab.Components;
3 using System.Diagnostics;
3 using System.Diagnostics;
4 using Implab.Automaton;
4 using Implab.Automaton;
5 using System.Text;
5 using System.Text;
6
6
7 namespace Implab.Formats {
7 namespace Implab.Formats {
8 public abstract class TextScanner : Disposable {
8 public abstract class TextScanner : Disposable {
9 readonly int m_bufferMax;
9 readonly int m_bufferMax;
10 readonly int m_chunkSize;
10 readonly int m_chunkSize;
11
11
12 char[] m_buffer;
12 char[] m_buffer;
13 int m_bufferOffset;
13 int m_bufferOffset;
14 int m_bufferSize;
14 int m_bufferSize;
15 int m_tokenOffset;
15 int m_tokenOffset;
16 int m_tokenLength;
16 int m_tokenLength;
17
17
18 /// <summary>
18 /// <summary>
19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
20 /// </summary>
20 /// </summary>
21 /// <param name="bufferMax">Buffer max.</param>
21 /// <param name="bufferMax">Buffer max.</param>
22 /// <param name="chunkSize">Chunk size.</param>
22 /// <param name="chunkSize">Chunk size.</param>
23 protected TextScanner(int bufferMax, int chunkSize) {
23 protected TextScanner(int bufferMax, int chunkSize) {
24 Debug.Assert(m_chunkSize <= m_bufferMax);
24 Debug.Assert(m_chunkSize <= m_bufferMax);
25
25
26 m_bufferMax = bufferMax;
26 m_bufferMax = bufferMax;
27 m_chunkSize = chunkSize;
27 m_chunkSize = chunkSize;
28 }
28 }
29
29
30 /// <summary>
30 /// <summary>
31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
32 /// </summary>
32 /// </summary>
33 /// <param name="buffer">Buffer.</param>
33 /// <param name="buffer">Buffer.</param>
34 protected TextScanner(char[] buffer) {
34 protected TextScanner(char[] buffer) {
35 if (buffer != null) {
35 if (buffer != null) {
36 m_buffer = buffer;
36 m_buffer = buffer;
37 m_bufferSize = buffer.Length;
37 m_bufferSize = buffer.Length;
38 }
38 }
39 }
39 }
40
40
41 /// <summary>
41 /// <summary>
42 /// (hungry) Reads the next token.
42 /// (hungry) Reads the next token.
43 /// </summary>
43 /// </summary>
44 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns>
44 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns>
45 /// <param name="dfa">The transition map for the automaton</param>
45 /// <param name="dfa">The transition map for the automaton</param>
46 /// <param name="final">Final states of the automaton.</param>
46 /// <param name="final">Final states of the automaton.</param>
47 /// <param name="tags">Tags.</param>
47 /// <param name="tags">Tags.</param>
48 /// <param name="state">The initial state for the automaton.</param>
48 /// <param name="state">The initial state for the automaton.</param>
49 /// <param name="alphabet"></param>
49 /// <param name="alphabet"></param>
50 /// <param name = "tag"></param>
50 /// <param name = "tag"></param>
51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
52 m_tokenLength = 0;
52 m_tokenLength = 0;
53 tag = null;
53 tag = null;
54
54
55 var maxSymbol = alphabet.Length - 1;
55 var maxSymbol = alphabet.Length - 1;
56
56 int next;
57 do {
57 do {
58 // after the next chunk is read the offset in the buffer may change
58 // after the next chunk is read the offset in the buffer may change
59 int pos = m_bufferOffset + m_tokenLength;
59 int pos = m_bufferOffset + m_tokenLength;
60
60 next = state;
61 while (pos < m_bufferSize) {
61 while (pos < m_bufferSize) {
62 var ch = m_buffer[pos];
62 var ch = m_buffer[pos];
63
63
64 try {
64 next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
65 var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
66
65
67 if (next == AutomatonConst.UNREACHABLE_STATE)
66 if (next == AutomatonConst.UNREACHABLE_STATE)
68 break;
67 break;
69
68
70 state = next;
69 state = next;
71 }catch {
72 throw;
73 }
74 pos++;
70 pos++;
75 }
71 }
76
77 m_tokenLength = pos - m_bufferOffset;
72 m_tokenLength = pos - m_bufferOffset;
78 } while (state != AutomatonConst.UNREACHABLE_STATE && Feed());
73 } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());
79
74
80 m_tokenOffset = m_bufferOffset;
75 m_tokenOffset = m_bufferOffset;
81 m_bufferOffset += m_tokenLength;
76 m_bufferOffset += m_tokenLength;
82
77
83 if (final[state]) {
78 if (final[state]) {
84 tag = tags[state];
79 tag = tags[state];
85 return true;
80 return true;
86 }
81 }
87
82
88 if (m_bufferOffset == m_bufferSize) {
83 if (m_bufferOffset == m_bufferSize) {
89 if (m_tokenLength == 0) //EOF
84 if (m_tokenLength == 0) //EOF
90 return false;
85 return false;
91
86
92 throw new ParserException();
87 throw new ParserException();
93 }
88 }
94
89
95 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
90 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
96
91
97 }
92 }
98
93
99 protected void Feed(char[] buffer, int offset, int length) {
94 protected void Feed(char[] buffer, int offset, int length) {
100 m_buffer = buffer;
95 m_buffer = buffer;
101 m_bufferOffset = offset;
96 m_bufferOffset = offset;
102 m_bufferSize = offset + length;
97 m_bufferSize = offset + length;
103 }
98 }
104
99
105 protected bool Feed() {
100 protected bool Feed() {
106 if (m_chunkSize <= 0)
101 if (m_chunkSize <= 0)
107 return false;
102 return false;
108
103
109 if (m_buffer != null) {
104 if (m_buffer != null) {
110 var free = m_buffer.Length - m_bufferSize;
105 var free = m_buffer.Length - m_bufferSize;
111
106
112 if (free < m_chunkSize) {
107 if (free < m_chunkSize) {
113 free += m_chunkSize;
108 free += m_chunkSize;
114 var used = m_bufferSize - m_bufferOffset;
109 var used = m_bufferSize - m_bufferOffset;
115 var size = used + free;
110 var size = used + free;
116
111
117 if (size > m_bufferMax)
112 if (size > m_bufferMax)
118 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));
113 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));
119
114
120 var temp = new char[size];
115 var temp = new char[size];
121
116
122 var read = Read(temp, used, m_chunkSize);
117 var read = Read(temp, used, m_chunkSize);
123 if (read == 0)
118 if (read == 0)
124 return false;
119 return false;
125
120
126 Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);
121 Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);
127
122
128 m_bufferOffset = 0;
123 m_bufferOffset = 0;
129 m_bufferSize = used + read;
124 m_bufferSize = used + read;
130 m_buffer = temp;
125 m_buffer = temp;
131 } else {
126 } else {
132 var read = Read(m_buffer, m_bufferSize, m_chunkSize);
127 var read = Read(m_buffer, m_bufferSize, m_chunkSize);
133 if (read == 0)
128 if (read == 0)
134 return false;
129 return false;
135 m_bufferSize += m_chunkSize;
130 m_bufferSize += m_chunkSize;
136 }
131 }
137 return true;
132 return true;
138 } else {
133 } else {
139 Debug.Assert(m_bufferOffset == 0);
134 Debug.Assert(m_bufferOffset == 0);
140 m_buffer = new char[m_chunkSize];
135 m_buffer = new char[m_chunkSize];
141 m_bufferSize = Read(m_buffer, 0, m_chunkSize);
136 m_bufferSize = Read(m_buffer, 0, m_chunkSize);
142 return (m_bufferSize != 0);
137 return (m_bufferSize != 0);
143 }
138 }
144 }
139 }
145
140
146 protected abstract int Read(char[] buffer, int offset, int size);
141 protected abstract int Read(char[] buffer, int offset, int size);
147
142
148 public string GetTokenValue() {
143 public string GetTokenValue() {
149 return new String(m_buffer, m_tokenOffset, m_tokenLength);
144 return new String(m_buffer, m_tokenOffset, m_tokenLength);
150 }
145 }
151
146
152 public void CopyTokenTo(char[] buffer, int offset) {
147 public void CopyTokenTo(char[] buffer, int offset) {
153 m_buffer.CopyTo(buffer, offset);
148 Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength);
154 }
149 }
155
150
156 public void CopyTokenTo(StringBuilder sb) {
151 public void CopyTokenTo(StringBuilder sb) {
157 sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
152 sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
158 }
153 }
159
154
160 }
155 }
161 }
156 }
162
157
General Comments 0
You need to be logged in to leave comments. Login now