@@ -10,6 +10,7 | |||
|
10 | 10 | <RootNamespace>Implab.Format.Test</RootNamespace> |
|
11 | 11 | <AssemblyName>Implab.Format.Test</AssemblyName> |
|
12 | 12 | <TargetFrameworkVersion>v4.5</TargetFrameworkVersion> |
|
13 | <ReleaseVersion>0.2</ReleaseVersion> | |
|
13 | 14 | </PropertyGroup> |
|
14 | 15 | <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> |
|
15 | 16 | <DebugSymbols>true</DebugSymbols> |
@@ -32,7 +33,7 | |||
|
32 | 33 | <ItemGroup> |
|
33 | 34 | <Reference Include="System" /> |
|
34 | 35 | <Reference Include="nunit.framework"> |
|
35 |
<HintPath>..\..\packages\NUnit. |
|
|
36 | <HintPath>..\..\packages\NUnit.2.6.4\lib\nunit.framework.dll</HintPath> | |
|
36 | 37 | </Reference> |
|
37 | 38 | </ItemGroup> |
|
38 | 39 | <ItemGroup> |
@@ -40,6 +41,12 | |||
|
40 | 41 | </ItemGroup> |
|
41 | 42 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
42 | 43 | <ItemGroup> |
|
44 | <ProjectReference Include="..\..\Implab\Implab.csproj"> | |
|
45 | <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project> | |
|
46 | <Name>Implab</Name> | |
|
47 | </ProjectReference> | |
|
48 | </ItemGroup> | |
|
49 | <ItemGroup> | |
|
43 | 50 | <None Include="packages.config" /> |
|
44 | 51 | </ItemGroup> |
|
45 | 52 | </Project> No newline at end of file |
@@ -1,11 +1,49 | |||
|
1 | 1 | using NUnit.Framework; |
|
2 | 2 | using System; |
|
3 | using Implab.Formats.JSON; | |
|
3 | 4 | |
|
4 | 5 | namespace Implab.Format.Test { |
|
5 |
[TestFixture |
|
|
6 | [TestFixture] | |
|
6 | 7 | public class JsonTests { |
|
7 |
[Test |
|
|
8 |
public void Test |
|
|
8 | [Test] | |
|
9 | public void TestScannerValidTokens() { | |
|
10 | var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:"); | |
|
11 | ||
|
12 | Tuple<JsonTokenType,object>[] expexted = new [] { | |
|
13 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d), | |
|
14 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), | |
|
15 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ), | |
|
16 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), | |
|
17 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ), | |
|
18 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), | |
|
19 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ), | |
|
20 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), | |
|
21 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ), | |
|
22 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), | |
|
23 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ), | |
|
24 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), | |
|
25 | new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ), | |
|
26 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), | |
|
27 | new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ), | |
|
28 | new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), | |
|
29 | new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ), | |
|
30 | new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ), | |
|
31 | new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ), | |
|
32 | new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ), | |
|
33 | new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ), | |
|
34 | new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" ) | |
|
35 | }; | |
|
36 | ||
|
37 | object value; | |
|
38 | JsonTokenType tokenType; | |
|
39 | for (var i = 0; i < expexted.Length; i++) { | |
|
40 | ||
|
41 | Assert.IsTrue(scanner.ReadToken(out value, out tokenType)); | |
|
42 | Assert.AreEqual(expexted[i].Item1, tokenType); | |
|
43 | Assert.AreEqual(expexted[i].Item2, value); | |
|
44 | } | |
|
45 | ||
|
46 | Assert.IsFalse(scanner.ReadToken(out value, out tokenType)); | |
|
9 | 47 | } |
|
10 | 48 | } |
|
11 | 49 | } |
@@ -1,4 +1,4 | |||
|
1 | 1 | <?xml version="1.0" encoding="utf-8"?> |
|
2 | 2 | <packages> |
|
3 |
<package id="NUnit" version=" |
|
|
3 | <package id="NUnit" version="2.6.4" targetFramework="net45" /> | |
|
4 | 4 | </packages> No newline at end of file |
@@ -3,6 +3,9 using System; | |||
|
3 | 3 | using System.Collections.Generic; |
|
4 | 4 | using System.Linq; |
|
5 | 5 | using System.Diagnostics; |
|
6 | using System.IO; | |
|
7 | using System.CodeDom.Compiler; | |
|
8 | using System.CodeDom; | |
|
6 | 9 | |
|
7 | 10 | namespace Implab.Automaton { |
|
8 | 11 | public class DFATable : IDFATableBuilder { |
@@ -103,6 +106,11 namespace Implab.Automaton { | |||
|
103 | 106 | return GetEnumerator(); |
|
104 | 107 | } |
|
105 | 108 | |
|
109 | public void AddSymbol(int symbol) { | |
|
110 | Safe.ArgumentAssert(symbol >= 0, "symbol"); | |
|
111 | m_symbolCount = Math.Max(symbol + 1, m_symbolCount); | |
|
112 | } | |
|
113 | ||
|
106 | 114 | public int[,] CreateTransitionTable() { |
|
107 | 115 | var table = new int[StateCount,AlphabetSize]; |
|
108 | 116 | |
@@ -162,7 +170,7 namespace Implab.Automaton { | |||
|
162 | 170 | |
|
163 | 171 | var state = new HashSet<int>( |
|
164 | 172 | Enumerable |
|
165 |
.Range(0, m_stateCount |
|
|
173 | .Range(0, m_stateCount) | |
|
166 | 174 | .Where(i => !m_finalStates.Contains(i)) |
|
167 | 175 | ); |
|
168 | 176 | |
@@ -182,10 +190,13 namespace Implab.Automaton { | |||
|
182 | 190 | |
|
183 | 191 | for (int c = 0; c < m_symbolCount; c++) { |
|
184 | 192 | var stateX = new HashSet<int>(); |
|
185 | foreach(var a in stateA.Where(rmap.ContainsKey)) | |
|
186 | stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a' | |
|
193 | //foreach(var a in stateA.Where(rmap.ContainsKey)) | |
|
194 | // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a' | |
|
187 | 195 | |
|
188 | foreach (var stateY in optimalStates.ToArray()) { | |
|
196 | stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1)); | |
|
197 | ||
|
198 | var tmp = optimalStates.ToArray(); | |
|
199 | foreach (var stateY in tmp) { | |
|
189 | 200 | if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { |
|
190 | 201 | var stateR1 = new HashSet<int>(stateY); |
|
191 | 202 | var stateR2 = new HashSet<int>(stateY); |
@@ -245,12 +256,8 namespace Implab.Automaton { | |||
|
245 | 256 | |
|
246 | 257 | foreach (var term in A) { |
|
247 | 258 | // find all transitions of the class on the symbol term |
|
248 |
var |
|
|
259 | var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First(); | |
|
249 | 260 | |
|
250 | Debug.Assert(res.Length <= 1); | |
|
251 | ||
|
252 | var s2 = res.Length > 0 ? res[0] : -1; | |
|
253 | ||
|
254 | 261 | HashSet<int> a2; |
|
255 | 262 | if (!classes.TryGetValue(s2, out a2)) { |
|
256 | 263 | a2 = new HashSet<int>(); |
@@ -283,6 +290,7 namespace Implab.Automaton { | |||
|
283 | 290 | |
|
284 | 291 | // preserve DFAConst.UNCLASSIFIED_INPUT |
|
285 | 292 | var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++; |
|
293 | optimalDFA.AddSymbol(cls); | |
|
286 | 294 | |
|
287 | 295 | foreach (var a in item) |
|
288 | 296 | alphabetMap[a] = cls; |
@@ -298,19 +306,38 namespace Implab.Automaton { | |||
|
298 | 306 | optimalDFA.Add(t); |
|
299 | 307 | } |
|
300 | 308 | |
|
301 |
protected |
|
|
309 | protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { | |
|
302 | 310 | Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); |
|
303 | 311 | Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); |
|
304 | 312 | |
|
305 | foreach(var t in m_transitions) | |
|
306 | Console.WriteLine( | |
|
307 | "[{0}] -{{{1}}}-> [{2}]{3}", | |
|
308 | String.Join(",", stateAlphabet.GetSymbols(t.s1)), | |
|
309 | String.Join("", inputAlphabet.GetSymbols(t.edge)), | |
|
310 |
|
|
|
311 | m_finalStates.Contains(t.s2) ? "$" : "" | |
|
312 | ); | |
|
313 | var data = new List<string>(); | |
|
314 | ||
|
315 | data.Add("digraph dfa {"); | |
|
316 | ||
|
317 | foreach (var final in m_finalStates) | |
|
318 | data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final)))); | |
|
319 | ||
|
320 | foreach (var t in m_transitions) | |
|
321 | data.Add(String.Format( | |
|
322 | "{0} -> {2} [label={1}];", | |
|
323 | String.Join("", stateAlphabet.GetSymbols(t.s1)), | |
|
324 | ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))), | |
|
325 | String.Join("", stateAlphabet.GetSymbols(t.s2)) | |
|
326 | )); | |
|
327 | data.Add("}"); | |
|
328 | return String.Join("\n", data); | |
|
313 | 329 | } |
|
314 | 330 | |
|
331 | static string ToLiteral(string input) | |
|
332 | { | |
|
333 | using (var writer = new StringWriter()) | |
|
334 | { | |
|
335 | using (var provider = CodeDomProvider.CreateProvider("CSharp")) | |
|
336 | { | |
|
337 | provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null); | |
|
338 | return writer.ToString(); | |
|
339 | } | |
|
340 | } | |
|
341 | } | |
|
315 | 342 | } |
|
316 | 343 | } |
@@ -10,6 +10,17 namespace Implab.Automaton { | |||
|
10 | 10 | void MarkFinalState(int state); |
|
11 | 11 | |
|
12 | 12 | void SetInitialState(int s); |
|
13 | ||
|
14 | /// <summary> | |
|
15 | /// Increases if needed the input alphabet size to hold the specified symbol. | |
|
16 | /// </summary> | |
|
17 | /// <remarks> | |
|
18 | /// <code> | |
|
19 | /// AlphabetSize = Math.Max(AlphabetSize, symbol + 1) | |
|
20 | /// </code> | |
|
21 | /// </remarks> | |
|
22 | /// <param name="symbol">Symbol.</param> | |
|
23 | void AddSymbol(int symbol); | |
|
13 | 24 | } |
|
14 | 25 | } |
|
15 | 26 |
@@ -66,6 +66,9 namespace Implab.Automaton.RegularExpres | |||
|
66 | 66 | // skip all unclassified symbols |
|
67 | 67 | foreach (var pair in alphaMap.Where(x => x.Value != 0)) |
|
68 | 68 | alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value); |
|
69 | ||
|
70 | var orig = ToString(); | |
|
71 | var opt = dfa.ToString(); | |
|
69 | 72 | |
|
70 | 73 | return dfa; |
|
71 | 74 | } |
@@ -78,6 +81,15 namespace Implab.Automaton.RegularExpres | |||
|
78 | 81 | return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g)); |
|
79 | 82 | } |
|
80 | 83 | |
|
84 | public override string ToString() { | |
|
85 | var states = new MapAlphabet<string>(false, null); | |
|
86 | ||
|
87 | for (int i = 0; i < StateCount; i++) | |
|
88 | states.DefineSymbol(string.Format("s{0}", i), i); | |
|
89 | ||
|
90 | return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize); | |
|
91 | } | |
|
92 | ||
|
81 | 93 | } |
|
82 | 94 | } |
|
83 | 95 |
@@ -7,7 +7,7 namespace Implab.Components { | |||
|
7 | 7 | /// </summary> |
|
8 | 8 | /// <remarks> |
|
9 | 9 | /// Usefull when dealing with memory-intensive objects which are frequently used. |
|
10 | /// This class is similar to <see cref="ObjectPool{T}"/> except is a singleton. | |
|
10 | /// This class is similar to <see cref="ObjectPool{T}"/> except it is a singleton. | |
|
11 | 11 | /// </remarks> |
|
12 | 12 | public class LazyAndWeak<T> where T : class { |
|
13 | 13 | |
@@ -44,6 +44,7 namespace Implab.Components { | |||
|
44 | 44 | } else { |
|
45 | 45 | lock (m_lock) { |
|
46 | 46 | // double check |
|
47 | weak = m_reference; | |
|
47 | 48 | if (weak != null) { |
|
48 | 49 | value = weak.Target as T; |
|
49 | 50 | if (value != null) |
@@ -108,7 +108,7 namespace Implab.Formats.JSON { | |||
|
108 | 108 | } |
|
109 | 109 | |
|
110 | 110 | Token SymbolRangeToken(char start, char stop) { |
|
111 | return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x)); | |
|
111 | return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x)); | |
|
112 | 112 | } |
|
113 | 113 | |
|
114 | 114 | protected override IndexedAlphabetBase<char> CreateAlphabet() { |
@@ -4,22 +4,14 namespace Implab.Formats { | |||
|
4 | 4 | public class StringScanner: TextScanner { |
|
5 | 5 | const int CHUNK_SIZE = 1024; |
|
6 | 6 | |
|
7 | readonly string m_text; | |
|
8 | int m_pos; | |
|
9 | ||
|
10 | public StringScanner(string text) : base(text.Length, text.Length < CHUNK_SIZE ? text.Length : CHUNK_SIZE) { | |
|
11 | m_text = text; | |
|
12 | Feed(); | |
|
7 | public StringScanner(string text) : base(null) { | |
|
8 | Safe.ArgumentNotNull(text, "text"); | |
|
9 | var data = text.ToCharArray(); | |
|
10 | Feed(data, 0, data.Length); | |
|
13 | 11 | } |
|
14 | 12 | |
|
15 | 13 | protected override int Read(char[] buffer, int offset, int size) { |
|
16 | var actual = size + m_pos > m_text.Length ? m_text.Length - m_pos : size; | |
|
17 | ||
|
18 | m_text.CopyTo(m_pos,buffer,offset, actual); | |
|
19 | ||
|
20 | m_pos += actual; | |
|
21 | ||
|
22 | return actual; | |
|
14 | return 0; | |
|
23 | 15 | } |
|
24 | 16 | } |
|
25 | 17 | } |
@@ -53,29 +53,24 namespace Implab.Formats { | |||
|
53 | 53 | tag = null; |
|
54 | 54 | |
|
55 | 55 | var maxSymbol = alphabet.Length - 1; |
|
56 | ||
|
56 | int next; | |
|
57 | 57 | do { |
|
58 | 58 | // after the next chunk is read the offset in the buffer may change |
|
59 | 59 | int pos = m_bufferOffset + m_tokenLength; |
|
60 | ||
|
60 | next = state; | |
|
61 | 61 | while (pos < m_bufferSize) { |
|
62 | 62 | var ch = m_buffer[pos]; |
|
63 | 63 | |
|
64 | try { | |
|
65 | var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
|
64 | next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
|
66 | 65 | |
|
67 | 66 | if (next == AutomatonConst.UNREACHABLE_STATE) |
|
68 | 67 | break; |
|
69 | ||
|
68 | ||
|
70 | 69 | state = next; |
|
71 | }catch { | |
|
72 | throw; | |
|
73 | } | |
|
74 | 70 | pos++; |
|
75 | 71 | } |
|
76 | ||
|
77 | 72 | m_tokenLength = pos - m_bufferOffset; |
|
78 |
} while ( |
|
|
73 | } while (next != AutomatonConst.UNREACHABLE_STATE && Feed()); | |
|
79 | 74 | |
|
80 | 75 | m_tokenOffset = m_bufferOffset; |
|
81 | 76 | m_bufferOffset += m_tokenLength; |
@@ -150,7 +145,7 namespace Implab.Formats { | |||
|
150 | 145 | } |
|
151 | 146 | |
|
152 | 147 | public void CopyTokenTo(char[] buffer, int offset) { |
|
153 |
m_buffer |
|
|
148 | Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength); | |
|
154 | 149 | } |
|
155 | 150 | |
|
156 | 151 | public void CopyTokenTo(StringBuilder sb) { |
General Comments 0
You need to be logged in to leave comments.
Login now