@@ -1,9 +1,9 | |||||
1 |
|
1 | |||
namespace Implab.Automaton {
    /// <summary>
    /// Integer constants shared by the automaton code.
    /// </summary>
    public static class AutomatonConst {
        /// <summary>
        /// Reserved input value <c>0</c>.
        /// NOTE(review): presumably the class assigned to input symbols that
        /// belong to no defined class - confirm against the alphabet implementations.
        /// </summary>
        public const int UNCLASSIFIED_INPUT = 0;

        /// <summary>
        /// Reserved state value <c>-1</c>.
        /// NOTE(review): presumably marks a transition that leads to no state -
        /// confirm against the transition table code.
        /// </summary>
        public const int UNREACHABLE_STATE = -1;
    }
}
9 |
|
9 |
@@ -1,83 +1,83 | |||||
1 | using System.Collections.Generic; |
|
1 | using System.Collections.Generic; | |
2 | using System.Linq; |
|
2 | using System.Linq; | |
3 |
|
3 | |||
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// A DFA table whose states can carry arrays of tags and which keeps a
    /// reference to the input alphabet it was created with.
    /// </summary>
    /// <typeparam name="TInput">Type of the input symbols of the alphabet.</typeparam>
    /// <typeparam name="TTag">Type of the tags attached to states.</typeparam>
    public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {

        // tags attached to states, keyed by state index
        readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
        readonly IAlphabet<TInput> m_alphabet;

        /// <summary>
        /// Creates an empty DFA bound to the specified input alphabet.
        /// </summary>
        /// <param name="alphabet">The input alphabet; checked with <c>Safe.ArgumentNotNull</c>.</param>
        public RegularDFA(IAlphabet<TInput> alphabet) {
            // the reported parameter name used to be misspelled ("aplhabet")
            Safe.ArgumentNotNull(alphabet, "alphabet");

            m_alphabet = alphabet;
        }

        /// <summary>
        /// Gets the input alphabet passed to the constructor.
        /// </summary>
        public IAlphabet<TInput> InputAlphabet {
            get {
                return m_alphabet;
            }
        }

        /// <summary>
        /// Marks the state as final and attaches the specified tags to it.
        /// </summary>
        /// <param name="s">The state index.</param>
        /// <param name="tags">The tags for the state; must not be null.</param>
        public void MarkFinalState(int s, TTag[] tags) {
            MarkFinalState(s);
            SetStateTag(s, tags);
        }

        /// <summary>
        /// Attaches the specified tags to the state, replacing any tags set earlier.
        /// </summary>
        /// <param name="s">The state index.</param>
        /// <param name="tags">The tags for the state; must not be null.</param>
        public void SetStateTag(int s, TTag[] tags) {
            Safe.ArgumentNotNull(tags, "tags");
            m_tags[s] = tags;
        }

        /// <summary>
        /// Gets the tags attached to the state.
        /// </summary>
        /// <param name="s">The state index.</param>
        /// <returns>The stored tag array, or a new empty array when the state has no tags.</returns>
        public TTag[] GetStateTag(int s) {
            TTag[] tags;
            return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
        }

        /// <summary>
        /// Builds a table of tags indexed by state.
        /// </summary>
        /// <returns>
        /// An array of <c>StateCount</c> entries; entries of states without tags
        /// are left <c>null</c>.
        /// </returns>
        public TTag[][] CreateTagTable() {
            var table = new TTag[StateCount][];

            // NOTE(review): a tag stored for a state index outside
            // [0, StateCount) makes this indexer throw - confirm this is intended.
            foreach (var pair in m_tags)
                table[pair.Key] = pair.Value;

            return table;
        }

        /// <summary>
        /// Creates an optimized copy of this DFA bound to the specified alphabet.
        /// </summary>
        /// <param name="alphabet">An empty alphabet that will be filled in during optimization.</param>
        /// <returns>The new DFA with the tags of merged states concatenated.</returns>
        public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            var dfa = new RegularDFA<TInput, TTag>(alphabet);

            // NOTE(review): 'states' is never read. It is kept because the
            // DummyAlphabet constructor may validate StateCount - confirm and remove.
            var states = new DummyAlphabet(StateCount);
            var alphaMap = new Dictionary<int,int>();
            var stateMap = new Dictionary<int,int>();

            // the three-argument overload is declared elsewhere; the maps are
            // read below, so it is expected to fill them
            Optimize(dfa, alphaMap, stateMap);

            // mark tags in the new DFA
            foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
                dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());

            // make the alphabet for the new DFA
            foreach (var pair in alphaMap)
                alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);

            return dfa;
        }

        /// <summary>
        /// Splits the final states into groups whose tag arrays have the same
        /// length and contain the same set of tags.
        /// </summary>
        /// <returns>One set of state indices per distinct tag set.</returns>
        protected override IEnumerable<HashSet<int>> GroupFinalStates() {
            var tagSetComparer = new CustomEqualityComparer<TTag[]>(
                // symmetric comparison: same length and the same set of tags
                (x,y) => x.Length == y.Length && new HashSet<TTag>(x).SetEquals(y),
                // The hash is built from the distinct elements, so it is independent
                // of order and of duplicates and agrees with the comparison above.
                // unchecked: Enumerable.Sum would throw on overflow.
                x => {
                    var hash = 0;
                    foreach (var tag in x.Distinct()) {
                        if (tag != null)
                            hash = unchecked(hash + tag.GetHashCode());
                    }
                    return hash;
                }
            );

            // GetStateTag returns an empty array for final states without tags,
            // where indexing m_tags directly would throw KeyNotFoundException
            return FinalStates.GroupBy(x => GetStateTag(x), tagSetComparer).Select(g => new HashSet<int>(g));
        }

    }
}
83 |
|
83 |
@@ -1,104 +1,104 | |||||
1 | using System; |
|
1 | using System; | |
2 | using System.Globalization; |
|
2 | using System.Globalization; | |
3 | using Implab.Automaton; |
|
3 | using Implab.Automaton; | |
4 | using System.Text; |
|
4 | using System.Text; | |
5 | using Implab.Components; |
|
5 | using Implab.Components; | |
6 | using System.IO; |
|
6 | using System.IO; | |
7 |
|
7 | |||
namespace Implab.Formats.JSON {
    /// <summary>
    /// Scanner (lexer) that splits a stream of characters into JSON tokens.
    /// </summary>
    public class JSONScanner : Disposable {
        // scratch buffer reused by ReadString; cleared at the start of every string
        readonly StringBuilder m_builder = new StringBuilder();

        readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonExpression;
        readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringExpression;


        readonly TextScanner m_scanner;

        /// <summary>
        /// Creates a new scanner instance that reads from the specified string.
        /// </summary>
        /// <param name="text">The text to scan; checked with <c>Safe.ArgumentNotEmpty</c>.</param>
        public JSONScanner(string text) {
            Safe.ArgumentNotEmpty(text, "text");

            m_scanner = new StringScanner(text);
        }

        /// <summary>
        /// Creates a new scanner instance that reads from the specified reader.
        /// </summary>
        /// <param name="reader">The source of characters; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <param name="bufferMax">Passed through to <c>ReaderScanner</c>.</param>
        /// <param name="chunkSize">Passed through to <c>ReaderScanner</c>.</param>
        public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
            Safe.ArgumentNotNull(reader, "reader");

            m_scanner = new ReaderScanner(reader, bufferMax, chunkSize);
        }

        /// <summary>
        /// Reads the next lexical element from the input data.
        /// </summary>
        /// <param name="tokenValue">Returns the value of the token that was read.</param>
        /// <param name="tokenType">Returns the type of the token that was read.</param>
        /// <returns><c>true</c> - the read succeeded. <c>false</c> - the end of the input data was reached.</returns>
        /// <remarks>If a token is not recognized, an exception is thrown. Token values are processed, i.e.
        /// escaped characters in strings are translated and numbers are returned as double.</remarks>
        public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
            JSONGrammar.TokenType[] tag;
            if (m_jsonContext.Execute(m_scanner, out tag)) {
                switch (tag[0]) {
                    case JSONGrammar.TokenType.StringBound:
                        // an opening quote: the rest of the string is read with the string grammar
                        tokenValue = ReadString();
                        tokenType = JsonTokenType.String;
                        break;
                    case JSONGrammar.TokenType.Number:
                        tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture);
                        tokenType = JsonTokenType.Number;
                        break;
                    default:
                        // every other grammar token is cast directly to the public token type
                        tokenType = (JsonTokenType)tag[0];
                        tokenValue = m_scanner.GetTokenValue();
                        break;
                }
                return true;
            }
            tokenValue = null;
            tokenType = JsonTokenType.None;
            return false;
        }

        /// <summary>
        /// Reads string content up to the closing string bound, translating escape sequences.
        /// </summary>
        /// <returns>The unescaped string value.</returns>
        /// <exception cref="ParserException">The input ended before the closing string bound.</exception>
        string ReadString() {
            var buf = new char[6]; // the buffer for unescaping chars

            JSONGrammar.TokenType[] tag;
            m_builder.Clear();

            while (m_stringContext.Execute(m_scanner, out tag)) {
                switch (tag[0]) {
                    case JSONGrammar.TokenType.StringBound:
                        return m_builder.ToString();
                    case JSONGrammar.TokenType.UnescapedChar:
                        m_scanner.CopyTokenTo(m_builder);
                        break;
                    case JSONGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                        m_scanner.CopyTokenTo(buf, 0);
                        // offset 2 presumably skips the two-character escape prefix - confirm
                        m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2));
                        break;
                    case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence
                        m_scanner.CopyTokenTo(buf, 0);
                        // buf[0] is the backslash, buf[1] is the escaped character
                        m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
                        break;
                }

            }

            throw new ParserException("Unexpected end of data");
        }

        /// <summary>
        /// Disposes the underlying text scanner when called with <c>disposing</c> set,
        /// then calls the base implementation.
        /// </summary>
        /// <param name="disposing"><c>true</c> when the scanner should be disposed as well.</param>
        protected override void Dispose(bool disposing) {
            if (disposing)
                Safe.Dispose(m_scanner);
            base.Dispose(disposing);
        }
    }
}
General Comments 0
You need to be logged in to leave comments.
Login now