@@ -0,0 +1,30 | |||||
|
1 | using System; | |||
|
2 | using System.IO; | |||
|
3 | ||||
|
4 | namespace Implab.Formats { | |||
|
/// <summary>
/// A <see cref="TextScanner"/> whose input is pulled from a <see cref="TextReader"/>.
/// </summary>
public class ReaderScanner: TextScanner {
    // Defaults used by the single-argument constructor.
    const int CHUNK_SIZE = 1024;
    const int BUFFER_MAX = CHUNK_SIZE*1024;

    readonly TextReader m_reader;

    /// <summary>
    /// Creates a scanner over <paramref name="reader"/> using the default
    /// buffer limit and chunk size.
    /// </summary>
    /// <param name="reader">The source of characters, must not be null.</param>
    public ReaderScanner(TextReader reader) : this(reader, BUFFER_MAX, CHUNK_SIZE) {
    }

    /// <summary>
    /// Creates a scanner over <paramref name="reader"/>.
    /// </summary>
    /// <param name="reader">The source of characters, must not be null.</param>
    /// <param name="limit">Passed to the base scanner as the buffer limit.</param>
    /// <param name="chunk">Passed to the base scanner as the chunk size.</param>
    public ReaderScanner(TextReader reader, int limit, int chunk) : base(limit, chunk) {
        Safe.ArgumentNotNull(reader, "reader");
        m_reader = reader;
    }

    /// <summary>
    /// Reads up to <paramref name="size"/> characters from the underlying reader
    /// into <paramref name="buffer"/> starting at <paramref name="offset"/>.
    /// </summary>
    /// <returns>The value returned by <see cref="TextReader.Read(char[], int, int)"/>.</returns>
    protected override int Read(char[] buffer, int offset, int size) {
        var count = m_reader.Read(buffer, offset, size);
        return count;
    }

    /// <summary>
    /// Hands the reader to <c>Safe.Dispose</c> when called with
    /// <paramref name="disposing"/> set, then lets the base class finish.
    /// </summary>
    protected override void Dispose(bool disposing) {
        if (disposing) {
            Safe.Dispose(m_reader);
        }

        base.Dispose(disposing);
    }
}
|
29 | } | |||
|
30 |
@@ -0,0 +1,24 | |||||
|
1 | using System; | |||
|
2 | ||||
|
3 | namespace Implab.Formats { | |||
|
/// <summary>
/// Bundles the tables which describe one automaton (transitions, final
/// states, tags, initial state and the input translation map) so that it can
/// be run against a <see cref="TextScanner"/>.
/// </summary>
/// <typeparam name="TTag">The type of the tags attached to the states.</typeparam>
public class ScannerContext<TTag> {
    /// <summary>The transition table, indexed as <c>[state, input class]</c>.</summary>
    public int[,] Dfa { get; private set; }

    /// <summary>Flags of the final states, indexed by state.</summary>
    public bool[] Final { get; private set; }

    /// <summary>Tags of the states, indexed by state.</summary>
    public TTag[][] Tags { get; private set; }

    /// <summary>The state from which every token recognition starts.</summary>
    public int State { get; private set; }

    /// <summary>The map from the symbol code to its input class.</summary>
    public int[] Alphabet { get; private set; }

    /// <summary>
    /// Creates a new context from the prepared tables.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any of the tables is null.</exception>
    public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) {
        // fail here rather than with a NullReferenceException deep inside the scanner
        if (dfa == null)
            throw new ArgumentNullException("dfa");
        if (final == null)
            throw new ArgumentNullException("final");
        if (tags == null)
            throw new ArgumentNullException("tags");
        if (alphabet == null)
            throw new ArgumentNullException("alphabet");

        Dfa = dfa;
        Final = final;
        Tags = tags;
        State = state;
        Alphabet = alphabet;
    }

    /// <summary>
    /// Reads the next token from <paramref name="scanner"/> using the tables of this context.
    /// </summary>
    /// <param name="scanner">The scanner to read from.</param>
    /// <param name="tag">Receives the tags reported by the scanner for the token.</param>
    /// <returns>The result of <c>TextScanner.ReadToken</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="scanner"/> is null.</exception>
    public bool Execute(TextScanner scanner, out TTag[] tag) {
        if (scanner == null)
            throw new ArgumentNullException("scanner");

        return scanner.ReadToken(Dfa, Final, Tags, State, Alphabet, out tag);
    }
}
|
23 | } | |||
|
24 |
@@ -0,0 +1,26 | |||||
|
1 | using System; | |||
|
2 | ||||
|
3 | namespace Implab.Formats { | |||
|
/// <summary>
/// A <see cref="TextScanner"/> over an in-memory string.
/// </summary>
/// <remarks>
/// The whole text is handed to the base scanner as a single ready buffer
/// instead of being fed chunk by chunk. The previous implementation used the
/// text length as the buffer limit, so reaching the end of the text in the
/// middle of a token made the base <c>Feed()</c> report the buffer limit
/// instead of the end of data.
/// </remarks>
public class StringScanner: TextScanner {

    /// <summary>
    /// Creates a scanner over <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to scan, must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public StringScanner(string text) : base((char[])null) {
        // the check can't be made before the base initializer, so the base
        // is created without a buffer and the data is supplied afterwards
        if (text == null)
            throw new ArgumentNullException("text");

        var data = text.ToCharArray();
        Feed(data, 0, data.Length);
    }

    /// <summary>
    /// There is nothing to read beyond the initial buffer.
    /// </summary>
    /// <returns>Always zero.</returns>
    protected override int Read(char[] buffer, int offset, int size) {
        return 0;
    }
}
|
25 | } | |||
|
26 |
@@ -100,17 +100,25 namespace Implab.Automaton { | |||||
100 | return GetEnumerator(); |
|
100 | return GetEnumerator(); | |
101 | } |
|
101 | } | |
102 |
|
102 | |||
103 |
public |
|
/// <summary>
/// Builds a dense transition table indexed as <c>[state, input class]</c>.
/// </summary>
/// <remarks>
/// Cells for which the automaton has no transition hold
/// <c>DFAConst.UNREACHABLE_STATE</c>.
/// </remarks>
/// <returns>A new table of size <c>StateCount</c> x <c>AlphabetSize</c>.</returns>
public int[,] CreateTransitionTable() {
    var table = new int[StateCount,AlphabetSize];

    // mark everything as unreachable first; the inner loop must be bounded
    // by its own counter (it used to test 'i' and ran 'j' past the table)
    for (int i = 0; i < StateCount; i++)
        for (int j = 0; j < AlphabetSize; j++)
            table[i, j] = DFAConst.UNREACHABLE_STATE;

    // then fill in the transitions which actually exist
    foreach (var t in this)
        table[t.s1,t.edge] = t.s2;

    return table;
}
113 |
|
115 | |||
|
/// <summary>
/// Builds a lookup table of final states.
/// </summary>
/// <returns>
/// An array of <c>StateCount</c> flags where the entries listed in
/// <c>FinalStates</c> are set to <c>true</c>.
/// </returns>
public bool[] CreateFinalStateTable() {
    var flags = new bool[StateCount];

    foreach (var state in FinalStates) {
        flags[state] = true;
    }

    return flags;
}
116 |
|
124 |
@@ -13,82 +13,38 namespace Implab.Automaton { | |||||
13 | /// to the input alphabet of the automaton. It's assumed that the index to the symbol match |
|
13 | /// to the input alphabet of the automaton. It's assumed that the index to the symbol match | |
14 | /// is well known and documented. |
|
14 | /// is well known and documented. | |
15 | /// </remarks> |
|
15 | /// </remarks> | |
16 |
public abstract class IndexedAlphabetBase<T> : |
|
public abstract class IndexedAlphabetBase<T> : MapAlphabet<T> {

    // NOTE(review): the meaning of the (true, null) arguments is defined by
    // MapAlphabet<T>, which isn't visible here - confirm against that class.
    protected IndexedAlphabetBase() :base(true, null) {
    }

    /// <summary>
    /// Gets the zero-based index of the specified symbol.
    /// </summary>
    /// <param name="symbol">The symbol of the alphabet.</param>
    /// <returns>The index of the symbol.</returns>
    public abstract int GetSymbolIndex(T symbol);

    /// <summary>
    /// Gets the translation map from the index of the symbol to its class; this is useful for the optimized translation of input symbols.
    /// </summary>
    /// <remarks>
    /// The map is continuous and starts from the symbol with zero code. The last symbol
    /// in the map is the last classified symbol in the alphabet, i.e. the map can be
    /// shorter than the whole alphabet. Symbols inside the map which have no mapping
    /// get the class <c>0</c>. The map always has at least one element.
    /// </remarks>
    /// <returns>The translation map.</returns>
    public int[] GetTranslationMap() {
        var map = new Dictionary<int, int>();

        // must be initialized: it is read by Math.Max on the first iteration
        int max = 0;
        foreach (var p in Mappings) {
            var index = GetSymbolIndex(p.Key);
            max = Math.Max(max, index);
            map[index] = p.Value;
        }

        var result = new int[max + 1];

        // TryGetValue stores default(int) for the indices without a mapping
        for (int i = 0; i < result.Length; i++)
            map.TryGetValue(i, out result[i]);

        return result;
    }
}
94 | } |
|
50 | } |
@@ -69,9 +69,16 namespace Implab.Automaton { | |||||
69 |
|
69 | |||
70 |
|
70 | |||
71 | public IEnumerable<T> GetSymbols(int cls) { |
|
/// <summary>
/// Enumerates the symbols which are mapped to the specified class.
/// </summary>
/// <param name="cls">The class of the symbols, asserted to be greater than zero.</param>
/// <returns>A lazily evaluated sequence of the matching symbols.</returns>
public IEnumerable<T> GetSymbols(int cls) {
    Safe.ArgumentAssert(cls > 0, "cls");

    return from entry in m_map
           where entry.Value == cls
           select entry.Key;
}
74 | #endregion |
|
75 | #endregion | |
|
76 | ||||
|
/// <summary>
/// Gets the symbol-to-class pairs stored in this alphabet.
/// </summary>
public IEnumerable<KeyValuePair<T,int>> Mappings {
    get { return m_map; }
}
75 | } |
|
82 | } | |
76 | } |
|
83 | } | |
77 |
|
84 |
@@ -66,9 +66,9 namespace Implab.Automaton.RegularExpres | |||||
66 | return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); |
|
66 | return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); | |
67 | } |
|
67 | } | |
68 |
|
68 | |||
69 |
protected abstract IAlphabetB |
|
69 | protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); | |
70 |
|
70 | |||
71 |
protected |
|
71 | protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) { | |
72 |
|
72 | |||
73 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); |
|
73 | var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); | |
74 |
|
74 | |||
@@ -80,7 +80,16 namespace Implab.Automaton.RegularExpres | |||||
80 | if (dfa.IsFinalState(dfa.InitialState)) |
|
80 | if (dfa.IsFinalState(dfa.InitialState)) | |
81 | throw new ApplicationException("The specified language contains empty token"); |
|
81 | throw new ApplicationException("The specified language contains empty token"); | |
82 |
|
82 | |||
83 |
r |
|
83 | var ab = CreateAlphabet(); | |
|
84 | var optimal = dfa.Optimize(ab); | |||
|
85 | ||||
|
86 | return new ScannerContext<TTag>( | |||
|
87 | optimal.CreateTransitionTable(), | |||
|
88 | optimal.CreateFinalStateTable(), | |||
|
89 | optimal.CreateTagTable(), | |||
|
90 | optimal.InitialState, | |||
|
91 | ab.GetTranslationMap() | |||
|
92 | ); | |||
84 | } |
|
93 | } | |
85 |
|
94 | |||
86 | } |
|
95 | } |
@@ -36,16 +36,11 namespace Implab.Automaton.RegularExpres | |||||
36 | return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; |
|
36 | return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; | |
37 | } |
|
37 | } | |
38 |
|
38 | |||
39 |
public |
|
/// <summary>
/// Builds a table of tags indexed by state.
/// </summary>
/// <returns>
/// An array of <c>StateCount</c> entries; the entries of the states which
/// have no tags registered are left <c>null</c>.
/// </returns>
public TTag[][] CreateTagTable() {
    var result = new TTag[StateCount][];

    foreach (var entry in m_tags) {
        var state = entry.Key;
        result[state] = entry.Value;
    }

    return result;
}
@@ -4,7 +4,7 using Implab.Automaton; | |||||
4 |
|
4 | |||
5 | namespace Implab.Formats { |
|
5 | namespace Implab.Formats { | |
6 | public class ByteAlphabet : IndexedAlphabetBase<byte> { |
|
6 | public class ByteAlphabet : IndexedAlphabetBase<byte> { | |
7 |
public ByteAlphabet() |
|
7 | public ByteAlphabet() { | |
8 | } |
|
8 | } | |
9 |
|
9 | |||
10 | #region implemented abstract members of IndexedAlphabetBase |
|
10 | #region implemented abstract members of IndexedAlphabetBase | |
@@ -13,10 +13,6 namespace Implab.Formats { | |||||
13 | return (int)symbol; |
|
13 | return (int)symbol; | |
14 | } |
|
14 | } | |
15 |
|
15 | |||
16 | public override byte GetSymbolByIndex(int index) { |
|
|||
17 | return (byte)index; |
|
|||
18 | } |
|
|||
19 |
|
||||
20 | public IEnumerable<byte> InputSymbols { |
|
16 | public IEnumerable<byte> InputSymbols { | |
21 | get { |
|
17 | get { | |
22 | return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>(); |
|
18 | return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>(); |
@@ -5,19 +5,14 using Implab.Automaton; | |||||
5 | namespace Implab.Formats { |
|
5 | namespace Implab.Formats { | |
6 | public class CharAlphabet: IndexedAlphabetBase<char> { |
|
/// <summary>
/// The alphabet of <see cref="char"/> symbols where the index of a symbol is its code.
/// </summary>
public class CharAlphabet: IndexedAlphabetBase<char> {

    public CharAlphabet() {
    }

    /// <summary>
    /// Gets the index of the symbol, which is the code of the character.
    /// </summary>
    public override int GetSymbolIndex(char symbol) {
        return symbol;
    }

    /// <summary>
    /// Enumerates every possible <see cref="char"/> value.
    /// </summary>
    public IEnumerable<char> InputSymbols {
        get {
            // The second argument of Range is a count, so +1 is required to
            // include char.MaxValue itself. Cast<char>() can't be used here:
            // it unboxes the int values and fails with InvalidCastException.
            return Enumerable.Range(char.MinValue, char.MaxValue + 1).Select(x => (char)x);
        }
    }
}
23 | } |
|
18 | } |
@@ -20,14 +20,7 namespace Implab.Formats.JSON { | |||||
20 | StringBound, |
|
20 | StringBound, | |
21 | EscapedChar, |
|
21 | EscapedChar, | |
22 | UnescapedChar, |
|
22 | UnescapedChar, | |
23 |
EscapedUnicode |
|
23 | EscapedUnicode | |
24 |
|
||||
25 | Minus, |
|
|||
26 | Plus, |
|
|||
27 | Sign, |
|
|||
28 | Integer, |
|
|||
29 | Dot, |
|
|||
30 | Exp |
|
|||
31 | } |
|
24 | } | |
32 |
|
25 | |||
33 | static Lazy<JSONGrammar> _instance = new Lazy<JSONGrammar>(); |
|
26 | static Lazy<JSONGrammar> _instance = new Lazy<JSONGrammar>(); | |
@@ -36,8 +29,8 namespace Implab.Formats.JSON { | |||||
36 | get { return _instance.Value; } |
|
29 | get { return _instance.Value; } | |
37 | } |
|
30 | } | |
38 |
|
31 | |||
39 |
readonly |
|
32 | readonly ScannerContext<TokenType> m_jsonDFA; | |
40 |
readonly |
|
33 | readonly ScannerContext<TokenType> m_stringDFA; | |
41 |
|
34 | |||
42 | public JSONGrammar() { |
|
35 | public JSONGrammar() { | |
43 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); |
|
36 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); | |
@@ -88,17 +81,17 namespace Implab.Formats.JSON { | |||||
88 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); |
|
81 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); | |
89 |
|
82 | |||
90 |
|
83 | |||
91 |
m_jsonDFA = Build |
|
84 | m_jsonDFA = BuildScannerContext(jsonExpression); | |
92 |
m_stringDFA = Build |
|
85 | m_stringDFA = BuildScannerContext(jsonStringExpression); | |
93 | } |
|
86 | } | |
94 |
|
87 | |||
95 |
public |
|
88 | public ScannerContext<TokenType> JsonDFA { | |
96 | get { |
|
89 | get { | |
97 | return m_jsonDFA; |
|
90 | return m_jsonDFA; | |
98 | } |
|
91 | } | |
99 | } |
|
92 | } | |
100 |
|
93 | |||
101 |
public |
|
94 | public ScannerContext<TokenType> JsonStringDFA { | |
102 | get { |
|
95 | get { | |
103 | return m_stringDFA; |
|
96 | return m_stringDFA; | |
104 | } |
|
97 | } |
@@ -1,25 +1,37 | |||||
1 | using System; |
|
1 | using System; | |
2 | using System.Globalization; |
|
2 | using System.Globalization; | |
3 | using Implab.Automaton; |
|
3 | using Implab.Automaton; | |
|
4 | using System.Text; | |||
|
5 | using Implab.Components; | |||
|
6 | using System.IO; | |||
|
7 | using Implab.Automaton.RegularExpressions; | |||
4 |
|
8 | |||
5 | namespace Implab.Formats.JSON { |
|
9 | namespace Implab.Formats.JSON { | |
6 | /// <summary> |
|
10 | /// <summary> | |
7 | /// Сканнер (лексер), разбивающий поток символов на токены JSON. |
|
11 | /// Сканнер (лексер), разбивающий поток символов на токены JSON. | |
8 | /// </summary> |
|
12 | /// </summary> | |
9 |
public class JSONScanner : |
|
13 | public class JSONScanner : Disposable { | |
10 | char[] m_stringBuffer; |
|
14 | readonly StringBuilder m_builder = new StringBuilder(); | |
11 | DFAStateDescriptior<>[] m_stringDFA; |
|
15 | ||
12 | int[] m_stringAlphabet; |
|
16 | readonly ScannerContext<JSONGrammar.TokenType> m_jsonScanner = JSONGrammar.Instance.JsonDFA; | |
|
17 | readonly ScannerContext<JSONGrammar.TokenType> m_stringScanner = JSONGrammar.Instance.JsonStringDFA; | |||
|
18 | ||||
|
19 | ||||
|
20 | readonly TextScanner m_scanner; | |||
13 |
|
21 | |||
14 | /// <summary> |
|
/// <summary>
/// Creates a new scanner which reads the tokens from the specified string.
/// </summary>
/// <param name="text">The JSON text, checked with <c>Safe.ArgumentNotEmpty</c>.</param>
public JSONScanner(string text) {
    Safe.ArgumentNotEmpty(text, "text");

    m_scanner = new StringScanner(text);
}

/// <summary>
/// Creates a new scanner which reads the tokens from the specified reader.
/// </summary>
/// <param name="reader">The source of the JSON text, must not be null.</param>
/// <param name="bufferMax">The buffer limit passed to the underlying <see cref="ReaderScanner"/>.</param>
/// <param name="chunkSize">The chunk size passed to the underlying <see cref="ReaderScanner"/>.</param>
public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
    Safe.ArgumentNotNull(reader, "reader");

    // the limits used to be silently dropped in favour of the defaults
    m_scanner = new ReaderScanner(reader, bufferMax, chunkSize);
}
24 |
|
36 | |||
25 | /// <summary> |
|
37 | /// <summary> | |
@@ -31,19 +43,20 namespace Implab.Formats.JSON { | |||||
31 | /// <remarks>В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е. |
|
43 | /// <remarks>В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е. | |
32 | /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> |
|
44 | /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> | |
33 | public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { |
|
45 | public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { | |
34 | if (ReadTokenInternal()) { |
|
46 | JSONGrammar.TokenType[] tag; | |
35 | switch ((JSONGrammar.TokenType)m_currentState.tag[0]) { |
|
47 | if (m_jsonScanner.Execute(m_scanner, out tag)) { | |
|
48 | switch (tag[0]) { | |||
36 | case JSONGrammar.TokenType.StringBound: |
|
49 | case JSONGrammar.TokenType.StringBound: | |
37 | tokenValue = ReadString(); |
|
50 | tokenValue = ReadString(); | |
38 | tokenType = JsonTokenType.String; |
|
51 | tokenType = JsonTokenType.String; | |
39 | break; |
|
52 | break; | |
40 | case JSONGrammar.TokenType.Number: |
|
53 | case JSONGrammar.TokenType.Number: | |
41 |
tokenValue = Double.Parse( |
|
54 | tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture); | |
42 | tokenType = JsonTokenType.Number; |
|
55 | tokenType = JsonTokenType.Number; | |
43 | break; |
|
56 | break; | |
44 | default: |
|
57 | default: | |
45 |
tokenType = (JsonTokenType) |
|
58 | tokenType = (JsonTokenType)tag[0]; | |
46 |
tokenValue = |
|
59 | tokenValue = m_scanner.GetTokenValue(); | |
47 | break; |
|
60 | break; | |
48 | } |
|
61 | } | |
49 | return true; |
|
62 | return true; | |
@@ -55,26 +68,26 namespace Implab.Formats.JSON { | |||||
55 |
|
68 | |||
56 | string ReadString() { |
|
69 | string ReadString() { | |
57 | int pos = 0; |
|
70 | int pos = 0; | |
58 | Switch(m_stringDFA, m_stringAlphabet); |
|
71 | char[] buf = new char[6]; // the buffer for unescaping chars | |
59 | while (ReadTokenInternal()) { |
|
72 | ||
60 |
|
|
73 | JSONGrammar.TokenType[] tag; | |
|
74 | m_builder.Clear(); | |||
|
75 | ||||
|
76 | while (m_stringScanner.Execute(m_scanner, out tag)) { | |||
|
77 | switch (tag[0]) { | |||
61 | case JSONGrammar.TokenType.StringBound: |
|
78 | case JSONGrammar.TokenType.StringBound: | |
62 |
|
|
79 | return m_builder.ToString(); | |
63 | return new String(m_stringBuffer, 0, pos); |
|
|||
64 | case JSONGrammar.TokenType.UnescapedChar: |
|
80 | case JSONGrammar.TokenType.UnescapedChar: | |
65 | EnsureStringBufferSize(pos + m_tokenLen); |
|
81 | m_scanner.CopyTokenTo(m_builder); | |
66 | Array.Copy(m_buffer, m_tokenOffset, m_stringBuffer, pos, m_tokenLen); |
|
|||
67 | pos += m_tokenLen; |
|
|||
68 | break; |
|
82 | break; | |
69 | case JSONGrammar.TokenType.EscapedUnicode: |
|
83 | case JSONGrammar.TokenType.EscapedUnicode: // \xXXXX - unicode escape sequence | |
70 | EnsureStringBufferSize(pos + 1); |
|
84 | m_scanner.CopyTokenTo(buf, 0); | |
71 |
m_ |
|
85 | m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2)); | |
72 | pos++; |
|
86 | pos++; | |
73 | break; |
|
87 | break; | |
74 | case JSONGrammar.TokenType.EscapedChar: |
|
88 | case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence | |
75 |
|
|
89 | m_scanner.CopyTokenTo(buf, 0); | |
76 |
m_ |
|
90 | m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); | |
77 | pos++; |
|
|||
78 | break; |
|
91 | break; | |
79 | default: |
|
92 | default: | |
80 | break; |
|
93 | break; | |
@@ -84,13 +97,5 namespace Implab.Formats.JSON { | |||||
84 |
|
97 | |||
85 | throw new ParserException("Unexpected end of data"); |
|
98 | throw new ParserException("Unexpected end of data"); | |
86 | } |
|
99 | } | |
87 |
|
||||
88 | void EnsureStringBufferSize(int size) { |
|
|||
89 | if (size > m_stringBuffer.Length) { |
|
|||
90 | var newBuffer = new char[size]; |
|
|||
91 | m_stringBuffer.CopyTo(newBuffer, 0); |
|
|||
92 | m_stringBuffer = newBuffer; |
|
|||
93 | } |
|
100 | } | |
94 | } |
|
101 | } | |
95 | } |
|
|||
96 | } |
|
@@ -1,5 +1,5 | |||||
1 | using Implab; |
|
1 | using Implab; | |
2 |
using Implab. |
|
2 | using Implab.Formats; | |
3 | using System; |
|
3 | using System; | |
4 | using System.Collections.Generic; |
|
4 | using System.Collections.Generic; | |
5 | using System.Diagnostics; |
|
5 | using System.Diagnostics; | |
@@ -7,11 +7,11 using System.Linq; | |||||
7 | using System.Text; |
|
7 | using System.Text; | |
8 | using System.Threading.Tasks; |
|
8 | using System.Threading.Tasks; | |
9 |
|
9 | |||
10 | namespace Implab.JSON { |
|
10 | namespace Implab.Formats.JSON { | |
11 | /// <summary> |
|
11 | /// <summary> | |
12 | /// Класс для преобразования экранированной строки JSON |
|
12 | /// Класс для преобразования экранированной строки JSON | |
13 | /// </summary> |
|
13 | /// </summary> | |
14 | public class StringTranslator : Scanner { |
|
14 | public class StringTranslator : TextScanner<JSONGrammar.TokenType> { | |
15 | static readonly char[] _escMap; |
|
15 | static readonly char[] _escMap; | |
16 | static readonly int[] _hexMap; |
|
16 | static readonly int[] _hexMap; | |
17 |
|
17 | |||
@@ -34,8 +34,7 namespace Implab.JSON { | |||||
34 |
|
34 | |||
35 | } |
|
35 | } | |
36 |
|
36 | |||
37 | public StringTranslator() |
|
37 | public StringTranslator() { | |
38 | : base(JSONGrammar.Instance.JsonStringDFA.States, JSONGrammar.Instance.JsonStringDFA.Alphabet.GetTranslationMap()) { |
|
|||
39 | } |
|
38 | } | |
40 |
|
39 | |||
41 | public string Translate(string data) { |
|
40 | public string Translate(string data) { | |
@@ -59,7 +58,7 namespace Implab.JSON { | |||||
59 | int pos = 0; |
|
58 | int pos = 0; | |
60 |
|
59 | |||
61 | while (ReadTokenInternal()) { |
|
60 | while (ReadTokenInternal()) { | |
62 |
switch ((JSONGrammar.TokenType) |
|
61 | switch ((JSONGrammar.TokenType)Tags[0]) { | |
63 | case JSONGrammar.TokenType.UnescapedChar: |
|
62 | case JSONGrammar.TokenType.UnescapedChar: | |
64 | Array.Copy(m_buffer,m_tokenOffset,translated,pos,m_tokenLen); |
|
63 | Array.Copy(m_buffer,m_tokenOffset,translated,pos,m_tokenLen); | |
65 | pos += m_tokenLen; |
|
64 | pos += m_tokenLen; |
@@ -3,50 +3,146 using Implab.Components; | |||||
3 | using Implab.Automaton.RegularExpressions; |
|
3 | using Implab.Automaton.RegularExpressions; | |
4 | using System.Diagnostics; |
|
4 | using System.Diagnostics; | |
5 | using Implab.Automaton; |
|
5 | using Implab.Automaton; | |
|
6 | using System.IO; | |||
|
7 | using System.Text; | |||
6 |
|
8 | |||
7 | namespace Implab.Formats { |
|
9 | namespace Implab.Formats { | |
8 |
public abstract class TextScanner |
|
/// <summary>
/// The base class for the scanners which split a stream of characters into
/// tokens using table-driven automata.
/// </summary>
/// <remarks>
/// The scanner owns a character buffer. Descendants supply the data either
/// on demand through <see cref="Read"/> or at once through
/// <see cref="Feed(char[], int, int)"/>.
/// </remarks>
public abstract class TextScanner : Disposable {
    readonly int m_bufferMax;
    readonly int m_chunkSize;

    char[] m_buffer;
    int m_bufferOffset; // the start of the unread data
    int m_bufferSize;   // the end of the valid data in the buffer
    int m_tokenOffset;
    int m_tokenLength;

    /// <summary>
    /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
    /// </summary>
    /// <param name="bufferMax">The maximum size the buffer is allowed to grow to.</param>
    /// <param name="chunkSize">The number of characters requested from <see cref="Read"/> at once.</param>
    protected TextScanner(int bufferMax, int chunkSize) {
        // check the arguments, the fields aren't assigned yet
        Debug.Assert(chunkSize <= bufferMax);

        m_bufferMax = bufferMax;
        m_chunkSize = chunkSize;
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
    /// over a ready buffer. The chunk size is left zero, so <see cref="Feed()"/> never
    /// tries to read more data.
    /// </summary>
    /// <param name="buffer">The data to scan, can be null.</param>
    protected TextScanner(char[] buffer) {
        if (buffer != null) {
            m_buffer = buffer;
            m_bufferSize = buffer.Length;
        }
    }

    /// <summary>
    /// (hungry) Reads the next token.
    /// </summary>
    /// <returns><c>true</c>, if a token was read, <c>false</c> if there are no more tokens in the stream.</returns>
    /// <param name="dfa">The transition map for the automaton, indexed as <c>[state, input class]</c>.</param>
    /// <param name="final">Final states of the automaton.</param>
    /// <param name="tags">Tags of the states.</param>
    /// <param name="state">The initial state for the automaton.</param>
    /// <param name="alphabet">The map from the character code to the input class.</param>
    /// <param name="tag">Receives the tags of the state in which the token ended, <c>null</c> when no token was read.</param>
    /// <exception cref="ParserException">The input doesn't match the automaton.</exception>
    internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
        Safe.ArgumentNotNull(dfa, "dfa");
        Safe.ArgumentNotNull(final, "final");
        Safe.ArgumentNotNull(tags, "tags");
        Safe.ArgumentNotNull(alphabet, "alphabet");

        m_tokenLength = 0;
        tag = null;

        var maxSymbol = alphabet.Length - 1;

        // 'state' keeps the last valid state, 'next' is a candidate; this
        // way the state in which the token ended isn't lost when the
        // automaton rejects the following character
        int next;
        do {
            // after the next chunk is read the offset in the buffer may change
            int pos = m_bufferOffset + m_tokenLength;
            next = state;

            while(pos < m_bufferSize) {
                var ch = m_buffer[pos];

                // the characters beyond the map are unclassified
                next = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]];
                if (next == DFAConst.UNREACHABLE_STATE)
                    break;

                state = next;
                pos++;
            }

            m_tokenLength = pos - m_bufferOffset;
        } while (next != DFAConst.UNREACHABLE_STATE && Feed());

        m_tokenOffset = m_bufferOffset;
        m_bufferOffset += m_tokenLength;

        if (final[state]) {
            tag = tags[state];
            return true;
        }

        if (m_bufferOffset == m_bufferSize) {
            if (m_tokenLength == 0) //EOF
                return false;

            // the data has ended in the middle of a token
            throw new ParserException();
        }

        throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
    }

    /// <summary>
    /// Replaces the buffer of the scanner with the specified data.
    /// </summary>
    /// <param name="buffer">The new buffer.</param>
    /// <param name="offset">The offset of the data in the buffer.</param>
    /// <param name="length">The length of the data.</param>
    protected void Feed(char[] buffer, int offset, int length) {
        m_buffer = buffer;
        m_bufferOffset = offset;
        m_bufferSize = offset + length;
    }

    /// <summary>
    /// Reads the next chunk of data into the buffer; the unread data is preserved.
    /// </summary>
    /// <returns><c>true</c> if some data was read, <c>false</c> if there is no more data.</returns>
    /// <exception cref="ParserException">The buffer has to grow beyond the limit.</exception>
    protected bool Feed() {
        if (m_chunkSize <= 0)
            return false;

        if (m_buffer == null) {
            Debug.Assert(m_bufferOffset == 0);
            m_buffer = new char[m_chunkSize];
            m_bufferSize = Read(m_buffer, 0, m_chunkSize);
            return (m_bufferSize != 0);
        }

        var free = m_buffer.Length - m_bufferSize;

        if (free >= m_chunkSize) {
            // there is enough room, append to the existing buffer
            var appended = Read(m_buffer, m_bufferSize, m_chunkSize);
            if (appended == 0)
                return false;

            m_bufferSize += appended;
            return true;
        }

        // not enough room: allocate a buffer for the unread data plus the
        // new chunk, the data which is already consumed is dropped
        free += m_chunkSize;
        var used = m_bufferSize - m_bufferOffset;
        var size = used + free;

        if (size > m_bufferMax)
            throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024));

        var temp = new char[size];

        var read = Read(temp, used, m_chunkSize);
        if (read == 0)
            return false;

        Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);

        m_bufferOffset = 0;
        m_bufferSize = used + read;
        m_buffer = temp;

        return true;
    }

    /// <summary>
    /// Reads the data from the underlying source.
    /// </summary>
    /// <param name="buffer">The buffer to read into.</param>
    /// <param name="offset">The position in the buffer to start writing at.</param>
    /// <param name="size">The maximum number of characters to read.</param>
    /// <returns>The number of characters read, zero means the end of the data.</returns>
    protected abstract int Read(char[] buffer, int offset, int size);

    /// <summary>
    /// Gets the text of the last token.
    /// </summary>
    public string GetTokenValue() {
        return new String(m_buffer, m_tokenOffset, m_tokenLength);
    }

    /// <summary>
    /// Copies the characters of the last token to the specified buffer.
    /// </summary>
    /// <param name="buffer">The destination buffer.</param>
    /// <param name="offset">The position in the destination to start writing at.</param>
    public void CopyTokenTo(char[] buffer, int offset) {
        // only the token is copied, not the whole internal buffer
        Array.Copy(m_buffer, m_tokenOffset, buffer, offset, m_tokenLength);
    }

    /// <summary>
    /// Appends the characters of the last token to the specified builder.
    /// </summary>
    public void CopyTokenTo(StringBuilder sb) {
        sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
    }

}
52 | } |
|
148 | } |
@@ -151,11 +151,9 | |||||
151 | <Compile Include="Components\ExecutionState.cs" /> |
|
151 | <Compile Include="Components\ExecutionState.cs" /> | |
152 | <Compile Include="Components\RunnableComponent.cs" /> |
|
152 | <Compile Include="Components\RunnableComponent.cs" /> | |
153 | <Compile Include="Components\IFactory.cs" /> |
|
153 | <Compile Include="Components\IFactory.cs" /> | |
154 | <Compile Include="Automaton\DFAStateDescriptor.cs" /> |
|
|||
155 | <Compile Include="Automaton\EnumAlphabet.cs" /> |
|
154 | <Compile Include="Automaton\EnumAlphabet.cs" /> | |
156 | <Compile Include="Automaton\IAlphabet.cs" /> |
|
155 | <Compile Include="Automaton\IAlphabet.cs" /> | |
157 | <Compile Include="Automaton\ParserException.cs" /> |
|
156 | <Compile Include="Automaton\ParserException.cs" /> | |
158 | <Compile Include="Automaton\Scanner.cs" /> |
|
|||
159 | <Compile Include="Automaton\IndexedAlphabetBase.cs" /> |
|
157 | <Compile Include="Automaton\IndexedAlphabetBase.cs" /> | |
160 | <Compile Include="Automaton\IAlphabetBuilder.cs" /> |
|
158 | <Compile Include="Automaton\IAlphabetBuilder.cs" /> | |
161 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> |
|
159 | <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> | |
@@ -190,9 +188,10 | |||||
190 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> |
|
188 | <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> | |
191 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> |
|
189 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> | |
192 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> |
|
190 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> | |
193 | <Compile Include="Automaton\RegularExpressions\DFAStateDescriptorT.cs" /> |
|
|||
194 | <Compile Include="Formats\BufferScanner.cs" /> |
|
|||
195 | <Compile Include="Formats\TextScanner.cs" /> |
|
191 | <Compile Include="Formats\TextScanner.cs" /> | |
|
192 | <Compile Include="Formats\StringScanner.cs" /> | |||
|
193 | <Compile Include="Formats\ReaderScanner.cs" /> | |||
|
194 | <Compile Include="Formats\ScannerContext.cs" /> | |||
196 | </ItemGroup> |
|
195 | </ItemGroup> | |
197 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
196 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | |
198 | <ItemGroup /> |
|
197 | <ItemGroup /> |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now