##// END OF EJS Templates
sync
cin -
r174:983df35b3ca1 ref20160224
parent child
Show More
This diff has been collapsed as it changes many lines, (510 lines changed) Show them Hide them
@@ -1,255 +1,255
1 using Implab;
1 using Implab;
2 using System;
2 using System;
3 using System.Collections.Generic;
3 using System.Collections.Generic;
4 using System.IO;
4 using System.IO;
5 using Implab.Components;
5 using Implab.Components;
6 using Implab.Automaton.RegularExpressions;
6 using Implab.Automaton.RegularExpressions;
7
7
8 namespace Implab.Automaton {
8 namespace Implab.Automaton {
9 /// <summary>
9 /// <summary>
10 /// Базовый класс для разбора потока входных символов на токены.
10 /// Базовый класс для разбора потока входных символов на токены.
11 /// </summary>
11 /// </summary>
12 /// <remarks>
12 /// <remarks>
13 /// Сканер имеет внутри буфер с символами входного текста, по которому перемещаются два
13 /// Сканер имеет внутри буфер с символами входного текста, по которому перемещаются два
14 /// указателя, начала и конца токена, при перемещении используется ДКА для определения
14 /// указателя, начала и конца токена, при перемещении используется ДКА для определения
15 /// конца токена и допустимости текущего символа.
15 /// конца токена и допустимости текущего символа.
16 /// </remarks>
16 /// </remarks>
17 public abstract class Scanner<TTag> : Disposable {
17 public abstract class Scanner<TTag> : Disposable {
18 protected struct ScannerConfig {
18 protected struct ScannerConfig {
19 public readonly DFAStateDescriptor<TTag>[] states;
19 public readonly DFAStateDescriptor<TTag>[] states;
20 public readonly int[] alphabet;
20 public readonly int[] alphabet;
21 public readonly int initialState;
21 public readonly int initialState;
22
22
23 public ScannerConfig(DFAStateDescriptor<TTag>[] states, int[] alphabet, int initialState) {
23 public ScannerConfig(DFAStateDescriptor<TTag>[] states, int[] alphabet, int initialState) {
24 this.initialState = initialState;
24 this.initialState = initialState;
25 this.alphabet = alphabet;
25 this.alphabet = alphabet;
26 this.states = states;
26 this.states = states;
27 }
27 }
28 }
28 }
29
29
30 Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();
30 Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();
31
31
32 ScannerConfig m_config;
32 ScannerConfig m_config;
33
33
34 protected DFAStateDescriptor<TTag> m_currentState;
34 protected DFAStateDescriptor<TTag> m_currentState;
35 int m_previewCode;
35 int m_previewCode;
36
36
37 protected int m_tokenLen;
37 protected int m_tokenLen;
38 protected int m_tokenOffset;
38 protected int m_tokenOffset;
39
39
40 protected char[] m_buffer;
40 protected char[] m_buffer;
41 protected int m_bufferSize;
41 protected int m_bufferSize;
42 protected int m_pointer;
42 protected int m_pointer;
43
43
44 TextReader m_reader;
44 TextReader m_reader;
45 bool m_disposeReader;
45 bool m_disposeReader;
46 int m_chunkSize = 1024; // 1k
46 int m_chunkSize = 1024; // 1k
47 int m_limit = 10 * 1024 * 1024; // 10Mb
47 int m_limit = 10 * 1024 * 1024; // 10Mb
48
48
49 protected Scanner(ScannerConfig config) {
49 protected Scanner(ScannerConfig config) {
50 Safe.ArgumentNotEmpty(config.states, "config.states");
50 Safe.ArgumentNotEmpty(config.states, "config.states");
51 Safe.ArgumentNotNull(config.alphabet, "config.alphabet");
51 Safe.ArgumentNotNull(config.alphabet, "config.alphabet");
52
52
53 m_config = config;
53 m_config = config;
54 }
54 }
55
55
56 /// <summary>
56 /// <summary>
57 /// Заполняет входными данными буфер.
57 /// Заполняет входными данными буфер.
58 /// </summary>
58 /// </summary>
59 /// <param name="data">Данные для обработки.</param>
59 /// <param name="data">Данные для обработки.</param>
60 /// <remarks>Копирование данных не происходит, переданный массив используется в
60 /// <remarks>Копирование данных не происходит, переданный массив используется в
61 /// качестве входного буфера.</remarks>
61 /// качестве входного буфера.</remarks>
62 public void Feed(char[] data) {
62 public void Feed(char[] data) {
63 Safe.ArgumentNotNull(data, "data");
63 Safe.ArgumentNotNull(data, "data");
64
64
65 Feed(data, data.Length);
65 Feed(data, data.Length);
66 }
66 }
67
67
68 /// <summary>
68 /// <summary>
69 /// Заполняет буфер чтения входными данными.
69 /// Заполняет буфер чтения входными данными.
70 /// </summary>
70 /// </summary>
71 /// <param name="data">Данные для обработки.</param>
71 /// <param name="data">Данные для обработки.</param>
72 /// <param name="length">Длина данных для обработки.</param>
72 /// <param name="length">Длина данных для обработки.</param>
73 /// <remarks>Копирование данных не происходит, переданный массив используется в
73 /// <remarks>Копирование данных не происходит, переданный массив используется в
74 /// качестве входного буфера.</remarks>
74 /// качестве входного буфера.</remarks>
75 public void Feed(char[] data, int length) {
75 public void Feed(char[] data, int length) {
76 Safe.ArgumentNotNull(data, "data");
76 Safe.ArgumentNotNull(data, "data");
77 Safe.ArgumentInRange(length, 0, data.Length, "length");
77 Safe.ArgumentInRange(length, 0, data.Length, "length");
78 AssertNotDisposed();
78 AssertNotDisposed();
79
79
80 m_pointer = -1;
80 m_pointer = -1;
81 m_buffer = data;
81 m_buffer = data;
82 m_bufferSize = length;
82 m_bufferSize = length;
83 Shift();
83 Shift();
84 }
84 }
85
85
86 public void Feed(TextReader reader, bool dispose) {
86 public void Feed(TextReader reader, bool dispose) {
87 Safe.ArgumentNotNull(reader, "reader");
87 Safe.ArgumentNotNull(reader, "reader");
88 AssertNotDisposed();
88 AssertNotDisposed();
89
89
90 if (m_reader != null && m_disposeReader)
90 if (m_reader != null && m_disposeReader)
91 m_reader.Dispose();
91 m_reader.Dispose();
92
92
93 m_reader = reader;
93 m_reader = reader;
94 m_disposeReader = dispose;
94 m_disposeReader = dispose;
95 m_pointer = -1;
95 m_pointer = -1;
96 m_buffer = new char[m_chunkSize];
96 m_buffer = new char[m_chunkSize];
97 m_bufferSize = 0;
97 m_bufferSize = 0;
98 Shift();
98 Shift();
99 }
99 }
100
100
101 /// <summary>
101 /// <summary>
102 /// Получает текущий токен в виде строки.
102 /// Получает текущий токен в виде строки.
103 /// </summary>
103 /// </summary>
104 /// <returns></returns>
104 /// <returns></returns>
105 protected string GetTokenValue() {
105 protected string GetTokenValue() {
106 return new String(m_buffer, m_tokenOffset, m_tokenLen);
106 return new String(m_buffer, m_tokenOffset, m_tokenLen);
107 }
107 }
108
108
109 /// <summary>
109 /// <summary>
110 /// Метки текущего токена, которые были назначены в регулярном выражении.
110 /// Метки текущего токена, которые были назначены в регулярном выражении.
111 /// </summary>
111 /// </summary>
112 protected TTag[] TokenTags {
112 protected TTag[] TokenTags {
113 get {
113 get {
114 return m_currentState.tags;
114 return m_currentState.tags;
115 }
115 }
116 }
116 }
117
117
118 /// <summary>
118 /// <summary>
119 /// Признак конца данных
119 /// Признак конца данных
120 /// </summary>
120 /// </summary>
121 public bool EOF {
121 public bool EOF {
122 get {
122 get {
123 return m_pointer >= m_bufferSize;
123 return m_pointer >= m_bufferSize;
124 }
124 }
125 }
125 }
126
126
127 /// <summary>
127 /// <summary>
128 /// Читает следующий токен, при этом <see cref="m_tokenOffset"/> указывает на начало токена,
128 /// Читает следующий токен, при этом <see cref="m_tokenOffset"/> указывает на начало токена,
129 /// <see cref="m_tokenLen"/> на длину токена, <see cref="m_buffer"/> - массив символов, в
129 /// <see cref="m_tokenLen"/> на длину токена, <see cref="m_buffer"/> - массив символов, в
130 /// котором находится токен.
130 /// котором находится токен.
131 /// </summary>
131 /// </summary>
132 /// <returns><c>false</c> - достигнут конец данных, токен не прочитан.</returns>
132 /// <returns><c>false</c> - достигнут конец данных, токен не прочитан.</returns>
133 protected bool ReadTokenInternal() {
133 protected bool ReadTokenInternal() {
134 if (m_pointer >= m_bufferSize)
134 if (m_pointer >= m_bufferSize)
135 return false;
135 return false;
136
136
137 m_currentState = m_config.states[m_config.initialState];
137 m_currentState = m_config.states[m_config.initialState];
138 m_tokenLen = 0;
138 m_tokenLen = 0;
139 m_tokenOffset = m_pointer;
139 m_tokenOffset = m_pointer;
140 int nextState;
140 int nextState;
141 do {
141 do {
142 nextState = m_currentState.transitions[m_previewCode];
142 nextState = m_currentState.transitions[m_previewCode];
143 if (nextState == DFAConst.UNREACHABLE_STATE) {
143 if (nextState == DFAConst.UNREACHABLE_STATE) {
144 if (m_currentState.final)
144 if (m_currentState.final)
145 return true;
145 return true;
146
146
147 throw new ParserException(
147 throw new ParserException(
148 String.Format(
148 String.Format(
149 "Unexpected symbol '{0}', at pos {1}",
149 "Unexpected symbol '{0}', at pos {1}",
150 m_buffer[m_pointer],
150 m_buffer[m_pointer],
151 Position
151 Position
152 )
152 )
153 );
153 );
154 }
154 }
155 m_currentState = m_config.states[nextState];
155 m_currentState = m_config.states[nextState];
156 m_tokenLen++;
156 m_tokenLen++;
157
157
158 } while (Shift());
158 } while (Shift());
159
159
160 // END OF DATA
160 // END OF DATA
161 if (!m_currentState.final)
161 if (!m_currentState.final)
162 throw new ParserException("Unexpected end of data");
162 throw new ParserException("Unexpected end of data");
163
163
164 return true;
164 return true;
165 }
165 }
166
166
167
167
168 bool Shift() {
168 bool Shift() {
169 m_pointer++;
169 m_pointer++;
170
170
171 if (m_pointer >= m_bufferSize) {
171 if (m_pointer >= m_bufferSize) {
172 if (!ReadNextChunk())
172 if (!ReadNextChunk())
173 return false;
173 return false;
174 }
174 }
175
175
176 m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
176 m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
177
177
178 return true;
178 return true;
179 }
179 }
180
180
181 bool ReadNextChunk() {
181 bool ReadNextChunk() {
182 if (m_reader == null)
182 if (m_reader == null)
183 return false;
183 return false;
184
184
185 // extend buffer if necessary
185 // extend buffer if necessary
186 if (m_pointer + m_chunkSize > m_buffer.Length) {
186 if (m_pointer + m_chunkSize > m_buffer.Length) {
187 // trim unused buffer head
187 // trim unused buffer head
188 var size = m_tokenLen + m_chunkSize;
188 var size = m_tokenLen + m_chunkSize;
189 if (size >= m_limit)
189 if (size >= m_limit)
190 throw new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit));
190 throw new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit));
191 var temp = new char[size];
191 var temp = new char[size];
192 Array.Copy(m_buffer, m_tokenOffset, temp, 0, m_tokenLen);
192 Array.Copy(m_buffer, m_tokenOffset, temp, 0, m_tokenLen);
193 m_pointer -= m_tokenOffset;
193 m_pointer -= m_tokenOffset;
194 m_bufferSize -= m_tokenOffset;
194 m_bufferSize -= m_tokenOffset;
195 m_tokenOffset = 0;
195 m_tokenOffset = 0;
196 m_buffer = temp;
196 m_buffer = temp;
197 }
197 }
198
198
199 var read = m_reader.Read(m_buffer, m_tokenLen, m_chunkSize);
199 var read = m_reader.Read(m_buffer, m_tokenLen, m_chunkSize);
200 if (read == 0)
200 if (read == 0)
201 return false;
201 return false;
202
202
203 m_bufferSize += read;
203 m_bufferSize += read;
204
204
205 return true;
205 return true;
206 }
206 }
207
207
208 /// <summary>
208 /// <summary>
209 /// Позиция сканера во входном буфере
209 /// Позиция сканера во входном буфере
210 /// </summary>
210 /// </summary>
211 public int Position {
211 public int Position {
212 get {
212 get {
213 return m_pointer + 1;
213 return m_pointer + 1;
214 }
214 }
215 }
215 }
216
216
217 /// <summary>
217 /// <summary>
218 /// Переключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей
218 /// Переключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей
219 /// группировки.
219 /// группировки.
220 /// </summary>
220 /// </summary>
221 /// <param name = "config"></param>
221 /// <param name = "config"></param>
222 protected void Switch(ScannerConfig config) {
222 protected void Switch(ScannerConfig config) {
223 Safe.ArgumentNotNull(config.states, "config.states");
223 Safe.ArgumentNotNull(config.states, "config.states");
224
224
225 m_defs.Push(m_config);
225 m_defs.Push(m_config);
226 m_config = config;
226 m_config = config;
227
227
228 m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
228 m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
229 }
229 }
230
230
231 /// <summary>
231 /// <summary>
232 /// Восстанавливает предыдущий ДКА сканера.
232 /// Восстанавливает предыдущий ДКА сканера.
233 /// </summary>
233 /// </summary>
234 protected void Restore() {
234 protected void Restore() {
235 if (m_defs.Count == 0)
235 if (m_defs.Count == 0)
236 throw new InvalidOperationException();
236 throw new InvalidOperationException();
237 m_config = m_defs.Pop();
237 m_config = m_defs.Pop();
238
238
239 m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
239 m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
240 }
240 }
241
241
242 protected override void Dispose(bool disposing) {
242 protected override void Dispose(bool disposing) {
243 if (disposing) {
243 if (disposing) {
244 if (m_reader != null && m_disposeReader)
244 if (m_reader != null && m_disposeReader)
245 m_reader.Dispose();
245 m_reader.Dispose();
246 m_buffer = null;
246 m_buffer = null;
247 m_bufferSize = 0;
247 m_bufferSize = 0;
248 m_pointer = 0;
248 m_pointer = 0;
249 m_tokenLen = 0;
249 m_tokenLen = 0;
250 m_tokenOffset = 0;
250 m_tokenOffset = 0;
251 }
251 }
252 base.Dispose(disposing);
252 base.Dispose(disposing);
253 }
253 }
254 }
254 }
255 }
255 }
@@ -1,143 +1,62
1 using System;
1 using System;
2 using Implab.Automaton.RegularExpressions;
2 using Implab.Automaton.RegularExpressions;
3 using Implab.Automaton;
3 using Implab.Automaton;
4 using System.Diagnostics;
4
5
5 namespace Implab.Formats {
6 namespace Implab.Formats {
6 public struct BufferScanner<TTag> {
7 public struct BufferScanner<TTag> {
7 char[] m_buffer;
8 int m_offset;
9 int m_position;
10 int m_hi;
11
12 readonly int m_chunk;
13 readonly int m_limit;
14
15 readonly DFAStateDescriptor<TTag>[] m_dfa;
8 readonly DFAStateDescriptor<TTag>[] m_dfa;
16 int m_state;
9 int m_state;
10 int m_pos;
17
11
18 public BufferScanner(DFAStateDescriptor<TTag>[] dfa, int initialState, int chunk, int limit) {
12 public BufferScanner(DFAStateDescriptor<TTag>[] dfa, int initialState) {
19 m_dfa = dfa;
13 m_dfa = dfa;
20 m_state = initialState;
14 m_state = initialState;
21 m_chunk = chunk;
22 m_limit = limit;
23 m_buffer = null;
24 m_offset = 0;
25 m_position = 0;
26 m_hi = 0;
27 }
28
29 public char[] Buffer {
30 get {
31 return m_buffer;
32 }
33 }
34
35 public int HiMark {
36 get {
37 return m_hi;
38 }
39 }
15 }
40
16
41 public int Position {
17 public int Position {
42 get {
18 get { return m_pos; }
43 return m_position;
44 }
45 }
46
47 public int Length {
48 get {
49 return m_hi - m_position;
50 }
51 }
52
53 public int TokenOffset {
54 get {
55 return m_offset;
56 }
57 }
58
59 public int TokenLength {
60 get {
61 return m_position - m_offset;
62 }
63 }
64
65 public void Init(char[] buffer, int position, int length) {
66 m_buffer = buffer;
67 m_position = position;
68 m_offset = position;
69 m_hi = position + length;
70 }
71
72 public int Extend() {
73 // free space
74 var free = m_buffer.Length - m_hi;
75
76 // if the buffer have enough free space
77 if (free > 0)
78 return free;
79
80 // effective size of the buffer
81 var size = m_buffer.Length - m_offset;
82
83 // calculate the new size
84 int grow = Math.Min(m_limit - size, m_chunk);
85 if (grow <= 0)
86 throw new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit));
87
88 var temp = new char[size + grow];
89 Array.Copy(m_buffer, m_offset, temp, 0, m_hi - m_offset);
90 m_position -= m_offset;
91 m_hi -= m_offset;
92 m_offset = 0;
93 m_buffer = temp;
94
95 return free + grow;
96 }
97
98 public void RaiseMark(int size) {
99 m_hi += size;
100 }
19 }
101
20
102 /// <summary>
21 /// <summary>
103 /// Scan this instance.
22 /// Scan this instance.
104 /// </summary>
23 /// </summary>
105 /// <returns><c>true</c> - additional data required</returns>
24 /// <returns><c>true</c> - additional data required</returns>
106 public bool Scan() {
25 public bool Scan(int[] buffer, int position, int length) {
107 while (m_position < m_hi) {
26 var hi = position + length;
108 var ch = m_buffer[m_position];
27 m_pos = position;
109 var next = m_dfa[m_state].transitions[(int)ch];
28
29 while (position < hi) {
30 var next = m_dfa[m_state].transitions[buffer[position]];
110 if (next == DFAConst.UNREACHABLE_STATE) {
31 if (next == DFAConst.UNREACHABLE_STATE) {
111 if (m_dfa[m_state].final)
32 if (m_dfa[m_state].final)
112 return false;
33 return false;
113
34
114 throw new ParserException(
35 throw new ParserException(
115 String.Format(
36 String.Format(
116 "Unexpected token '{0}'",
37 "Unexpected symbol"
117 new string(m_buffer, m_offset, m_position - m_offset)
118 )
38 )
119 );
39 );
120 }
40 }
41 m_pos++;
121 m_state = next;
42 m_state = next;
122 m_position++;
123 }
43 }
124
44
125 return true;
45 return true;
126 }
46 }
127
47
128 public void Eof() {
48 public void Eof() {
129 if (!m_dfa[m_state].final)
49 if (!m_dfa[m_state].final)
130 throw new ParserException(
50 throw new ParserException(
131 String.Format(
51 String.Format(
132 "Unexpected token '{0}'",
52 "Unexpected EOF"
133 new string(m_buffer, m_offset, m_position - m_offset)
134 )
53 )
135 );
54 );
136 }
55 }
137
56
138 public TTag[] GetTokenTags() {
57 public TTag[] GetTokenTags() {
139 return m_dfa[m_state].tags;
58 return m_dfa[m_state].tags;
140 }
59 }
141 }
60 }
142 }
61 }
143
62
@@ -1,72 +1,61
1 using System;
1 using System;
2 using Implab.Components;
2 using Implab.Components;
3
3
4 namespace Implab.Formats {
4 namespace Implab.Formats {
5 public abstract class TextScanner<TTag> : Disposable {
5 public abstract class TextScanner<TTag> : Disposable {
6
6
7 char[] m_buffer;
7 readonly int[] m_buffer;
8 int m_offset;
8 int m_bufferOffset;
9 int m_length;
9 int m_dataLength;
10 int m_tokenOffset;
11 int m_tokenLength;
10 int m_tokenLength;
11
12 TTag[] m_tags;
12 TTag[] m_tags;
13
13
14 BufferScanner<TTag> m_scanner;
14 BufferScanner<TTag> m_scanner;
15
15
16 protected bool ReadTokenInternal() {
16 protected bool ReadTokenInternal() {
17 if (EOF)
17 if (EOF)
18 return false;
18 return false;
19
19
20 // create a new scanner from template (scanners are structs)
20 // create a new scanner from template (scanners are value types)
21 var inst = m_scanner;
21 var inst = m_scanner;
22
22
23 // initialize the scanner
23 m_tokenLength = 0;
24 inst.Init(m_buffer, m_offset, m_length);
24
25 while (inst.Scan(m_buffer, m_bufferOffset, m_dataLength)) {
26 m_tokenLength += m_dataLength;
27
28 var actual = Read(m_buffer, 0, m_buffer.Length);
29
30 m_bufferOffset = 0;
31 m_dataLength = actual;
25
32
26 // do work
33 if (actual == 0) {
27 while (inst.Scan())
34 inst.Eof();
28 Feed(ref inst);
35 break;
36 }
37 }
38
39 var len = inst.Position - m_bufferOffset;
40 m_tokenLength += len;
41 m_dataLength -= len;
42 m_bufferOffset = inst.Position;
29
43
30 // save result;
44 // save result;
31 m_buffer = inst.Buffer;
45
32 m_length = inst.Length;
33 m_offset = inst.Position;
34 m_tokenOffset = inst.TokenOffset;
35 m_tokenLength = inst.TokenLength;
36
37 m_tags = inst.GetTokenTags();
46 m_tags = inst.GetTokenTags();
38 }
47 }
39
48
40 protected string GetToken() {
49 protected abstract int Read(int[] buffer, int offset, int size);
41 return new String(m_buffer, m_tokenOffset, m_tokenLength);
42 }
43
50
44 protected TTag[] Tags {
51 protected TTag[] Tags {
45 get {
52 get {
46 return m_tags;
53 return m_tags;
47 }
54 }
48 }
55 }
49
56
50 /// <summary>
51 /// Feed the specified scanner.
52 /// </summary>
53 /// <param name="scanner">Scanner.</param>
54 /// <example>
55 /// protected override void Feed(ref BufferScanner<TTag> scanner) {
56 /// var size = scanner.Extend();
57 /// var actual = m_reader.Read(scanner.Buffer, scanner.HiMark, size);
58 /// if (actual == 0) {
59 /// m_eof = true;
60 /// scanner.Eof();
61 /// } else {
62 /// scanner.RaiseHiMark(actual);
63 /// }
64 /// }
65 /// </example>
66 protected abstract void Feed(ref BufferScanner<TTag> scanner);
67
68 public abstract bool EOF { get; }
57 public abstract bool EOF { get; }
69
58
70 }
59 }
71 }
60 }
72
61
General Comments 0
You need to be logged in to leave comments. Login now