| @@ -1,52 +1,45 | |||||
| 1 | using System; |
|
1 | using System; | |
| 2 | using Implab.Components; |
|
2 | using Implab.Components; | |
|
|
3 | using Implab.Automaton.RegularExpressions; | |||
|
|
4 | using System.Diagnostics; | |||
|
|
5 | using Implab.Automaton; | |||
| 3 |
|
6 | |||
| 4 | namespace Implab.Formats { |
|
7 | namespace Implab.Formats { | |
| 5 | public abstract class TextScanner<TTag> : Disposable { |
|
8 | public abstract class TextScanner<TTag> : Disposable { | |
| 6 |
|
9 | |||
| 7 | readonly int[] m_buffer; |
|
10 | int m_maxSymbol; | |
|
|
11 | int[] m_symbolMap; | |||
|
|
12 | ||||
|
|
13 | readonly char[] m_buffer; | |||
| 8 | int m_bufferOffset; |
|
14 | int m_bufferOffset; | |
| 9 |
int m_ |
|
15 | int m_bufferSize; | |
| 10 | int m_tokenLength; |
|
16 | int m_tokenLength; | |
| 11 |
|
17 | |||
| 12 | TTag[] m_tags; |
|
18 | TTag[] m_tags; | |
| 13 |
|
19 | |||
| 14 | BufferScanner<TTag> m_scanner; |
|
20 | protected bool ReadTokenInternal(DFAStateDescriptor<TTag>[] dfa, int state) { | |
| 15 |
|
21 | Debug.Assert(dfa != null); | ||
| 16 | protected bool ReadTokenInternal() { |
|
|||
| 17 | if (EOF) |
|
|||
| 18 | return false; |
|
|||
| 19 |
|
||||
| 20 | // create a new scanner from template (scanners are value types) |
|
|||
| 21 | var inst = m_scanner; |
|
|||
| 22 |
|
||||
| 23 | m_tokenLength = 0; |
|
|||
| 24 |
|
22 | |||
| 25 | while (inst.Scan(m_buffer, m_bufferOffset, m_dataLength)) { |
|
23 | do { | |
| 26 | m_tokenLength += m_dataLength; |
|
24 | for (var pos = m_bufferOffset; pos < m_bufferSize; pos++) { | |
|
|
25 | var ch = m_buffer[pos]; | |||
|
|
26 | state = dfa[state].transitions[m_symbolMap[ch > m_maxSymbol ? m_maxSymbol : ch]]; | |||
|
|
27 | if (state == DFAConst.UNREACHABLE_STATE) | |||
|
|
28 | break; | |||
|
|
29 | } | |||
|
|
30 | } while (Feed()); | |||
| 27 |
|
31 | |||
| 28 | var actual = Read(m_buffer, 0, m_buffer.Length); |
|
32 | if (dfa[state].final) { | |
| 29 |
|
||||
| 30 | m_bufferOffset = 0; |
|
|||
| 31 | m_dataLength = actual; |
|
|||
| 32 |
|
33 | |||
| 33 | if (actual == 0) { |
|
|||
| 34 | inst.Eof(); |
|
|||
| 35 | break; |
|
|||
| 36 | } |
|
|||
| 37 | } |
|
34 | } | |
| 38 |
|
35 | |||
| 39 | var len = inst.Position - m_bufferOffset; |
|
|||
| 40 | m_tokenLength += len; |
|
|||
| 41 | m_dataLength -= len; |
|
|||
| 42 | m_bufferOffset = inst.Position; |
|
|||
| 43 |
|
||||
| 44 | // save result; |
|
|||
| 45 |
|
||||
| 46 | m_tags = inst.GetTokenTags(); |
|
|||
| 47 | } |
|
36 | } | |
| 48 |
|
37 | |||
| 49 | protected abstract int Read(int[] buffer, int offset, int size); |
|
38 | bool Feed() { | |
|
|
39 | ||||
|
|
40 | } | |||
|
|
41 | ||||
|
|
42 | protected abstract int Read(char[] buffer, int offset, int size); | |||
| 50 |
|
43 | |||
| 51 | protected TTag[] Tags { |
|
44 | protected TTag[] Tags { | |
| 52 | get { |
|
45 | get { | |
| @@ -54,8 +47,7 namespace Implab.Formats { | |||||
| 54 | } |
|
47 | } | |
| 55 | } |
|
48 | } | |
| 56 |
|
49 | |||
| 57 | public abstract bool EOF { get; } |
|
50 | ||
| 58 |
|
||||
| 59 | } |
|
51 | } | |
| 60 | } |
|
52 | } | |
| 61 |
|
53 | |||
General Comments 0
You need to be logged in to leave comments.
Login now
