| @@ -1,52 +1,45 | |||||
| 1 | using System; |  | 1 | using System; | |
| 2 | using Implab.Components; |  | 2 | using Implab.Components; | |
|  | 3 | using Implab.Automaton.RegularExpressions; | |||
|  | 4 | using System.Diagnostics; | |||
|  | 5 | using Implab.Automaton; | |||
| 3 |  | 6 | |||
| 4 | namespace Implab.Formats { |  | 7 | namespace Implab.Formats { | |
| 5 | public abstract class TextScanner<TTag> : Disposable { |  | 8 | public abstract class TextScanner<TTag> : Disposable { | |
| 6 |  | 9 | |||
| 7 | readonly int[] m_buffer; |  | 10 | int m_maxSymbol; | |
|  | 11 | int[] m_symbolMap; | |||
|  | 12 | ||||
|  | 13 | readonly char[] m_buffer; | |||
| 8 | int m_bufferOffset; |  | 14 | int m_bufferOffset; | |
| 9 | int m_dataLength; |  | 15 | int m_bufferSize; | |
| 10 | int m_tokenLength; |  | 16 | int m_tokenLength; | |
| 11 |  | 17 | |||
| 12 | TTag[] m_tags; |  | 18 | TTag[] m_tags; | |
| 13 |  | 19 | |||
| 14 | BufferScanner<TTag> m_scanner; |  | 20 | protected bool ReadTokenInternal(DFAStateDescriptor<TTag>[] dfa, int state) { | |
| 15 |  | 21 | Debug.Assert(dfa != null); | ||
| 16 | protected bool ReadTokenInternal() { |  | |||
| 17 | if (EOF) |  | |||
| 18 | return false; |  | |||
| 19 |  | ||||
| 20 | // create a new scanner from template (scanners are value types) |  | |||
| 21 | var inst = m_scanner; |  | |||
| 22 |  | ||||
| 23 | m_tokenLength = 0; |  | |||
| 24 |  | 22 | |||
| 25 | while (inst.Scan(m_buffer, m_bufferOffset, m_dataLength)) { |  | 23 | do { | |
| 26 | m_tokenLength += m_dataLength; |  | 24 | for (var pos = m_bufferOffset; pos < m_bufferSize; pos++) { | |
|  | 25 | var ch = m_buffer[pos]; | |||
|  | 26 | state = dfa[state].transitions[m_symbolMap[ch > m_maxSymbol ? m_maxSymbol : ch]]; | |||
|  | 27 | if (state == DFAConst.UNREACHABLE_STATE) | |||
|  | 28 | break; | |||
|  | 29 | } | |||
|  | 30 | } while (Feed()); | |||
| 27 |  | 31 | |||
| 28 | var actual = Read(m_buffer, 0, m_buffer.Length); |  | 32 | if (dfa[state].final) { | |
| 29 |  | ||||
| 30 | m_bufferOffset = 0; |  | |||
| 31 | m_dataLength = actual; |  | |||
| 32 |  | 33 | |||
| 33 | if (actual == 0) { |  | |||
| 34 | inst.Eof(); |  | |||
| 35 | break; |  | |||
| 36 | } |  | |||
| 37 | } |  | 34 | } | |
| 38 |  | 35 | |||
| 39 | var len = inst.Position - m_bufferOffset; |  | |||
| 40 | m_tokenLength += len; |  | |||
| 41 | m_dataLength -= len; |  | |||
| 42 | m_bufferOffset = inst.Position; |  | |||
| 43 |  | ||||
| 44 | // save result; |  | |||
| 45 |  | ||||
| 46 | m_tags = inst.GetTokenTags(); |  | |||
| 47 | } |  | 36 | } | |
| 48 |  | 37 | |||
| 49 | protected abstract int Read(int[] buffer, int offset, int size); |  | 38 | bool Feed() { | |
|  | 39 | ||||
|  | 40 | } | |||
|  | 41 | ||||
|  | 42 | protected abstract int Read(char[] buffer, int offset, int size); | |||
| 50 |  | 43 | |||
| 51 | protected TTag[] Tags { |  | 44 | protected TTag[] Tags { | |
| 52 | get { |  | 45 | get { | |
| @@ -54,8 +47,7 namespace Implab.Formats { | |||||
| 54 | } |  | 47 | } | |
| 55 | } |  | 48 | } | |
| 56 |  | 49 | |||
| 57 | public abstract bool EOF { get; } |  | 50 | ||
| 58 |  | ||||
| 59 | } |  | 51 | } | |
| 60 | } |  | 52 | } | |
| 61 |  | 53 | |||
        
        General Comments 0
    
    
  
  
                      You need to be logged in to leave comments.
                      Login now
                    
                