| @@ -1,52 +1,45 | |||
| 
             | 
        1 | 1 | using System; | 
| 
             | 
        2 | 2 | using Implab.Components; | 
| 
             | 
        3 | using Implab.Automaton.RegularExpressions; | |
| 
             | 
        4 | using System.Diagnostics; | |
| 
             | 
        5 | using Implab.Automaton; | |
| 
             | 
        3 | 6 | |
| 
             | 
        4 | 7 | namespace Implab.Formats { | 
| 
             | 
        5 | 8 | public abstract class TextScanner<TTag> : Disposable { | 
| 
             | 
        6 | 9 | |
| 
             | 
        7 | readonly int[] m_buffer; | |
| 
             | 
        10 | int m_maxSymbol; | |
| 
             | 
        11 | int[] m_symbolMap; | |
| 
             | 
        12 | ||
| 
             | 
        13 | readonly char[] m_buffer; | |
| 
             | 
        8 | 14 | int m_bufferOffset; | 
| 
             | 
        9 | int m_dataLength; | |
| 
             | 
        15 | int m_bufferSize; | |
| 
             | 
        10 | 16 | int m_tokenLength; | 
| 
             | 
        11 | 17 | |
| 
             | 
        12 | 18 | TTag[] m_tags; | 
| 
             | 
        13 | 19 | |
| 
             | 
        14 | BufferScanner<TTag> m_scanner; | |
| 
             | 
        15 | ||
| 
             | 
        16 | protected bool ReadTokenInternal() { | |
| 
             | 
        17 | if (EOF) | |
| 
             | 
        18 | return false; | |
| 
             | 
        19 | ||
| 
             | 
        20 | // create a new scanner from template (scanners are value types) | |
| 
             | 
        21 | var inst = m_scanner; | |
| 
             | 
        22 | ||
| 
             | 
        23 | m_tokenLength = 0; | |
| 
             | 
        20 | protected bool ReadTokenInternal(DFAStateDescriptor<TTag>[] dfa, int state) { | |
| 
             | 
        21 | Debug.Assert(dfa != null); | |
| 
             | 
        24 | 22 | |
| 
             | 
        25 | while (inst.Scan(m_buffer, m_bufferOffset, m_dataLength)) { | |
| 
             | 
        26 | m_tokenLength += m_dataLength; | |
| 
             | 
        23 | do { | |
| 
             | 
        24 | for (var pos = m_bufferOffset; pos < m_bufferSize; pos++) { | |
| 
             | 
        25 | var ch = m_buffer[pos]; | |
| 
             | 
        26 | state = dfa[state].transitions[m_symbolMap[ch > m_maxSymbol ? m_maxSymbol : ch]]; | |
| 
             | 
        27 | if (state == DFAConst.UNREACHABLE_STATE) | |
| 
             | 
        28 | break; | |
| 
             | 
        29 | } | |
| 
             | 
        30 | } while (Feed()); | |
| 
             | 
        27 | 31 | |
| 
             | 
        28 | var actual = Read(m_buffer, 0, m_buffer.Length); | |
| 
             | 
        29 | ||
| 
             | 
        30 | m_bufferOffset = 0; | |
| 
             | 
        31 | m_dataLength = actual; | |
| 
             | 
        32 | if (dfa[state].final) { | |
| 
             | 
        32 | 33 | |
| 
             | 
        33 | if (actual == 0) { | |
| 
             | 
        34 | inst.Eof(); | |
| 
             | 
        35 | break; | |
| 
             | 
        36 | } | |
| 
             | 
        37 | 34 | } | 
| 
             | 
        38 | 35 | |
| 
             | 
        39 | var len = inst.Position - m_bufferOffset; | |
| 
             | 
        40 | m_tokenLength += len; | |
| 
             | 
        41 | m_dataLength -= len; | |
| 
             | 
        42 | m_bufferOffset = inst.Position; | |
| 
             | 
        43 | ||
| 
             | 
        44 | // save result; | |
| 
             | 
        45 | ||
| 
             | 
        46 | m_tags = inst.GetTokenTags(); | |
| 
             | 
        47 | 36 | } | 
| 
             | 
        48 | 37 | |
| 
             | 
        49 | protected abstract int Read(int[] buffer, int offset, int size); | |
| 
             | 
        38 | bool Feed() { | |
| 
             | 
        39 | ||
| 
             | 
        40 | } | |
| 
             | 
        41 | ||
| 
             | 
        42 | protected abstract int Read(char[] buffer, int offset, int size); | |
| 
             | 
        50 | 43 | |
| 
             | 
        51 | 44 | protected TTag[] Tags { | 
| 
             | 
        52 | 45 | get { | 
| @@ -54,8 +47,7 namespace Implab.Formats { | |||
| 
             | 
        54 | 47 | } | 
| 
             | 
        55 | 48 | } | 
| 
             | 
        56 | 49 | |
| 
             | 
        57 | public abstract bool EOF { get; } | |
| 
             | 
        58 | ||
| 
             | 
        50 | ||
| 
             | 
        59 | 51 | } | 
| 
             | 
        60 | 52 | } | 
| 
             | 
        61 | 53 | |
        
        General Comments 0
    
    
  
  
                      You need to be logged in to leave comments.
                      Login now
                    
                