| @@ -1,61 +1,53 | |||||
| 1 | using System; | 
             | 
        1 | using System; | |
| 2 | using Implab.Components; | 
             | 
        2 | using Implab.Components; | |
| 
             | 
        3 | using Implab.Automaton.RegularExpressions; | |||
| 
             | 
        4 | using System.Diagnostics; | |||
| 
             | 
        5 | using Implab.Automaton; | |||
| 3 | 
             | 
        6 | |||
| 4 | namespace Implab.Formats { | 
             | 
        7 | namespace Implab.Formats { | |
| 5 | public abstract class TextScanner<TTag> : Disposable { | 
             | 
        8 | public abstract class TextScanner<TTag> : Disposable { | |
| 6 | 
             | 
        9 | |||
| 7 | readonly int[] m_buffer; | 
             | 
        10 | int m_maxSymbol; | |
| 
             | 
        11 | int[] m_symbolMap; | |||
| 
             | 
        12 | ||||
| 
             | 
        13 | readonly char[] m_buffer; | |||
| 8 | int m_bufferOffset; | 
             | 
        14 | int m_bufferOffset; | |
| 9 | int m_dataLength; | 
        
             | 
        15 | int m_bufferSize; | |
| 10 | int m_tokenLength; | 
             | 
        16 | int m_tokenLength; | |
| 11 | 
             | 
        17 | |||
| 12 | TTag[] m_tags; | 
             | 
        18 | TTag[] m_tags; | |
| 13 | 
             | 
        19 | |||
| 14 | BufferScanner<TTag> m_scanner; | 
             | 
        20 | protected bool ReadTokenInternal(DFAStateDescriptor<TTag>[] dfa, int state) { | |
| 15 | 
             | 
        21 | Debug.Assert(dfa != null); | ||
| 16 | protected bool ReadTokenInternal() { | 
             | 
        |||
| 17 | if (EOF) | 
             | 
        |||
| 18 | return false; | 
             | 
        |||
| 19 | 
             | 
        ||||
| 20 | // create a new scanner from template (scanners are value types) | 
             | 
        |||
| 21 | var inst = m_scanner; | 
             | 
        |||
| 22 | 
             | 
        ||||
| 23 | m_tokenLength = 0; | 
             | 
        |||
| 24 | 
             | 
        22 | |||
| 25 | while (inst.Scan(m_buffer, m_bufferOffset, m_dataLength)) { | 
             | 
        23 | do { | |
| 26 | m_tokenLength += m_dataLength; | 
             | 
        24 | for (var pos = m_bufferOffset; pos < m_bufferSize; pos++) { | |
| 
             | 
        25 | var ch = m_buffer[pos]; | |||
| 
             | 
        26 | state = dfa[state].transitions[m_symbolMap[ch > m_maxSymbol ? m_maxSymbol : ch]]; | |||
| 
             | 
        27 | if (state == DFAConst.UNREACHABLE_STATE) | |||
| 
             | 
        28 | break; | |||
| 
             | 
        29 | } | |||
| 
             | 
        30 | } while (Feed()); | |||
| 27 | 
             | 
        31 | |||
| 28 | var actual = Read(m_buffer, 0, m_buffer.Length); | 
             | 
        32 | if (dfa[state].final) { | |
| 29 | 
             | 
        ||||
| 30 | m_bufferOffset = 0; | 
             | 
        |||
| 31 | m_dataLength = actual; | 
             | 
        |||
| 32 | 
             | 
        33 | |||
| 33 | if (actual == 0) { | 
             | 
        |||
| 34 | inst.Eof(); | 
             | 
        |||
| 35 | break; | 
             | 
        |||
| 36 | } | 
             | 
        |||
| 37 | } | 
             | 
        34 | } | |
| 38 | 
             | 
        35 | |||
| 39 | var len = inst.Position - m_bufferOffset; | 
             | 
        |||
| 40 | m_tokenLength += len; | 
             | 
        |||
| 41 | m_dataLength -= len; | 
             | 
        |||
| 42 | m_bufferOffset = inst.Position; | 
             | 
        |||
| 43 | 
             | 
        ||||
| 44 | // save result; | 
             | 
        |||
| 45 | 
             | 
        ||||
| 46 | m_tags = inst.GetTokenTags(); | 
             | 
        |||
| 47 | } | 
             | 
        36 | } | |
| 48 | 
             | 
        37 | |||
| 49 | protected abstract int Read(int[] buffer, int offset, int size); | 
             | 
        38 | bool Feed() { | |
| 
             | 
        39 | ||||
| 
             | 
        40 | } | |||
| 
             | 
        41 | ||||
| 
             | 
        42 | protected abstract int Read(char[] buffer, int offset, int size); | |||
| 50 | 
             | 
        43 | |||
| 51 | protected TTag[] Tags { | 
             | 
        44 | protected TTag[] Tags { | |
| 52 | get { | 
             | 
        45 | get { | |
| 53 | return m_tags; | 
             | 
        46 | return m_tags; | |
| 54 | } | 
             | 
        47 | } | |
| 55 | } | 
             | 
        48 | } | |
| 56 | 
             | 
        49 | |||
| 57 | public abstract bool EOF { get; } | 
             | 
        50 | ||
| 58 | 
             | 
        ||||
| 59 | } | 
             | 
        51 | } | |
| 60 | } | 
             | 
        52 | } | |
| 61 | 
             | 
        53 | |||
        
        General Comments 0
    
    
  
  
                      You need to be logged in to leave comments.
                      Login now
                    
                