| @@ -0,0 +1,143 | |||||
|  | 1 | using System; | |||
|  | 2 | using Implab.Automaton.RegularExpressions; | |||
|  | 3 | using Implab.Automaton; | |||
|  | 4 | ||||
|  | 5 | namespace Implab.Formats { | |||
/// <summary>
/// A DFA-driven scanner working over a growable character buffer. It keeps track of
/// the start of the current token, the current scan position and the end of the valid
/// data (the "hi mark").
/// </summary>
/// <remarks>
/// This is a mutable struct: an assignment copies the whole state, therefore it must be
/// passed by <c>ref</c> to any code which is expected to modify it.
/// </remarks>
/// <typeparam name="TTag">Type of the tags attached to the DFA states.</typeparam>
public struct BufferScanner<TTag> {
    // the data being scanned; may be replaced by Extend()
    char[] m_buffer;
    // index of the first character of the current token
    int m_offset;
    // index of the next character to be examined
    int m_position;
    // index right after the last valid character in the buffer
    int m_hi;

    // maximum number of characters added to the buffer by a single Extend() call
    readonly int m_chunk;
    // upper bound used by Extend() for the size of the data kept in the buffer
    readonly int m_limit;

    readonly DFAStateDescriptor<TTag>[] m_dfa;
    // index of the current state in m_dfa
    int m_state;

    /// <summary>
    /// Creates a scanner which has no buffer yet; use <see cref="Init"/> to attach one.
    /// </summary>
    /// <param name="dfa">State table of the automaton.</param>
    /// <param name="initialState">Index of the state the scanning starts from.</param>
    /// <param name="chunk">Maximum growth of the buffer per <see cref="Extend"/> call.</param>
    /// <param name="limit">Size limit checked by <see cref="Extend"/> before growing the buffer.</param>
    public BufferScanner(DFAStateDescriptor<TTag>[] dfa, int initialState, int chunk, int limit) {
        m_dfa = dfa;
        m_state = initialState;
        m_chunk = chunk;
        m_limit = limit;
        m_buffer = null;
        m_offset = 0;
        m_position = 0;
        m_hi = 0;
    }

    /// <summary>
    /// The current buffer. <see cref="Extend"/> may replace it with a new array.
    /// </summary>
    public char[] Buffer {
        get { return m_buffer; }
    }

    /// <summary>
    /// Index right after the last valid character, new data is appended starting here.
    /// </summary>
    public int HiMark {
        get { return m_hi; }
    }

    /// <summary>
    /// Index of the next character to be examined.
    /// </summary>
    public int Position {
        get { return m_position; }
    }

    /// <summary>
    /// Number of characters between <see cref="Position"/> and <see cref="HiMark"/>.
    /// </summary>
    public int Length {
        get { return m_hi - m_position; }
    }

    /// <summary>
    /// Index of the first character of the current token.
    /// </summary>
    public int TokenOffset {
        get { return m_offset; }
    }

    /// <summary>
    /// Number of characters consumed since the token start.
    /// </summary>
    public int TokenLength {
        get { return m_position - m_offset; }
    }

    /// <summary>
    /// Attaches the buffer and starts a new token at the specified position.
    /// The DFA state is left untouched.
    /// </summary>
    /// <param name="buffer">The buffer with the data.</param>
    /// <param name="position">Index of the first character to scan.</param>
    /// <param name="length">Number of valid characters starting at <paramref name="position"/>.</param>
    public void Init(char[] buffer, int position, int length) {
        m_buffer = buffer;
        m_position = position;
        m_offset = position;
        m_hi = position + length;
    }

    /// <summary>
    /// Makes sure there is room after the hi mark to append more data.
    /// </summary>
    /// <remarks>
    /// When the buffer still has free space after the hi mark nothing is changed.
    /// Otherwise a new buffer is allocated, the data starting from the token offset is
    /// moved to its beginning and all marks are rebased, therefore <see cref="Buffer"/>
    /// and <see cref="HiMark"/> must be read again after this call.
    /// </remarks>
    /// <returns>Number of characters which can be written starting at <see cref="HiMark"/>.</returns>
    /// <exception cref="ParserException">The buffer can't grow within the limit.</exception>
    public int Extend() {
        if (m_buffer == null) {
            // no buffer has been attached yet: allocate the first chunk instead of
            // failing with a null reference
            var initial = Math.Min(m_limit, m_chunk);
            if (initial <= 0)
                throw LimitExceeded();

            m_buffer = new char[initial];
            m_offset = 0;
            m_position = 0;
            m_hi = 0;
            return initial;
        }

        // free space after the valid data
        var free = m_buffer.Length - m_hi;

        // the buffer still has room, nothing to do
        if (free > 0)
            return free;

        // effective size of the buffer: everything before the token start is dropped
        var size = m_buffer.Length - m_offset;

        // grow by one chunk at most and never beyond the limit
        int grow = Math.Min(m_limit - size, m_chunk);
        if (grow <= 0)
            throw LimitExceeded();

        var temp = new char[size + grow];
        Array.Copy(m_buffer, m_offset, temp, 0, m_hi - m_offset);

        // the token now starts at the beginning of the new buffer
        m_position -= m_offset;
        m_hi -= m_offset;
        m_offset = 0;
        m_buffer = temp;

        return free + grow;
    }

    /// <summary>
    /// Moves the hi mark forward after new data has been written to the buffer.
    /// </summary>
    /// <param name="size">Number of characters appended at <see cref="HiMark"/>.</param>
    public void RaiseMark(int size) {
        m_hi += size;
    }

    /// <summary>
    /// Runs the automaton over the available data.
    /// </summary>
    /// <returns>
    /// <c>true</c> - all available data is consumed and additional data is required;
    /// <c>false</c> - the automaton stopped in a final state, the token is complete.
    /// </returns>
    /// <exception cref="ParserException">
    /// There is no transition for the next character and the current state isn't final.
    /// </exception>
    public bool Scan() {
        while (m_position < m_hi) {
            var ch = m_buffer[m_position];
            // the transition table is indexed directly by the character code
            var next = m_dfa[m_state].transitions[(int)ch];
            if (next == DFAConst.UNREACHABLE_STATE) {
                // the character doesn't belong to the token, it is left unconsumed
                if (m_dfa[m_state].final)
                    return false;

                throw UnexpectedToken();
            }
            m_state = next;
            m_position++;
        }

        return true;
    }

    /// <summary>
    /// Signals that there will be no more data; checks that the scanned characters form
    /// a complete token.
    /// </summary>
    /// <exception cref="ParserException">The current state isn't final.</exception>
    public void Eof() {
        if (!m_dfa[m_state].final)
            throw UnexpectedToken();
    }

    /// <summary>
    /// Gets the tags of the current DFA state.
    /// </summary>
    public TTag[] GetTokenTags() {
        return m_dfa[m_state].tags;
    }

    // Creates the error reported when the characters scanned so far don't form a token.
    ParserException UnexpectedToken() {
        // without a buffer there is no text to show, report an empty token
        var text = m_buffer == null
            ? String.Empty
            : new string(m_buffer, m_offset, m_position - m_offset);

        return new ParserException(String.Format("Unexpected token '{0}'", text));
    }

    // Creates the error reported when the buffer can't be extended any further.
    ParserException LimitExceeded() {
        return new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit));
    }
}
|  | 142 | } | |||
|  | 143 | ||||
| @@ -0,0 +1,72 | |||||
|  | 1 | using System; | |||
|  | 2 | using Implab.Components; | |||
|  | 3 | ||||
|  | 4 | namespace Implab.Formats { | |||
/// <summary>
/// Base class for readers which split a character stream into tokens using a
/// <see cref="BufferScanner{TTag}"/>. Descendants supply the data through
/// <see cref="Feed"/> and report the end of the input through <see cref="EOF"/>.
/// </summary>
/// <typeparam name="TTag">Type of the tags attached to the recognized tokens.</typeparam>
public abstract class TextScanner<TTag> : Disposable {

    // buffer taken over from the scanner after the last token
    char[] m_buffer;
    // position of the first unread character in m_buffer
    int m_offset;
    // number of unread characters starting at m_offset
    int m_length;
    // location of the last token in m_buffer
    int m_tokenOffset;
    int m_tokenLength;
    // tags of the last token
    TTag[] m_tags;

    // template which is copied for every token, so each token starts from the
    // state stored in the template
    readonly BufferScanner<TTag> m_scanner;

    /// <summary>
    /// Creates an instance with a default scanner template. Kept for source
    /// compatibility; such a template has no DFA, use the other constructor to
    /// get a working scanner.
    /// </summary>
    protected TextScanner() {
    }

    /// <summary>
    /// Creates an instance which uses a copy of the specified scanner for every token.
    /// </summary>
    /// <param name="scanner">The scanner template.</param>
    protected TextScanner(BufferScanner<TTag> scanner) {
        m_scanner = scanner;
    }

    /// <summary>
    /// Reads the next token; the result is available through <see cref="GetToken"/>
    /// and <see cref="Tags"/>.
    /// </summary>
    /// <returns>
    /// <c>false</c> if <see cref="EOF"/> was already set when the method was called,
    /// otherwise <c>true</c>.
    /// </returns>
    protected bool ReadTokenInternal() {
        if (EOF)
            return false;

        // create a new scanner from the template (scanners are structs)
        var inst = m_scanner;

        // continue from the data left after the previous token
        inst.Init(m_buffer, m_offset, m_length);

        // Scan() returns true while it needs more data
        while (inst.Scan()) {
            Feed(ref inst);

            // when the input is exhausted Scan() would ask for more data forever
            if (EOF)
                break;
        }

        // save the result, the scanner might have replaced the buffer
        m_buffer = inst.Buffer;
        m_length = inst.Length;
        m_offset = inst.Position;
        m_tokenOffset = inst.TokenOffset;
        m_tokenLength = inst.TokenLength;

        m_tags = inst.GetTokenTags();

        return true;
    }

    /// <summary>
    /// Gets the text of the last token.
    /// </summary>
    /// <returns>The token text, an empty string if nothing has been read yet.</returns>
    protected string GetToken() {
        // there is no buffer until the first token is read
        if (m_buffer == null)
            return String.Empty;

        return new String(m_buffer, m_tokenOffset, m_tokenLength);
    }

    /// <summary>
    /// Tags of the last token.
    /// </summary>
    protected TTag[] Tags {
        get { return m_tags; }
    }

    /// <summary>
    /// Supplies more data to the specified scanner. When there is no more data the
    /// implementation must make <see cref="EOF"/> return <c>true</c>.
    /// </summary>
    /// <param name="scanner">The scanner which requires additional data.</param>
    /// <example>
    /// <code>
    /// protected override void Feed(ref BufferScanner&lt;TTag&gt; scanner) {
    ///     var size = scanner.Extend();
    ///     var actual = m_reader.Read(scanner.Buffer, scanner.HiMark, size);
    ///     if (actual == 0) {
    ///         m_eof = true;
    ///         scanner.Eof();
    ///     } else {
    ///         scanner.RaiseMark(actual);
    ///     }
    /// }
    /// </code>
    /// </example>
    protected abstract void Feed(ref BufferScanner<TTag> scanner);

    /// <summary>
    /// Indicates that the input is exhausted.
    /// </summary>
    public abstract bool EOF { get; }

}
|  | 71 | } | |||
|  | 72 | ||||
| @@ -191,6 +191,8 | |||||
| 191 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> |  | 191 | <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> | |
| 192 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> |  | 192 | <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> | |
| 193 | <Compile Include="Automaton\RegularExpressions\DFAStateDescriptorT.cs" /> |  | 193 | <Compile Include="Automaton\RegularExpressions\DFAStateDescriptorT.cs" /> | |
|  | 194 | <Compile Include="Formats\BufferScanner.cs" /> | |||
|  | 195 | <Compile Include="Formats\TextScanner.cs" /> | |||
| 194 | </ItemGroup> |  | 196 | </ItemGroup> | |
| 195 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |  | 197 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | |
| 196 | <ItemGroup /> |  | 198 | <ItemGroup /> | |
        
        General Comments 0
    
    
  
  
                      You need to be logged in to leave comments.
                      Login now
                    
                