TextScanner.cs
150 lines
| 5.0 KiB
| text/x-csharp
|
CSharpLexer
cin
|
r173 | using System; | ||
using Implab.Components; | ||||
cin
|
r175 | using System.Diagnostics; | ||
using Implab.Automaton; | ||||
cin
|
r176 | using System.Text; | ||
cin
|
r173 | |||
namespace Implab.Formats {
    /// <summary>
    /// Base class for buffered scanners which split a character stream into tokens
    /// using a table-driven automaton. The input is either supplied as a ready
    /// buffer or pulled in chunks through <see cref="Read"/>.
    /// </summary>
    public abstract class TextScanner : Disposable {
        // upper bound for the size of the internal buffer, in characters
        readonly int m_bufferMax;
        // number of characters requested from Read on each refill, zero or less disables refilling
        readonly int m_chunkSize;

        char[] m_buffer;
        // index of the first character in m_buffer which isn't consumed yet
        int m_bufferOffset;
        // index just past the last valid character in m_buffer
        int m_bufferSize;
        // position and length of the most recently scanned token in m_buffer
        int m_tokenOffset;
        int m_tokenLength;

        /// <summary>
        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
        /// which fills its buffer on demand through <see cref="Read"/>.
        /// </summary>
        /// <param name="bufferMax">The maximum size of the internal buffer, in characters.</param>
        /// <param name="chunkSize">The number of characters requested on each refill.</param>
        protected TextScanner(int bufferMax, int chunkSize) {
            // check the arguments, the fields still hold their default values at this point
            Debug.Assert(chunkSize <= bufferMax);
            m_bufferMax = bufferMax;
            m_chunkSize = chunkSize;
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
        /// which scans the specified buffer. The chunk size is left at zero, so
        /// <see cref="Feed()"/> never reads additional data for such instance.
        /// </summary>
        /// <param name="buffer">The characters to scan, may be <c>null</c>.</param>
        protected TextScanner(char[] buffer) {
            if (buffer != null) {
                m_buffer = buffer;
                m_bufferSize = buffer.Length;
            }
        }

        /// <summary>
        /// (hungry) Reads the next token.
        /// </summary>
        /// <returns><c>true</c> if a token was read, <c>false</c> if there are no more tokens in the stream.</returns>
        /// <param name="dfa">The transition map for the automaton.</param>
        /// <param name="final">Final states of the automaton.</param>
        /// <param name="tags">Tags of the states, indexed by the state.</param>
        /// <param name="state">The initial state for the automaton.</param>
        /// <param name="alphabet">Maps a character code to the input symbol of the automaton.</param>
        /// <param name="tag">Receives the tags of the state in which the token has ended, <c>null</c> if no token was read.</param>
        /// <exception cref="ArgumentNullException">One of the tables is <c>null</c>.</exception>
        /// <exception cref="ParserException">The input doesn't form a valid token.</exception>
        internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
            if (dfa == null)
                throw new ArgumentNullException("dfa");
            if (final == null)
                throw new ArgumentNullException("final");
            if (tags == null)
                throw new ArgumentNullException("tags");
            if (alphabet == null)
                throw new ArgumentNullException("alphabet");

            // the out parameter must be assigned on every path, including EOF
            tag = null;
            m_tokenLength = 0;

            var maxSymbol = alphabet.Length - 1;

            // 'state' always holds the last live state, 'next' is the result of the
            // latest transition and may be the unreachable state
            int next = state;

            do {
                // after the next chunk is read the offset in the buffer may change
                int pos = m_bufferOffset + m_tokenLength;

                while (pos < m_bufferSize) {
                    var ch = m_buffer[pos];

                    next = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]];
                    if (next == DFAConst.UNREACHABLE_STATE)
                        break;

                    state = next;
                    pos++;
                }

                m_tokenLength = pos - m_bufferOffset;
            } while (next != DFAConst.UNREACHABLE_STATE && Feed());

            m_tokenOffset = m_bufferOffset;
            m_bufferOffset += m_tokenLength;

            if (final[state]) {
                tag = tags[state];
                return true;
            }

            if (m_bufferOffset == m_bufferSize) {
                if (m_tokenLength == 0) //EOF
                    return false;

                // the input has ended in the middle of a token
                throw new ParserException();
            }

            throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
        }

        /// <summary>
        /// Replaces the internal buffer with the specified range of characters.
        /// </summary>
        /// <param name="buffer">The new buffer.</param>
        /// <param name="offset">The index of the first character to scan.</param>
        /// <param name="length">The number of characters to scan.</param>
        protected void Feed(char[] buffer, int offset, int length) {
            m_buffer = buffer;
            m_bufferOffset = offset;
            m_bufferSize = offset + length;
        }

        /// <summary>
        /// Reads the next chunk of data through <see cref="Read"/> and appends it
        /// to the unconsumed part of the buffer, the buffer is reallocated when
        /// it has no room for the whole chunk.
        /// </summary>
        /// <returns><c>true</c> if some data was read, <c>false</c> if the refilling is disabled or there is no more data.</returns>
        /// <exception cref="ParserException">The buffer can't grow without exceeding the limit.</exception>
        protected bool Feed() {
            if (m_chunkSize <= 0)
                return false;

            if (m_buffer == null) {
                Debug.Assert(m_bufferOffset == 0);
                m_buffer = new char[m_chunkSize];
                m_bufferSize = Read(m_buffer, 0, m_chunkSize);
                return (m_bufferSize != 0);
            }

            var free = m_buffer.Length - m_bufferSize;

            if (free >= m_chunkSize) {
                // there is enough room, append the chunk to the existing data
                var appended = Read(m_buffer, m_bufferSize, m_chunkSize);
                if (appended == 0)
                    return false;

                m_bufferSize += appended;
                return true;
            }

            var used = m_bufferSize - m_bufferOffset;
            var size = used + free + m_chunkSize;

            if (size > m_bufferMax)
                throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024));

            var temp = new char[size];

            // read before switching the buffers, so nothing changes when there is no more data
            var read = Read(temp, used, m_chunkSize);
            if (read == 0)
                return false;

            // move the unconsumed data to the beginning of the new buffer
            Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);
            m_bufferOffset = 0;
            m_bufferSize = used + read;
            m_buffer = temp;
            return true;
        }

        /// <summary>
        /// Reads up to <paramref name="size"/> characters to the specified buffer.
        /// </summary>
        /// <returns>The number of characters read, zero is treated as the end of the input.</returns>
        /// <param name="buffer">The buffer which receives the data.</param>
        /// <param name="offset">The index in the buffer at which the data should be placed.</param>
        /// <param name="size">The maximum number of characters to read.</param>
        protected abstract int Read(char[] buffer, int offset, int size);

        /// <summary>
        /// Creates a string from the most recently scanned token.
        /// </summary>
        /// <returns>The text of the token.</returns>
        public string GetTokenValue() {
            return new String(m_buffer, m_tokenOffset, m_tokenLength);
        }

        /// <summary>
        /// Copies the characters of the most recently scanned token to the specified array.
        /// </summary>
        /// <param name="buffer">The destination array.</param>
        /// <param name="offset">The index in the destination array at which the copying begins.</param>
        public void CopyTokenTo(char[] buffer, int offset) {
            // copy only the token, not the whole internal buffer
            Array.Copy(m_buffer, m_tokenOffset, buffer, offset, m_tokenLength);
        }

        /// <summary>
        /// Appends the characters of the most recently scanned token to the specified builder.
        /// </summary>
        /// <param name="sb">The builder which receives the token.</param>
        public void CopyTokenTo(StringBuilder sb) {
            sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
        }
    }
}