##// END OF EJS Templates
README.md edited online with Bitbucket
README.md edited online with Bitbucket

File last commit:

r182:76e8f2ba12b8 ref20160224
r282:12b89f1cfe9f default
Show More
TextScanner.cs
157 lines | 5.4 KiB | text/x-csharp | CSharpLexer
using System;
using Implab.Components;
using System.Diagnostics;
using Implab.Automaton;
using System.Text;
namespace Implab.Formats {
    /// <summary>
    /// Base class for scanners which cut a sequence of characters into tokens
    /// using a table driven automaton (see <c>ReadToken</c>).
    /// </summary>
    /// <remarks>
    /// The scanner works over a character buffer. The buffer is either supplied
    /// as a whole (constructor taking an array, or <c>Feed(char[], int, int)</c>)
    /// or is filled chunk by chunk through the abstract <c>Read</c> method.
    /// </remarks>
    public abstract class TextScanner : Disposable {
        // upper limit for the size of the buffer allocated by Feed()
        readonly int m_bufferMax;
        // number of characters requested from Read() per call; values less than
        // or equal to zero disable reading, see Feed()
        readonly int m_chunkSize;

        char[] m_buffer;
        // position of the first character which isn't consumed yet
        int m_bufferOffset;
        // position right after the last valid character in the buffer
        int m_bufferSize;
        // location of the last token read by ReadToken
        int m_tokenOffset;
        int m_tokenLength;

        /// <summary>
        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
        /// which obtains the data through <c>Read</c>.
        /// </summary>
        /// <param name="bufferMax">The maximum size of the internal buffer, exceeding it makes <c>Feed()</c> throw.</param>
        /// <param name="chunkSize">The number of characters requested from <c>Read</c> at once.</param>
        protected TextScanner(int bufferMax, int chunkSize) {
            // the arguments are checked, the fields aren't assigned at this point yet
            Debug.Assert(chunkSize <= bufferMax);

            m_bufferMax = bufferMax;
            m_chunkSize = chunkSize;
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
        /// over the specified buffer.
        /// </summary>
        /// <remarks>
        /// The chunk size is left zero, therefore <c>Feed()</c> will never call <c>Read</c>
        /// for the instance created with this constructor.
        /// </remarks>
        /// <param name="buffer">The characters to scan, can be <c>null</c>, in this case the scanner starts empty.</param>
        protected TextScanner(char[] buffer) {
            if (buffer != null) {
                m_buffer = buffer;
                m_bufferSize = buffer.Length;
            }
        }

        /// <summary>
        /// (hungry) Reads the next token.
        /// </summary>
        /// <remarks>
        /// The automaton is run from the <paramref name="state"/> and consumes characters
        /// until there is no transition for the next character or the data is over, when
        /// the buffer is exhausted more data is requested through <c>Feed()</c>. There is no
        /// backtracking: the state in which the automaton has stopped must be final,
        /// otherwise <c>ParserException</c> is thrown. The consumed characters are skipped
        /// in the buffer in any case.
        /// </remarks>
        /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns>
        /// <param name="dfa">The transition map for the automaton, indexed by the state and the input symbol.</param>
        /// <param name="final">Final states of the automaton.</param>
        /// <param name="tags">Tags of the states, the entry for the state in which the automaton has stopped is returned in <paramref name="tag"/>.</param>
        /// <param name="state">The initial state for the automaton.</param>
        /// <param name="alphabet">Maps a character code to the input symbol, characters beyond the end of this array are treated as <c>AutomatonConst.UNCLASSIFIED_INPUT</c>.</param>
        /// <param name="tag">Tags of the final state or <c>null</c> if the token wasn't read.</param>
        internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
            m_tokenLength = 0;
            tag = null;

            var maxSymbol = alphabet.Length - 1;
            int next;
            do {
                // after the next chunk is read the offset in the buffer may change
                int pos = m_bufferOffset + m_tokenLength;
                next = state;
                while (pos < m_bufferSize) {
                    var ch = m_buffer[pos];

                    next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
                    if (next == AutomatonConst.UNREACHABLE_STATE)
                        break;

                    state = next;
                    pos++;
                }
                m_tokenLength = pos - m_bufferOffset;
                // Feed() is called only when the whole buffer is consumed and
                // the automaton is still able to continue
            } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());

            // remember the token and skip it in the buffer
            m_tokenOffset = m_bufferOffset;
            m_bufferOffset += m_tokenLength;

            if (final[state]) {
                tag = tags[state];
                return true;
            }

            if (m_bufferOffset == m_bufferSize) {
                if (m_tokenLength == 0) //EOF
                    return false;

                // the data is over in the middle of the token
                throw new ParserException();
            }

            // m_bufferOffset points to the character which has stopped the automaton
            throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
        }

        /// <summary>
        /// Replaces the current buffer with the specified one, the scanning will
        /// continue from the <paramref name="offset"/>.
        /// </summary>
        /// <param name="buffer">The new buffer.</param>
        /// <param name="offset">The position of the first character to scan.</param>
        /// <param name="length">The number of characters to scan starting from the <paramref name="offset"/>.</param>
        protected void Feed(char[] buffer, int offset, int length) {
            m_buffer = buffer;
            m_bufferOffset = offset;
            m_bufferSize = offset + length;
        }

        /// <summary>
        /// Appends the next chunk of data obtained from <c>Read</c> to the buffer.
        /// </summary>
        /// <remarks>
        /// When the free space at the end of the buffer is less than the chunk size
        /// a new buffer is allocated, the characters which aren't consumed yet are
        /// moved to its beginning and the buffer offset is reset to zero.
        /// </remarks>
        /// <returns><c>true</c> if some data was added to the buffer, <c>false</c> if
        /// the chunk size isn't positive or <c>Read</c> has returned no data.</returns>
        protected bool Feed() {
            if (m_chunkSize <= 0)
                return false;

            if (m_buffer != null) {
                var free = m_buffer.Length - m_bufferSize;

                if (free < m_chunkSize) {
                    free += m_chunkSize;
                    var used = m_bufferSize - m_bufferOffset;
                    var size = used + free;

                    if (size > m_bufferMax)
                        throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));

                    var temp = new char[size];

                    // the new data is placed right after the space reserved for
                    // the characters kept from the old buffer
                    var read = Read(temp, used, m_chunkSize);
                    if (read == 0)
                        return false;

                    Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);

                    m_bufferOffset = 0;
                    m_bufferSize = used + read;
                    m_buffer = temp;
                } else {
                    var read = Read(m_buffer, m_bufferSize, m_chunkSize);
                    if (read == 0)
                        return false;

                    // only the characters actually read are valid, the amount
                    // can be less than the requested chunk size
                    m_bufferSize += read;
                }
                return true;
            } else {
                Debug.Assert(m_bufferOffset == 0);
                m_buffer = new char[m_chunkSize];
                m_bufferSize = Read(m_buffer, 0, m_chunkSize);
                return (m_bufferSize != 0);
            }
        }

        /// <summary>
        /// Reads the next portion of data to the specified buffer.
        /// </summary>
        /// <returns>The number of characters placed to the <paramref name="buffer"/>,
        /// zero is treated by <c>Feed()</c> as the end of data.</returns>
        /// <param name="buffer">The buffer to store the data.</param>
        /// <param name="offset">The position in the <paramref name="buffer"/> where the data should start.</param>
        /// <param name="size">The number of characters requested.</param>
        protected abstract int Read(char[] buffer, int offset, int size);

        /// <summary>
        /// Gets the text of the last token read.
        /// </summary>
        /// <returns>The new string with the characters of the token.</returns>
        public string GetTokenValue() {
            return new String(m_buffer, m_tokenOffset, m_tokenLength);
        }

        /// <summary>
        /// Copies the characters of the last token read to the specified array.
        /// </summary>
        /// <param name="buffer">The destination array.</param>
        /// <param name="offset">The position in the <paramref name="buffer"/> at which the copy starts.</param>
        public void CopyTokenTo(char[] buffer, int offset) {
            Array.Copy(m_buffer, m_tokenOffset, buffer, offset, m_tokenLength);
        }

        /// <summary>
        /// Appends the characters of the last token read to the specified string builder.
        /// </summary>
        /// <param name="sb">The string builder to append the token to.</param>
        public void CopyTokenTo(StringBuilder sb) {
            sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
        }
    }
}