##// END OF EJS Templates
Minor code changes
Minor code changes

File last commit:

r182:76e8f2ba12b8 ref20160224
r213:9ee78a345738 v2
Show More
TextScanner.cs
157 lines | 5.4 KiB | text/x-csharp | CSharpLexer
cin
Working on text scanner
r173 using System;
using Implab.Components;
cin
sync
r175 using System.Diagnostics;
using Implab.Automaton;
cin
rewritten the text scanner
r176 using System.Text;
cin
Working on text scanner
r173
namespace Implab.Formats {
cin
rewritten the text scanner
/// <summary>
/// Base class for scanners which split a sequence of characters into tokens
/// using a table-driven deterministic finite automaton.
/// </summary>
/// <remarks>
/// The characters are kept in an internal buffer. The buffer is either supplied
/// by the descendant (see <see cref="Feed(char[],int,int)"/> and the constructor
/// which accepts a ready buffer) or filled on demand in chunks obtained from
/// <see cref="Read"/>.
/// </remarks>
public abstract class TextScanner : Disposable {
    // the upper limit for the size of the internal buffer when it has to grow
    readonly int m_bufferMax;
    // the number of characters requested from Read() at once, values <= 0 disable reading
    readonly int m_chunkSize;

    char[] m_buffer;
    // the index of the first character which is not consumed yet
    int m_bufferOffset;
    // the index next to the last valid character in the buffer (not a count)
    int m_bufferSize;
    // the position and the length of the last token
    int m_tokenOffset;
    int m_tokenLength;

    /// <summary>
    /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
    /// which reads the input in chunks through <see cref="Read"/>.
    /// </summary>
    /// <param name="bufferMax">The maximum size of the internal buffer.</param>
    /// <param name="chunkSize">The number of characters requested at once, should not exceed <paramref name="bufferMax"/>.</param>
    protected TextScanner(int bufferMax, int chunkSize) {
        // check the arguments, the fields aren't assigned at this point
        Debug.Assert(chunkSize <= bufferMax);

        m_bufferMax = bufferMax;
        m_chunkSize = chunkSize;
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
    /// which scans the specified buffer.
    /// </summary>
    /// <remarks>
    /// The chunk size is left zero, therefore <see cref="Feed()"/> will never
    /// call <see cref="Read"/> for such instance.
    /// </remarks>
    /// <param name="buffer">The characters to scan, can be <c>null</c>.</param>
    protected TextScanner(char[] buffer) {
        if (buffer != null) {
            m_buffer = buffer;
            m_bufferSize = buffer.Length;
        }
    }

    /// <summary>
    /// (hungry) Reads the next token.
    /// </summary>
    /// <returns><c>true</c>, if the token was read, <c>false</c> if there are no more tokens in the stream.</returns>
    /// <param name="dfa">The transition map for the automaton, indexed by the state and the class of the input symbol.</param>
    /// <param name="final">Final states of the automaton.</param>
    /// <param name="tags">Tags, the element for the reached final state is returned in <paramref name="tag"/>.</param>
    /// <param name="state">The initial state for the automaton.</param>
    /// <param name="alphabet">Maps the code of a character to the class of the input symbol, the characters
    /// beyond this map are treated as <c>AutomatonConst.UNCLASSIFIED_INPUT</c>.</param>
    /// <param name="tag">The tags of the reached final state, <c>null</c> if the token wasn't read.</param>
    /// <exception cref="ParserException">The automaton has stopped in a non-final state.</exception>
    internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
        m_tokenLength = 0;
        tag = null;

        var maxSymbol = alphabet.Length - 1;
        int next;
        do {
            // after the next chunk is read the offset in the buffer may change
            int pos = m_bufferOffset + m_tokenLength;
            next = state;
            while (pos < m_bufferSize) {
                var ch = m_buffer[pos];

                next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];

                if (next == AutomatonConst.UNREACHABLE_STATE)
                    break;

                state = next;
                pos++;
            }
            m_tokenLength = pos - m_bufferOffset;
            // continue only when the buffer is exhausted and more data is available
        } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());

        m_tokenOffset = m_bufferOffset;
        m_bufferOffset += m_tokenLength;

        if (final[state]) {
            tag = tags[state];
            return true;
        }

        if (m_bufferOffset == m_bufferSize) {
            if (m_tokenLength == 0) //EOF
                return false;

            // the input has ended in the middle of a token
            throw new ParserException();
        }

        // m_bufferOffset points to the character which has no transition
        throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
    }

    /// <summary>
    /// Replaces the current buffer with the specified range of characters,
    /// the array is used as is without copying.
    /// </summary>
    /// <param name="buffer">The array with the characters to scan.</param>
    /// <param name="offset">The index of the first character to scan.</param>
    /// <param name="length">The number of characters to scan.</param>
    protected void Feed(char[] buffer, int offset, int length) {
        m_buffer = buffer;
        m_bufferOffset = offset;
        m_bufferSize = offset + length;
    }

    /// <summary>
    /// Reads the next chunk of characters to the buffer using <see cref="Read"/>,
    /// the buffer is reallocated when it has no room for the whole chunk.
    /// </summary>
    /// <returns><c>true</c> if the data was added to the buffer, <c>false</c> if
    /// reading is disabled (the chunk size isn't positive) or <see cref="Read"/> has returned no data.</returns>
    /// <exception cref="ParserException">The buffer has to grow beyond the specified limit.</exception>
    protected bool Feed() {
        if (m_chunkSize <= 0)
            return false;

        if (m_buffer != null) {
            var free = m_buffer.Length - m_bufferSize;
            if (free < m_chunkSize) {
                // not enough room for the chunk, move the unconsumed data
                // to the start of a new buffer
                free += m_chunkSize;
                var used = m_bufferSize - m_bufferOffset;
                var size = used + free;
                if (size > m_bufferMax)
                    throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));

                var temp = new char[size];

                // read first, the current buffer remains intact if there is no more data
                var read = Read(temp, used, m_chunkSize);
                if (read == 0)
                    return false;

                Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);
                m_bufferOffset = 0;
                m_bufferSize = used + read;
                m_buffer = temp;
            } else {
                var read = Read(m_buffer, m_bufferSize, m_chunkSize);
                if (read == 0)
                    return false;

                // only the characters actually read are valid, the chunk can be incomplete
                m_bufferSize += read;
            }
            return true;
        } else {
            Debug.Assert(m_bufferOffset == 0);
            m_buffer = new char[m_chunkSize];
            m_bufferSize = Read(m_buffer, 0, m_chunkSize);
            return (m_bufferSize != 0);
        }
    }

    /// <summary>
    /// Reads the next portion of the input to the specified buffer.
    /// </summary>
    /// <returns>The number of characters stored in the buffer, <see cref="Feed()"/> treats zero as the end of the input.</returns>
    /// <param name="buffer">The buffer to store the characters to.</param>
    /// <param name="offset">The index in the buffer of the first character to store.</param>
    /// <param name="size">The maximum number of characters to read.</param>
    protected abstract int Read(char[] buffer, int offset, int size);

    /// <summary>
    /// Gets the text of the last token.
    /// </summary>
    /// <returns>The new string with the characters of the last token.</returns>
    public string GetTokenValue() {
        return new String(m_buffer, m_tokenOffset, m_tokenLength);
    }

    /// <summary>
    /// Copies the characters of the last token to the specified array.
    /// </summary>
    /// <param name="buffer">The destination array.</param>
    /// <param name="offset">The index in the destination array to start from.</param>
    public void CopyTokenTo(char[] buffer, int offset) {
        Array.Copy(m_buffer, m_tokenOffset, buffer, offset, m_tokenLength);
    }

    /// <summary>
    /// Appends the characters of the last token to the specified string builder.
    /// </summary>
    /// <param name="sb">The destination string builder.</param>
    public void CopyTokenTo(StringBuilder sb) {
        sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
    }

}
}