// HG changeset patch
// User cin
// Date 2016-03-14 23:11:06
// Node ID ecfece82ca11aeb16feff35a3c84bf8139477b8d
// Parent 92d5278d1b107bd6648fbe66f7d4e202196bb92e
// Working on text scanner
//
// Files introduced by this changeset:
//   Implab/Formats/BufferScanner.cs
//   Implab/Formats/TextScanner.cs
// The changeset also modifies Implab/Implab.csproj (hunk -191,6 +191,8: two added
// lines whose content is not present in this view).

using System;
using Implab.Automaton;
using Implab.Automaton.RegularExpressions;
using Implab.Components;

namespace Implab.Formats {

    // ---- Implab/Formats/BufferScanner.cs ----

    /// <summary>
    /// Runs a DFA over a character buffer and tracks the boundaries of the
    /// token being recognized. This is a mutable struct: assigning it to
    /// another variable creates an independent copy of the scanner state.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <c>TTag</c> is not declared in this file; presumably it is
    /// defined elsewhere or is meant to become a generic parameter - confirm.
    /// </remarks>
    public struct BufferScanner {
        char[] m_buffer;
        // index of the first character of the current token
        int m_offset;
        // index of the next character to be examined
        int m_position;
        // index one past the last valid character in the buffer
        int m_hi;

        // maximum number of characters the buffer grows by in one step
        readonly int m_chunk;
        // maximum effective size of the buffer
        readonly int m_limit;

        readonly DFAStateDescriptor[] m_dfa;
        int m_state;

        /// <summary>
        /// Creates a scanner template. The buffer is attached later with <see cref="Init"/>.
        /// </summary>
        /// <param name="dfa">State table of the automaton.</param>
        /// <param name="initialState">Index of the state the automaton starts in.</param>
        /// <param name="chunk">Growth step of the buffer used by <see cref="Extend"/>.</param>
        /// <param name="limit">Upper bound of the buffer size used by <see cref="Extend"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="dfa"/> is null.</exception>
        public BufferScanner(DFAStateDescriptor[] dfa, int initialState, int chunk, int limit) {
            if (dfa == null)
                throw new ArgumentNullException("dfa");

            m_dfa = dfa;
            m_state = initialState;
            m_chunk = chunk;
            m_limit = limit;
            m_buffer = null;
            m_offset = 0;
            m_position = 0;
            m_hi = 0;
        }

        /// <summary>The buffer currently attached to the scanner; may be null.</summary>
        public char[] Buffer {
            get {
                return m_buffer;
            }
        }

        /// <summary>Index one past the last valid character in <see cref="Buffer"/>.</summary>
        public int HiMark {
            get {
                return m_hi;
            }
        }

        /// <summary>Index of the next character to be examined.</summary>
        public int Position {
            get {
                return m_position;
            }
        }

        /// <summary>Number of valid characters which are not examined yet.</summary>
        public int Length {
            get {
                return m_hi - m_position;
            }
        }

        /// <summary>Index of the first character of the current token.</summary>
        public int TokenOffset {
            get {
                return m_offset;
            }
        }

        /// <summary>Number of characters consumed for the current token.</summary>
        public int TokenLength {
            get {
                return m_position - m_offset;
            }
        }

        /// <summary>
        /// Attaches a buffer and positions the scanner at the start of a new token.
        /// </summary>
        /// <param name="buffer">The buffer to scan; null is accepted, <see cref="Extend"/> will allocate one.</param>
        /// <param name="position">Index of the first character to examine.</param>
        /// <param name="length">Number of valid characters starting at <paramref name="position"/>.</param>
        public void Init(char[] buffer, int position, int length) {
            m_buffer = buffer;
            m_position = position;
            m_offset = position;
            m_hi = position + length;
        }

        /// <summary>
        /// Makes sure there is room after <see cref="HiMark"/> for more input.
        /// When the buffer is full its content, starting from the current token,
        /// is moved to the beginning of a new, larger buffer.
        /// </summary>
        /// <returns>The number of characters which can be written starting at <see cref="HiMark"/>.</returns>
        /// <exception cref="ParserException">The buffer can't grow without exceeding the limit.</exception>
        public int Extend() {
            // no buffer attached yet: allocate the first chunk instead of
            // failing with a NullReferenceException
            if (m_buffer == null) {
                int initial = Math.Min(m_limit, m_chunk);
                if (initial <= 0)
                    throw LimitExceeded();

                m_buffer = new char[initial];
                m_offset = 0;
                m_position = 0;
                m_hi = 0;
                return initial;
            }

            // free space
            var free = m_buffer.Length - m_hi;

            // if the buffer has enough free space
            if (free > 0)
                return free;

            // effective size of the buffer (everything before the token is dropped)
            var size = m_buffer.Length - m_offset;

            // calculate the new size
            int grow = Math.Min(m_limit - size, m_chunk);
            if (grow <= 0)
                throw LimitExceeded();

            var temp = new char[size + grow];
            Array.Copy(m_buffer, m_offset, temp, 0, m_hi - m_offset);

            // all indexes are now relative to the start of the token
            m_position -= m_offset;
            m_hi -= m_offset;
            m_offset = 0;
            m_buffer = temp;

            return m_buffer.Length - m_hi;
        }

        /// <summary>
        /// Marks <paramref name="size"/> more characters after <see cref="HiMark"/> as valid input.
        /// </summary>
        /// <param name="size">Number of characters written to the buffer at <see cref="HiMark"/>.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="size"/> is negative or exceeds the free space of the buffer.
        /// </exception>
        public void RaiseMark(int size) {
            var capacity = m_buffer == null ? 0 : m_buffer.Length;
            if (size < 0 || size > capacity - m_hi)
                throw new ArgumentOutOfRangeException("size");

            m_hi += size;
        }

        /// <summary>
        /// Runs the automaton over the valid part of the buffer.
        /// </summary>
        /// <returns>
        /// <c>true</c> - additional data required, all available characters were consumed;
        /// <c>false</c> - the automaton stopped in a final state, the token is complete.
        /// </returns>
        /// <exception cref="ParserException">
        /// There is no transition for the next character and the current state isn't final.
        /// </exception>
        public bool Scan() {
            while (m_position < m_hi) {
                var ch = m_buffer[m_position];
                // NOTE(review): the character code is used as a direct index, this
                // presumably requires the transitions table to cover every char - confirm.
                var next = m_dfa[m_state].transitions[(int)ch];
                if (next == DFAConst.UNREACHABLE_STATE) {
                    if (m_dfa[m_state].final)
                        return false;

                    throw UnexpectedToken();
                }
                m_state = next;
                m_position++;
            }

            return true;
        }

        /// <summary>
        /// Signals that there will be no more input and checks that the
        /// characters consumed so far form a complete token.
        /// </summary>
        /// <exception cref="ParserException">The current state of the automaton isn't final.</exception>
        public void Eof() {
            if (!m_dfa[m_state].final)
                throw UnexpectedToken();
        }

        /// <summary>Gets the tags of the state in which the automaton currently is.</summary>
        public TTag[] GetTokenTags() {
            return m_dfa[m_state].tags;
        }

        // Creates the error which reports the characters consumed for the current token.
        ParserException UnexpectedToken() {
            return new ParserException(
                String.Format(
                    "Unexpected token '{0}'",
                    new string(m_buffer, m_offset, m_position - m_offset)
                )
            );
        }

        // Creates the error which reports that the buffer can't grow any further.
        ParserException LimitExceeded() {
            return new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit));
        }
    }

    // ---- Implab/Formats/TextScanner.cs ----

    /// <summary>
    /// Base class for scanners which read tokens from a text source. The
    /// derived class supplies the input through <see cref="Feed"/> and reports
    /// the end of the input through <see cref="EOF"/>.
    /// </summary>
    public abstract class TextScanner : Disposable {

        char[] m_buffer;
        int m_offset;
        int m_length;
        int m_tokenOffset;
        int m_tokenLength;
        TTag[] m_tags;

        BufferScanner m_scanner;

        /// <summary>
        /// Creates the scanner without a template. Kept for compatibility; the
        /// template stays uninitialized and can't be used to read tokens.
        /// </summary>
        protected TextScanner() {
        }

        /// <summary>
        /// Creates the scanner with the specified template.
        /// </summary>
        /// <param name="scanner">
        /// The template which is copied every time a token is read, it defines
        /// the automaton and its initial state.
        /// </param>
        protected TextScanner(BufferScanner scanner) {
            m_scanner = scanner;
        }

        /// <summary>
        /// Reads the next token from the input.
        /// </summary>
        /// <returns>
        /// <c>true</c> if a token has been read; <c>false</c> if the end of the
        /// input is reached and there are no characters left to form a token.
        /// </returns>
        protected bool ReadTokenInternal() {
            if (EOF)
                return false;

            // create a new scanner from template (scanners are structs)
            var inst = m_scanner;

            // initialize the scanner
            inst.Init(m_buffer, m_offset, m_length);

            // do work
            while (inst.Scan()) {
                Feed(ref inst);

                // there will be no more data, without this check Scan() would
                // ask for the input forever
                if (EOF)
                    break;
            }

            // save result
            m_buffer = inst.Buffer;
            m_length = inst.Length;
            m_offset = inst.Position;
            m_tokenOffset = inst.TokenOffset;
            m_tokenLength = inst.TokenLength;

            m_tags = inst.GetTokenTags();

            // the end of the input with nothing consumed isn't a token
            return m_tokenLength > 0 || !EOF;
        }

        /// <summary>
        /// Gets the text of the last token read by <see cref="ReadTokenInternal"/>.
        /// </summary>
        /// <returns>The text of the token, an empty string if nothing was read yet.</returns>
        protected string GetToken() {
            if (m_buffer == null)
                return String.Empty;

            return new String(m_buffer, m_tokenOffset, m_tokenLength);
        }

        /// <summary>
        /// The tags of the last token read by <see cref="ReadTokenInternal"/>.
        /// </summary>
        protected TTag[] Tags {
            get {
                return m_tags;
            }
        }

        /// <summary>
        /// Feed the specified scanner.
        /// </summary>
        /// <param name="scanner">Scanner.</param>
        /// <example>
        /// <code>
        /// protected override void Feed(ref BufferScanner scanner) {
        ///     var size = scanner.Extend();
        ///     var actual = m_reader.Read(scanner.Buffer, scanner.HiMark, size);
        ///     if (actual == 0) {
        ///         m_eof = true;
        ///         scanner.Eof();
        ///     } else {
        ///         scanner.RaiseMark(actual);
        ///     }
        /// }
        /// </code>
        /// </example>
        protected abstract void Feed(ref BufferScanner scanner);

        /// <summary>
        /// Indicates that the end of the input is reached. Once this is
        /// <c>true</c>, <see cref="ReadTokenInternal"/> stops asking for data.
        /// </summary>
        public abstract bool EOF { get; }

    }
}