FastInpurScanner.cs
128 lines
| 4.1 KiB
| text/x-csharp
|
CSharpLexer
cin
|
r289 | using Implab.Automaton; | ||
using System; | ||||
using System.Collections.Generic; | ||||
using System.Diagnostics; | ||||
using System.Linq; | ||||
using System.Runtime.CompilerServices; | ||||
using System.Text; | ||||
using System.Threading.Tasks; | ||||
namespace Implab.Formats { | ||||
/// <summary> | ||||
/// Fast input scanner for max 255 states and 255 input chacters. | ||||
/// </summary> | ||||
/// <typeparam name="TTag"></typeparam> | ||||
/// <remarks> | ||||
/// <para> | ||||
/// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table small enough to fit L1 cache. | ||||
/// </para> | ||||
/// <para> | ||||
/// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state. Each input symbol below 255 is treated as 255. | ||||
/// </para> | ||||
/// </remarks> | ||||
public class FastInputScanner<TTag> { | ||||
const int StateShift = 8; | ||||
const int StateMask = ~((1 << StateShift) - 1); | ||||
const int AlphabetSize = 1 << StateShift; | ||||
const int UnclassifiedInput = (1 << StateShift) - 1; | ||||
const byte UnreachableState = byte.MaxValue; | ||||
readonly TTag[] m_tags; | ||||
readonly bool[] m_final; | ||||
readonly byte m_initialState; | ||||
readonly byte[] m_dfa; | ||||
int m_position; | ||||
byte m_state; | ||||
protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) { | ||||
m_dfa = table; | ||||
m_final = final; | ||||
m_tags = tags; | ||||
m_initialState = initial; | ||||
} | ||||
public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) { | ||||
var states = dfaTable.GetLength(0); | ||||
Debug.Assert(states < byte.MaxValue); | ||||
m_dfa = new byte[states << StateShift]; | ||||
m_initialState = (byte)initialState; | ||||
m_tags = tags; | ||||
m_final = finalStates; | ||||
// iterate over states | ||||
for(int si = 0; si < states; si++) { | ||||
// state offset for the new table | ||||
var offset = si << StateShift; | ||||
// iterate over alphabet | ||||
for(int a = 0; a < AlphabetSize; a++) { | ||||
var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput]; | ||||
if (next == AutomatonConst.UnreachableState) | ||||
next = UnreachableState; | ||||
m_dfa[offset | a] = (byte)next; | ||||
} | ||||
} | ||||
} | ||||
public TTag Tag { | ||||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||
get { | ||||
return m_tags[m_state]; | ||||
} | ||||
} | ||||
public int Position { | ||||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||
get { | ||||
return m_position; | ||||
} | ||||
} | ||||
public bool IsFinal { | ||||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||
get { | ||||
return m_final[m_state]; | ||||
} | ||||
} | ||||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||
public void ResetState() { | ||||
m_state = m_initialState; | ||||
} | ||||
public FastInputScanner<TTag> Clone() { | ||||
var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState); | ||||
clone.m_state = m_state; | ||||
clone.m_position = m_position; | ||||
return clone; | ||||
} | ||||
public bool Scan(char[] data, int offset, int max) { | ||||
var next = m_state; | ||||
m_position = offset; | ||||
while (m_position < max) { | ||||
var ch = data[m_position]; | ||||
next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)]; | ||||
if (next == UnreachableState) | ||||
// scanner stops at the next position after the last recognized symbol | ||||
return false; | ||||
m_state = next; | ||||
m_position++; | ||||
} | ||||
return true; | ||||
} | ||||
} | ||||
} | ||||