|
|
using Implab.Automaton;
|
|
|
using System.Diagnostics;
|
|
|
using System.Runtime.CompilerServices;
|
|
|
|
|
|
namespace Implab.Formats {
|
|
|
|
|
|
/// <summary>
|
|
|
/// Fast input scanner for max 255 states and 255 input chacters.
|
|
|
/// </summary>
|
|
|
/// <typeparam name="TTag"></typeparam>
|
|
|
/// <remarks>
|
|
|
/// <para>
|
|
|
/// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table small enough to fit L1 cache.
|
|
|
/// </para>
|
|
|
/// <para>
|
|
|
/// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state. Each input symbol below 255 is treated as 255.
|
|
|
/// </para>
|
|
|
/// </remarks>
|
|
|
public class FastInputScanner<TTag> {
|
|
|
const int StateShift = 8;
|
|
|
const int StateMask = ~((1 << StateShift) - 1);
|
|
|
const int AlphabetSize = 1 << StateShift;
|
|
|
const int UnclassifiedInput = (1 << StateShift) - 1;
|
|
|
const byte UnreachableState = byte.MaxValue;
|
|
|
|
|
|
readonly TTag[] m_tags;
|
|
|
readonly bool[] m_final;
|
|
|
|
|
|
readonly byte m_initialState;
|
|
|
readonly byte[] m_dfa;
|
|
|
|
|
|
int m_position;
|
|
|
byte m_state;
|
|
|
|
|
|
protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) {
|
|
|
m_dfa = table;
|
|
|
m_final = final;
|
|
|
m_tags = tags;
|
|
|
m_initialState = initial;
|
|
|
}
|
|
|
|
|
|
public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) {
|
|
|
var states = dfaTable.GetLength(0);
|
|
|
Debug.Assert(states < byte.MaxValue);
|
|
|
|
|
|
m_dfa = new byte[states << StateShift];
|
|
|
m_initialState = (byte)initialState;
|
|
|
|
|
|
m_tags = tags;
|
|
|
m_final = finalStates;
|
|
|
|
|
|
// iterate over states
|
|
|
for(int si = 0; si < states; si++) {
|
|
|
// state offset for the new table
|
|
|
var offset = si << StateShift;
|
|
|
|
|
|
// iterate over alphabet
|
|
|
for(int a = 0; a < AlphabetSize; a++) {
|
|
|
var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput];
|
|
|
if (next == AutomatonConst.UnreachableState)
|
|
|
next = UnreachableState;
|
|
|
|
|
|
m_dfa[offset | a] = (byte)next;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
public TTag Tag {
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
|
get {
|
|
|
return m_tags[m_state];
|
|
|
}
|
|
|
}
|
|
|
|
|
|
public int Position {
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
|
get {
|
|
|
return m_position;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
public bool IsFinal {
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
|
get {
|
|
|
return m_final[m_state];
|
|
|
}
|
|
|
}
|
|
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
|
public void ResetState() {
|
|
|
m_state = m_initialState;
|
|
|
}
|
|
|
|
|
|
public FastInputScanner<TTag> Clone() {
|
|
|
var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState);
|
|
|
clone.m_state = m_state;
|
|
|
clone.m_position = m_position;
|
|
|
return clone;
|
|
|
}
|
|
|
|
|
|
public bool Scan(char[] data, int offset, int max) {
|
|
|
var next = m_state;
|
|
|
|
|
|
m_position = offset;
|
|
|
while (m_position < max) {
|
|
|
var ch = data[m_position];
|
|
|
|
|
|
next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)];
|
|
|
|
|
|
if (next == UnreachableState)
|
|
|
// scanner stops at the next position after the last recognized symbol
|
|
|
return false;
|
|
|
|
|
|
m_state = next;
|
|
|
m_position++;
|
|
|
}
|
|
|
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
|
|
|
}
|
|
|
}
|
|
|
|