##// END OF EJS Templates
Merge pull request !2 from ImplabNet v3...
Merge pull request !2 from ImplabNet v3 Changes from branch: V3

File last commit:

r289:95896f882995 v3.0.14 v3
r294:abef3ebaa230 merge default
Show More
FastInpurScanner.cs
128 lines | 4.1 KiB | text/x-csharp | CSharpLexer
using Implab.Automaton;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;
namespace Implab.Formats {
/// <summary>
/// Fast input scanner for max 255 states and 255 input chacters.
/// </summary>
/// <typeparam name="TTag"></typeparam>
/// <remarks>
/// <para>
/// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table small enough to fit L1 cache.
/// </para>
/// <para>
/// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state. Each input symbol below 255 is treated as 255.
/// </para>
/// </remarks>
public class FastInputScanner<TTag> {
const int StateShift = 8;
const int StateMask = ~((1 << StateShift) - 1);
const int AlphabetSize = 1 << StateShift;
const int UnclassifiedInput = (1 << StateShift) - 1;
const byte UnreachableState = byte.MaxValue;
readonly TTag[] m_tags;
readonly bool[] m_final;
readonly byte m_initialState;
readonly byte[] m_dfa;
int m_position;
byte m_state;
protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) {
m_dfa = table;
m_final = final;
m_tags = tags;
m_initialState = initial;
}
public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) {
var states = dfaTable.GetLength(0);
Debug.Assert(states < byte.MaxValue);
m_dfa = new byte[states << StateShift];
m_initialState = (byte)initialState;
m_tags = tags;
m_final = finalStates;
// iterate over states
for(int si = 0; si < states; si++) {
// state offset for the new table
var offset = si << StateShift;
// iterate over alphabet
for(int a = 0; a < AlphabetSize; a++) {
var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput];
if (next == AutomatonConst.UnreachableState)
next = UnreachableState;
m_dfa[offset | a] = (byte)next;
}
}
}
public TTag Tag {
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get {
return m_tags[m_state];
}
}
public int Position {
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get {
return m_position;
}
}
public bool IsFinal {
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get {
return m_final[m_state];
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void ResetState() {
m_state = m_initialState;
}
public FastInputScanner<TTag> Clone() {
var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState);
clone.m_state = m_state;
clone.m_position = m_position;
return clone;
}
public bool Scan(char[] data, int offset, int max) {
var next = m_state;
m_position = offset;
while (m_position < max) {
var ch = data[m_position];
next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)];
if (next == UnreachableState)
// scanner stops at the next position after the last recognized symbol
return false;
m_state = next;
m_position++;
}
return true;
}
}
}