|
|
using Implab;
|
|
|
using System;
|
|
|
using System.Collections.Generic;
|
|
|
using System.Diagnostics;
|
|
|
using System.Linq;
|
|
|
|
|
|
namespace Implab.Automaton {
|
|
|
/// <summary>
|
|
|
/// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
|
|
|
/// </summary>
|
|
|
/// <remarks>
|
|
|
/// Indexed alphabets are usefull in bulting efficient translations from source alphabet
|
|
|
/// to the input alphabet of the automaton. It's assumed that the index to the symbol match
|
|
|
/// is well known and documented.
|
|
|
/// </remarks>
|
|
|
public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
|
|
|
int m_nextId = 1;
|
|
|
readonly int[] m_map;
|
|
|
|
|
|
protected IndexedAlphabetBase(int mapSize) {
|
|
|
m_map = new int[mapSize];
|
|
|
}
|
|
|
|
|
|
protected IndexedAlphabetBase(int[] map) {
|
|
|
Debug.Assert(map != null && map.Length > 0);
|
|
|
Debug.Assert(map.All(x => x >= 0));
|
|
|
|
|
|
m_map = map;
|
|
|
m_nextId = map.Max() + 1;
|
|
|
}
|
|
|
|
|
|
public int DefineSymbol(T symbol) {
|
|
|
var index = GetSymbolIndex(symbol);
|
|
|
if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
|
|
|
m_map[index] = m_nextId++;
|
|
|
return m_map[index];
|
|
|
}
|
|
|
|
|
|
public int DefineSymbol(T symbol, int cls) {
|
|
|
var index = GetSymbolIndex(symbol);
|
|
|
m_map[index] = cls;
|
|
|
m_nextId = Math.Max(cls + 1, m_nextId);
|
|
|
return cls;
|
|
|
}
|
|
|
|
|
|
public int DefineClass(IEnumerable<T> symbols) {
|
|
|
return DefineClass(symbols, m_nextId);
|
|
|
}
|
|
|
|
|
|
public int DefineClass(IEnumerable<T> symbols, int cls) {
|
|
|
Safe.ArgumentNotNull(symbols, "symbols");
|
|
|
symbols = symbols.Distinct();
|
|
|
|
|
|
foreach (var symbol in symbols)
|
|
|
m_map[GetSymbolIndex(symbol)] = cls;
|
|
|
|
|
|
m_nextId = Math.Max(cls + 1, m_nextId);
|
|
|
|
|
|
return cls;
|
|
|
}
|
|
|
|
|
|
public virtual int Translate(T symbol) {
|
|
|
return m_map[GetSymbolIndex(symbol)];
|
|
|
}
|
|
|
|
|
|
public int Count {
|
|
|
get { return m_nextId; }
|
|
|
}
|
|
|
|
|
|
public bool Contains(T symbol) {
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
public abstract int GetSymbolIndex(T symbol);
|
|
|
|
|
|
public abstract IEnumerable<T> InputSymbols { get; }
|
|
|
|
|
|
/// <summary>
|
|
|
/// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion.
|
|
|
/// </summary>
|
|
|
/// <returns>The translation map.</returns>
|
|
|
public int[] GetTranslationMap() {
|
|
|
return m_map;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|