IndexedAlphabetBase.cs
94 lines
| 2.9 KiB
| text/x-csharp
|
CSharpLexer
cin
|
r162 | using Implab; | ||
using System; | ||||
using System.Collections.Generic; | ||||
using System.Diagnostics; | ||||
using System.Linq; | ||||
namespace Implab.Automaton {
    /// <summary>
    /// An indexed alphabet is a finite set of symbols in which each symbol has a unique zero-based index.
    /// </summary>
    /// <remarks>
    /// Indexed alphabets are useful for building efficient translations from a source alphabet
    /// to the input alphabet of an automaton. It is assumed that the mapping from a symbol to
    /// its index is well known and documented.
    /// </remarks>
    /// <typeparam name="T">The type of the symbols of the source alphabet.</typeparam>
    public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
        // The class id that the automatic overloads hand out next. It starts at 1;
        // presumably class 0 is DFAConst.UNCLASSIFIED_INPUT, the value a freshly
        // allocated map is filled with - TODO confirm against DFAConst.
        int m_nextId = 1;

        // Translation map: entry i holds the class id of the symbol whose index is i.
        readonly int[] m_map;

        /// <summary>
        /// Creates an alphabet backed by a newly allocated, zero-filled translation map.
        /// </summary>
        /// <param name="mapSize">The number of entries in the translation map.</param>
        protected IndexedAlphabetBase(int mapSize) {
            m_map = new int[mapSize];
        }

        /// <summary>
        /// Creates an alphabet on top of an existing translation map.
        /// </summary>
        /// <remarks>
        /// The array is stored as is (it is not copied), so later definitions made through
        /// this instance are written into the supplied array.
        /// </remarks>
        /// <param name="map">
        /// The translation map from symbol indexes to class ids. It must not be <c>null</c>
        /// (checked by <c>Safe.ArgumentNotNull</c>) and must not be empty.
        /// </param>
        /// <exception cref="ArgumentException">The <paramref name="map"/> is empty.</exception>
        protected IndexedAlphabetBase(int[] map) {
            // These checks must also hold in release builds: without them a null map
            // fails with a NullReferenceException and an empty one makes Max() throw.
            Safe.ArgumentNotNull(map, "map");
            if (map.Length == 0)
                throw new ArgumentException("The translation map must not be empty", "map");

            // Class ids are expected to be non-negative; verified in debug builds only.
            Debug.Assert(map.All(x => x >= 0));

            m_map = map;
            // Continue numbering right after the highest class id already present.
            m_nextId = map.Max() + 1;
        }

        /// <summary>
        /// Ensures that the symbol has a class: when its map entry equals
        /// <c>DFAConst.UNCLASSIFIED_INPUT</c> the next free class id is stored there.
        /// </summary>
        /// <param name="symbol">The symbol to define.</param>
        /// <returns>The class id stored for the symbol after the call.</returns>
        public int DefineSymbol(T symbol) {
            var index = GetSymbolIndex(symbol);
            if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
                m_map[index] = m_nextId++;
            return m_map[index];
        }

        /// <summary>
        /// Assigns the symbol to the specified class, replacing any class stored for it before.
        /// </summary>
        /// <param name="symbol">The symbol to define.</param>
        /// <param name="cls">The class id to store for the symbol.</param>
        /// <returns>The value of <paramref name="cls"/>.</returns>
        public int DefineSymbol(T symbol, int cls) {
            var index = GetSymbolIndex(symbol);
            m_map[index] = cls;
            // Keep the automatic numbering above every explicitly used class id.
            m_nextId = Math.Max(cls + 1, m_nextId);
            return cls;
        }

        /// <summary>
        /// Assigns all the specified symbols to a new class that takes the next free class id.
        /// </summary>
        /// <param name="symbols">The symbols that form the class.</param>
        /// <returns>The id of the new class.</returns>
        public int DefineClass(IEnumerable<T> symbols) {
            return DefineClass(symbols, m_nextId);
        }

        /// <summary>
        /// Assigns all the specified symbols to the specified class, replacing any classes
        /// stored for them before.
        /// </summary>
        /// <param name="symbols">The symbols that form the class; checked by <c>Safe.ArgumentNotNull</c>.</param>
        /// <param name="cls">The class id to store for every symbol.</param>
        /// <returns>The value of <paramref name="cls"/>.</returns>
        public int DefineClass(IEnumerable<T> symbols, int cls) {
            Safe.ArgumentNotNull(symbols, "symbols");

            // Duplicates need no filtering: storing the same class id in the same
            // map entry twice leaves the map unchanged.
            foreach (var symbol in symbols)
                m_map[GetSymbolIndex(symbol)] = cls;

            // Keep the automatic numbering above every explicitly used class id.
            m_nextId = Math.Max(cls + 1, m_nextId);

            return cls;
        }

        /// <summary>
        /// Translates the symbol to the class id stored for it in the translation map.
        /// </summary>
        /// <param name="symbol">The symbol to translate.</param>
        /// <returns>The class id stored at the index of the symbol.</returns>
        public virtual int Translate(T symbol) {
            return m_map[GetSymbolIndex(symbol)];
        }

        /// <summary>
        /// Gets the next free class id, which is an exclusive upper bound for the class ids
        /// assigned through this instance.
        /// </summary>
        public int Count {
            get { return m_nextId; }
        }

        /// <summary>
        /// Always returns <c>true</c>; this base class performs no membership test.
        /// </summary>
        /// <param name="symbol">The symbol to check; it is not inspected.</param>
        /// <returns><c>true</c> for every argument.</returns>
        public bool Contains(T symbol) {
            return true;
        }

        /// <summary>
        /// Lazily enumerates the symbols whose translation map entry equals the specified class.
        /// </summary>
        /// <param name="cls">The class id to look for.</param>
        /// <returns>The matching symbols in the order of their indexes.</returns>
        public IEnumerable<T> GetSymbols(int cls) {
            for (var i = 0; i < m_map.Length; i++)
                if (m_map[i] == cls)
                    yield return GetSymbolByIndex(i);
        }

        /// <summary>
        /// Gets the index of the symbol; the result is used as an index into the translation map.
        /// </summary>
        /// <param name="symbol">The symbol.</param>
        /// <returns>The index of the symbol.</returns>
        public abstract int GetSymbolIndex(T symbol);

        /// <summary>
        /// Gets the symbol for the specified translation map index.
        /// </summary>
        /// <param name="index">The index into the translation map.</param>
        /// <returns>The symbol with that index.</returns>
        public abstract T GetSymbolByIndex(int index);

        /// <summary>
        /// Gets the input symbols of the alphabet.
        /// </summary>
        public abstract IEnumerable<T> InputSymbols { get; }

        /// <summary>
        /// Gets the translation map from the index of a symbol to its class. This is useful
        /// for optimized translation of input symbols.
        /// </summary>
        /// <remarks>
        /// The internal array itself is returned, not a copy.
        /// </remarks>
        /// <returns>The translation map.</returns>
        public int[] GetTranslationMap() {
            return m_map;
        }
    }
}