IndexedAlphabetBase.cs
108 lines
| 3.9 KiB
| text/x-csharp
|
CSharpLexer
cin
|
r162 | using Implab; | ||
using System; | ||||
using System.Collections.Generic; | ||||
using System.Diagnostics; | ||||
using System.Linq; | ||||
namespace Implab.Automaton { | ||||
/// <summary> | ||||
/// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index. | ||||
/// </summary> | ||||
cin
|
r167 | /// <remarks> | ||
/// Indexed alphabets are usefull in bulting efficient translations from source alphabet | ||||
/// to the input alphabet of the automaton. It's assumed that the index to the symbol match | ||||
/// is well known and documented. | ||||
/// </remarks> | ||||
cin
|
r162 | public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> { | ||
int m_nextId = 1; | ||||
readonly int[] m_map; | ||||
public int Count { | ||||
get { return m_nextId; } | ||||
} | ||||
protected IndexedAlphabetBase(int mapSize) { | ||||
m_map = new int[mapSize]; | ||||
} | ||||
protected IndexedAlphabetBase(int[] map) { | ||||
Debug.Assert(map != null); | ||||
m_map = map; | ||||
m_nextId = map.Max() + 1; | ||||
} | ||||
public int DefineSymbol(T symbol) { | ||||
var index = GetSymbolIndex(symbol); | ||||
cin
|
r164 | if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT) | ||
cin
|
r162 | m_map[index] = m_nextId++; | ||
return m_map[index]; | ||||
} | ||||
public int DefineClass(IEnumerable<T> symbols) { | ||||
Safe.ArgumentNotNull(symbols, "symbols"); | ||||
symbols = symbols.Distinct(); | ||||
foreach (var symbol in symbols) { | ||||
var index = GetSymbolIndex(symbol); | ||||
cin
|
r164 | if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT) | ||
cin
|
r162 | m_map[GetSymbolIndex(symbol)] = m_nextId; | ||
else | ||||
throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol)); | ||||
} | ||||
return m_nextId++; | ||||
} | ||||
public List<T>[] CreateReverseMap() { | ||||
return | ||||
cin
|
r164 | Enumerable.Range(0, Count) | ||
cin
|
r162 | .Select( | ||
i => InputSymbols | ||||
cin
|
r164 | .Where(x => i != DFAConst.UNCLASSIFIED_INPUT && m_map[GetSymbolIndex(x)] == i) | ||
cin
|
r162 | .ToList() | ||
) | ||||
.ToArray(); | ||||
} | ||||
public int[] Reclassify(IAlphabetBuilder<T> newAlphabet, IEnumerable<IEnumerable<int>> classes) { | ||||
Safe.ArgumentNotNull(newAlphabet, "newAlphabet"); | ||||
Safe.ArgumentNotNull(classes, "classes"); | ||||
var reverseMap = CreateReverseMap(); | ||||
var translationMap = new int[Count]; | ||||
foreach (var scl in classes) { | ||||
// skip if the supper class contains the unclassified element | ||||
cin
|
r164 | if (scl.Contains(DFAConst.UNCLASSIFIED_INPUT)) | ||
cin
|
r162 | continue; | ||
var range = new List<T>(); | ||||
foreach (var cl in scl) { | ||||
if (cl < 0 || cl >= reverseMap.Length) | ||||
throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl)); | ||||
range.AddRange(reverseMap[cl]); | ||||
} | ||||
var newClass = newAlphabet.DefineClass(range); | ||||
foreach (var cl in scl) | ||||
translationMap[cl] = newClass; | ||||
} | ||||
return translationMap; | ||||
} | ||||
public virtual int Translate(T symbol) { | ||||
return m_map[GetSymbolIndex(symbol)]; | ||||
} | ||||
public abstract int GetSymbolIndex(T symbol); | ||||
public abstract IEnumerable<T> InputSymbols { get; } | ||||
/// <summary> | ||||
/// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion. | ||||
/// </summary> | ||||
/// <returns>The translation map.</returns> | ||||
public int[] GetTranslationMap() { | ||||
return m_map; | ||||
} | ||||
} | ||||
} | ||||