IndexedAlphabetBase.cs
107 lines
| 3.6 KiB
| text/x-csharp
|
CSharpLexer
cin
|
r158 | using Implab; | ||
using System; | ||||
using System.Collections.Generic; | ||||
using System.Diagnostics; | ||||
using System.Linq; | ||||
using System.Text; | ||||
using System.Threading.Tasks; | ||||
namespace Implab.Parsing { | ||||
/// <summary> | ||||
/// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index. | ||||
/// </summary> | ||||
public abstract class IndexedAlphabetBase<T> : IAlphabet<T> { | ||||
public const int UNCLASSIFIED = 0; | ||||
int m_nextId = 1; | ||||
readonly int[] m_map; | ||||
public int Count { | ||||
get { return m_nextId; } | ||||
} | ||||
protected IndexedAlphabetBase(int mapSize) { | ||||
m_map = new int[mapSize]; | ||||
} | ||||
protected IndexedAlphabetBase(int[] map) { | ||||
Debug.Assert(map != null); | ||||
m_map = map; | ||||
m_nextId = map.Max() + 1; | ||||
} | ||||
public int DefineSymbol(T symbol) { | ||||
var index = GetSymbolIndex(symbol); | ||||
if (m_map[index] == UNCLASSIFIED) | ||||
m_map[index] = m_nextId++; | ||||
return m_map[index]; | ||||
} | ||||
public int DefineClass(IEnumerable<T> symbols) { | ||||
Safe.ArgumentNotNull(symbols, "symbols"); | ||||
symbols = symbols.Distinct(); | ||||
foreach (var symbol in symbols) { | ||||
var index = GetSymbolIndex(symbol); | ||||
if (m_map[index] == UNCLASSIFIED) | ||||
m_map[GetSymbolIndex(symbol)] = m_nextId; | ||||
else | ||||
throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol)); | ||||
} | ||||
return m_nextId++; | ||||
} | ||||
public List<T>[] CreateReverseMap() { | ||||
return | ||||
Enumerable.Range(UNCLASSIFIED, Count) | ||||
.Select( | ||||
i => InputSymbols | ||||
.Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i) | ||||
.ToList() | ||||
) | ||||
.ToArray(); | ||||
} | ||||
public int[] Reclassify(IAlphabet<T> newAlphabet, IEnumerable<ICollection<int>> classes) { | ||||
Safe.ArgumentNotNull(newAlphabet, "newAlphabet"); | ||||
Safe.ArgumentNotNull(classes, "classes"); | ||||
var reverseMap = CreateReverseMap(); | ||||
int[] translationMap = new int[Count]; | ||||
foreach (var scl in classes) { | ||||
// skip if the supper class contains the unclassified element | ||||
if (scl.Contains(UNCLASSIFIED)) | ||||
continue; | ||||
var range = new List<T>(); | ||||
foreach (var cl in scl) { | ||||
if (cl < 0 || cl >= reverseMap.Length) | ||||
throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl)); | ||||
range.AddRange(reverseMap[cl]); | ||||
} | ||||
var newClass = newAlphabet.DefineClass(range); | ||||
foreach (var cl in scl) | ||||
translationMap[cl] = newClass; | ||||
} | ||||
return translationMap; | ||||
} | ||||
public virtual int Translate(T symbol) { | ||||
return m_map[GetSymbolIndex(symbol)]; | ||||
} | ||||
public abstract int GetSymbolIndex(T symbol); | ||||
public abstract IEnumerable<T> InputSymbols { get; } | ||||
/// <summary> | ||||
/// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion. | ||||
/// </summary> | ||||
/// <returns>The translation map.</returns> | ||||
public int[] GetTranslationMap() { | ||||
return m_map; | ||||
} | ||||
} | ||||
} | ||||