|
|
using Implab;
|
|
|
using System;
|
|
|
using System.Collections.Generic;
|
|
|
using System.Diagnostics;
|
|
|
using System.Linq;
|
|
|
using System.Text;
|
|
|
using System.Threading.Tasks;
|
|
|
|
|
|
namespace Implab.Parsing {
|
|
|
public abstract class AlphabetBase<T> : IAlphabet<T> {
|
|
|
public const int UNCLASSIFIED = 0;
|
|
|
|
|
|
int m_nextId = 1;
|
|
|
int[] m_map;
|
|
|
|
|
|
public int Count {
|
|
|
get { return m_nextId; }
|
|
|
}
|
|
|
|
|
|
protected AlphabetBase(int mapSize) {
|
|
|
m_map = new int[mapSize];
|
|
|
}
|
|
|
|
|
|
protected AlphabetBase(int[] map) {
|
|
|
Debug.Assert(map != null);
|
|
|
|
|
|
m_map = map;
|
|
|
m_nextId = map.Max() + 1;
|
|
|
}
|
|
|
|
|
|
public int DefineSymbol(T symbol) {
|
|
|
var index = GetSymbolIndex(symbol);
|
|
|
if (m_map[index] == UNCLASSIFIED)
|
|
|
m_map[index] = m_nextId++;
|
|
|
return m_map[index];
|
|
|
}
|
|
|
|
|
|
public int DefineClass(IEnumerable<T> symbols) {
|
|
|
Safe.ArgumentNotNull(symbols, "symbols");
|
|
|
symbols = symbols.Distinct();
|
|
|
|
|
|
foreach (var symbol in symbols) {
|
|
|
var index = GetSymbolIndex(symbol);
|
|
|
if (m_map[index] == UNCLASSIFIED)
|
|
|
m_map[GetSymbolIndex(symbol)] = m_nextId;
|
|
|
else
|
|
|
throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
|
|
|
}
|
|
|
return m_nextId++;
|
|
|
}
|
|
|
|
|
|
public List<T>[] CreateReverseMap() {
|
|
|
return
|
|
|
Enumerable.Range(UNCLASSIFIED, Count)
|
|
|
.Select(
|
|
|
i => InputSymbols
|
|
|
.Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i)
|
|
|
.ToList()
|
|
|
)
|
|
|
.ToArray();
|
|
|
}
|
|
|
|
|
|
public int[] Reclassify(IAlphabet<T> newAlphabet, IEnumerable<ICollection<int>> classes) {
|
|
|
Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
|
|
|
Safe.ArgumentNotNull(classes, "classes");
|
|
|
var reverseMap = CreateReverseMap();
|
|
|
|
|
|
int[] translationMap = new int[Count];
|
|
|
|
|
|
foreach (var scl in classes) {
|
|
|
// skip if the supper class contains the unclassified element
|
|
|
if (scl.Contains(UNCLASSIFIED))
|
|
|
continue;
|
|
|
var range = new List<T>();
|
|
|
foreach (var cl in scl) {
|
|
|
if (cl < 0 || cl >= reverseMap.Length)
|
|
|
throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl));
|
|
|
range.AddRange(reverseMap[cl]);
|
|
|
}
|
|
|
var newClass = newAlphabet.DefineClass(range);
|
|
|
foreach (var cl in scl)
|
|
|
translationMap[cl] = newClass;
|
|
|
}
|
|
|
|
|
|
return translationMap;
|
|
|
}
|
|
|
|
|
|
public int Translate(T symbol) {
|
|
|
return m_map[GetSymbolIndex(symbol)];
|
|
|
}
|
|
|
|
|
|
public abstract int GetSymbolIndex(T symbol);
|
|
|
|
|
|
public abstract IEnumerable<T> InputSymbols { get; }
|
|
|
|
|
|
public int[] GetTranslationMap() {
|
|
|
return m_map;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|