|
|
using Implab;
|
|
|
using System;
|
|
|
using System.Collections.Generic;
|
|
|
using System.Diagnostics;
|
|
|
using System.Linq;
|
|
|
|
|
|
namespace Implab.Parsing {
|
|
|
public class DFADefinition<TInput, TState, TTag> : IDFADefinition<TInput, TState, TTag> {
|
|
|
readonly List<DFAStateDescriptior<TTag>> m_states;
|
|
|
|
|
|
public const int INITIAL_STATE = 1;
|
|
|
public const int UNREACHEBLE_STATE = 0;
|
|
|
|
|
|
DFAStateDescriptior<TTag>[] m_statesArray;
|
|
|
readonly int m_alpabetSize;
|
|
|
|
|
|
public DFADefinition(int alphabetSize) {
|
|
|
m_states = new List<DFAStateDescriptior<TTag>>();
|
|
|
m_alpabetSize = alphabetSize;
|
|
|
|
|
|
m_states.Add(new DFAStateDescriptior<TTag>());
|
|
|
}
|
|
|
|
|
|
public bool InitialStateIsFinal {
|
|
|
get {
|
|
|
return m_states[INITIAL_STATE].final;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
public int AddState() {
|
|
|
var index = m_states.Count;
|
|
|
m_states.Add(new DFAStateDescriptior<TTag> {
|
|
|
final = false,
|
|
|
transitions = new int[AlphabetSize]
|
|
|
});
|
|
|
m_statesArray = null;
|
|
|
|
|
|
return index;
|
|
|
}
|
|
|
|
|
|
public int AddState(TTag[] tag) {
|
|
|
var index = m_states.Count;
|
|
|
bool final = tag != null && tag.Length != 0;
|
|
|
m_states.Add(new DFAStateDescriptior<TTag> {
|
|
|
final = final,
|
|
|
transitions = new int[AlphabetSize],
|
|
|
tag = final ? tag : null
|
|
|
});
|
|
|
m_statesArray = null;
|
|
|
return index;
|
|
|
}
|
|
|
|
|
|
public void DefineTransition(TState s1, TState s2, TInput symbol) {
|
|
|
int is1 = StateAlphabet.Translate(s1);
|
|
|
int is2 = StateAlphabet.Translate(s2);
|
|
|
int isym = InputAlphabet.Translate(symbol);
|
|
|
|
|
|
Safe.ArgumentAssert(is1 != 0, "s1");
|
|
|
Safe.ArgumentAssert(is2 != 0, "s2");
|
|
|
Safe.ArgumentAssert(isym != 0, "symbol");
|
|
|
|
|
|
m_states[is1].transitions[isym] = is2;
|
|
|
}
|
|
|
|
|
|
#region IDFADefinition implementation
|
|
|
|
|
|
public DFAStateDescriptior<TTag>[] GetTransitionTable() {
|
|
|
if (m_statesArray == null)
|
|
|
m_statesArray = m_states.ToArray();
|
|
|
return m_statesArray;
|
|
|
}
|
|
|
|
|
|
public IAlphabet<TInput> InputAlphabet {
|
|
|
get {
|
|
|
throw new NotImplementedException();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
public IAlphabet<TState> StateAlphabet {
|
|
|
get {
|
|
|
throw new NotImplementedException();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
#endregion
|
|
|
|
|
|
protected IDFADefinition<> Optimize<TA>(Func<IAlphabet<TA>, IDFADefinition> dfaFactory,IAlphabet<TA> sourceAlphabet, IAlphabet<TA> minimalAlphabet) {
|
|
|
Safe.ArgumentNotNull(dfaFactory, "dfaFactory");
|
|
|
Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet");
|
|
|
|
|
|
var setComparer = new CustomEqualityComparer<HashSet<int>>(
|
|
|
(x, y) => x.SetEquals(y),
|
|
|
(s) => s.Sum(x => x.GetHashCode())
|
|
|
);
|
|
|
|
|
|
var arrayComparer = new CustomEqualityComparer<int[]>(
|
|
|
(x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)),
|
|
|
(a) => a.Sum(x => x.GetHashCode())
|
|
|
);
|
|
|
|
|
|
var optimalStates = new HashSet<HashSet<int>>(setComparer);
|
|
|
var queue = new HashSet<HashSet<int>>(setComparer);
|
|
|
|
|
|
foreach (var g in Enumerable
|
|
|
.Range(INITIAL_STATE, m_states.Count-1)
|
|
|
.Select(i => new {
|
|
|
index = i,
|
|
|
descriptor = m_states[i]
|
|
|
})
|
|
|
.Where(x => x.descriptor.final)
|
|
|
.GroupBy(x => x.descriptor.tag, arrayComparer)
|
|
|
) {
|
|
|
optimalStates.Add(new HashSet<int>(g.Select(x => x.index)));
|
|
|
}
|
|
|
|
|
|
var state = new HashSet<int>(
|
|
|
Enumerable
|
|
|
.Range(INITIAL_STATE, m_states.Count - 1)
|
|
|
.Where(i => !m_states[i].final)
|
|
|
);
|
|
|
optimalStates.Add(state);
|
|
|
queue.Add(state);
|
|
|
|
|
|
while (queue.Count > 0) {
|
|
|
var stateA = queue.First();
|
|
|
queue.Remove(stateA);
|
|
|
|
|
|
for (int c = 0; c < AlphabetSize; c++) {
|
|
|
var stateX = new HashSet<int>();
|
|
|
|
|
|
for(int s = 1; s < m_states.Count; s++) {
|
|
|
if (stateA.Contains(m_states[s].transitions[c]))
|
|
|
stateX.Add(s);
|
|
|
}
|
|
|
|
|
|
foreach (var stateY in optimalStates.ToArray()) {
|
|
|
if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
|
|
|
var stateR1 = new HashSet<int>(stateY);
|
|
|
var stateR2 = new HashSet<int>(stateY);
|
|
|
|
|
|
stateR1.IntersectWith(stateX);
|
|
|
stateR2.ExceptWith(stateX);
|
|
|
|
|
|
optimalStates.Remove(stateY);
|
|
|
optimalStates.Add(stateR1);
|
|
|
optimalStates.Add(stateR2);
|
|
|
|
|
|
if (queue.Contains(stateY)) {
|
|
|
queue.Remove(stateY);
|
|
|
queue.Add(stateR1);
|
|
|
queue.Add(stateR2);
|
|
|
} else {
|
|
|
queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// строим карты соотвествия оптимальных состояний с оригинальными
|
|
|
|
|
|
var initialState = optimalStates.Single(x => x.Contains(INITIAL_STATE));
|
|
|
|
|
|
// карта получения оптимального состояния по соотвествующему ему простому состоянию
|
|
|
int[] reveseOptimalMap = new int[m_states.Count];
|
|
|
// карта с индексами оптимальных состояний
|
|
|
HashSet<int>[] optimalMap = new HashSet<int>[optimalStates.Count + 1];
|
|
|
{
|
|
|
optimalMap[0] = new HashSet<int>(); // unreachable state
|
|
|
optimalMap[1] = initialState; // initial state
|
|
|
foreach (var ss in initialState)
|
|
|
reveseOptimalMap[ss] = 1;
|
|
|
|
|
|
int i = 2;
|
|
|
foreach (var s in optimalStates) {
|
|
|
if (s.SetEquals(initialState))
|
|
|
continue;
|
|
|
optimalMap[i] = s;
|
|
|
foreach (var ss in s)
|
|
|
reveseOptimalMap[ss] = i;
|
|
|
i++;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// получаем минимальный алфавит
|
|
|
|
|
|
var minClasses = new HashSet<HashSet<int>>(setComparer);
|
|
|
var alphaQueue = new Queue<HashSet<int>>();
|
|
|
alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
|
|
|
|
|
|
for (int s = 1 ; s < optimalMap.Length; s++) {
|
|
|
var newQueue = new Queue<HashSet<int>>();
|
|
|
|
|
|
foreach (var A in alphaQueue) {
|
|
|
if (A.Count == 1) {
|
|
|
minClasses.Add(A);
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
// различаем классы символов, которые переводят в различные оптимальные состояния
|
|
|
// optimalState -> alphaClass
|
|
|
var classes = new Dictionary<int, HashSet<int>>();
|
|
|
|
|
|
foreach (var term in A) {
|
|
|
// ищем все переходы класса по символу term
|
|
|
var s2 = reveseOptimalMap[
|
|
|
optimalMap[s].Select(x => m_states[x].transitions[term]).FirstOrDefault(x => x != 0) // первое допустимое элементарное состояние, если есть
|
|
|
];
|
|
|
|
|
|
HashSet<int> A2;
|
|
|
if (!classes.TryGetValue(s2, out A2)) {
|
|
|
A2 = new HashSet<int>();
|
|
|
newQueue.Enqueue(A2);
|
|
|
classes[s2] = A2;
|
|
|
}
|
|
|
A2.Add(term);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
if (newQueue.Count == 0)
|
|
|
break;
|
|
|
alphaQueue = newQueue;
|
|
|
}
|
|
|
|
|
|
foreach (var A in alphaQueue)
|
|
|
minClasses.Add(A);
|
|
|
|
|
|
var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses);
|
|
|
|
|
|
// построение автомата
|
|
|
|
|
|
var minimalDFA = dfaFactory(minimalAlphabet);
|
|
|
|
|
|
var states = new int[ optimalMap.Length ];
|
|
|
states[0] = UNREACHEBLE_STATE;
|
|
|
|
|
|
for(var s = INITIAL_STATE; s < states.Length; s++) {
|
|
|
var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty<int>()).Distinct().ToArray();
|
|
|
if (tags.Length > 0)
|
|
|
states[s] = minimalDFA.AddState(tags);
|
|
|
else
|
|
|
states[s] = minimalDFA.AddState();
|
|
|
}
|
|
|
|
|
|
Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE);
|
|
|
|
|
|
for (int s1 = 1; s1 < m_states.Count; s1++) {
|
|
|
for (int c = 0; c < AlphabetSize; c++) {
|
|
|
var s2 = m_states[s1].transitions[c];
|
|
|
if (s2 != UNREACHEBLE_STATE) {
|
|
|
minimalDFA.DefineTransition(
|
|
|
reveseOptimalMap[s1],
|
|
|
reveseOptimalMap[s2],
|
|
|
alphabetMap[c]
|
|
|
);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
return minimalDFA;
|
|
|
}
|
|
|
|
|
|
public void PrintDFA<TA>(IAlphabet<TA> alphabet) {
|
|
|
|
|
|
var reverseMap = alphabet.CreateReverseMap();
|
|
|
|
|
|
for (int i = 1; i < reverseMap.Length; i++) {
|
|
|
Console.WriteLine("C{0}: {1}", i, String.Join(",", reverseMap[i]));
|
|
|
}
|
|
|
|
|
|
for (int i = 1; i < m_states.Count; i++) {
|
|
|
var s = m_states[i];
|
|
|
for (int c = 0; c < AlphabetSize; c++)
|
|
|
if (s.transitions[c] != UNREACHEBLE_STATE)
|
|
|
Console.WriteLine("S{0} -{1}-> S{2}{3}", i, String.Join(",", reverseMap[c]), s.transitions[c], m_states[s.transitions[c]].final ? "$" : "");
|
|
|
}
|
|
|
}
|
|
|
|
|
|
public int AlphabetSize {
|
|
|
get {
|
|
|
return m_alpabetSize;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|