DFADefinition.cs
262 lines
| 10.1 KiB
| text/x-csharp
|
CSharpLexer
cin
|
r158 | using Implab; | ||
using System; | ||||
using System.Collections.Generic; | ||||
using System.Diagnostics; | ||||
using System.Linq; | ||||
namespace Implab.Parsing { | ||||
public class DFADefinition : IDFADefinition { | ||||
readonly List<DFAStateDescriptior> m_states; | ||||
public const int INITIAL_STATE = 1; | ||||
public const int UNREACHEBLE_STATE = 0; | ||||
DFAStateDescriptior[] m_statesArray; | ||||
readonly int m_alpabetSize; | ||||
public DFADefinition(int alphabetSize) { | ||||
m_states = new List<DFAStateDescriptior>(); | ||||
m_alpabetSize = alphabetSize; | ||||
m_states.Add(new DFAStateDescriptior()); | ||||
} | ||||
public DFAStateDescriptior[] States { | ||||
get { | ||||
if (m_statesArray == null) | ||||
m_statesArray = m_states.ToArray(); | ||||
return m_statesArray; | ||||
} | ||||
} | ||||
public bool InitialStateIsFinal { | ||||
get { | ||||
return m_states[INITIAL_STATE].final; | ||||
} | ||||
} | ||||
public int AddState() { | ||||
var index = m_states.Count; | ||||
m_states.Add(new DFAStateDescriptior { | ||||
final = false, | ||||
transitions = new int[AlphabetSize] | ||||
}); | ||||
m_statesArray = null; | ||||
return index; | ||||
} | ||||
public int AddState(int[] tag) { | ||||
var index = m_states.Count; | ||||
bool final = tag != null && tag.Length != 0; | ||||
m_states.Add(new DFAStateDescriptior { | ||||
final = final, | ||||
transitions = new int[AlphabetSize], | ||||
tag = final ? tag : null | ||||
}); | ||||
m_statesArray = null; | ||||
return index; | ||||
} | ||||
public void DefineTransition(int s1,int s2, int symbol) { | ||||
Safe.ArgumentInRange(s1, 0, m_states.Count-1, "s1"); | ||||
Safe.ArgumentInRange(s2, 0, m_states.Count-1, "s2"); | ||||
Safe.ArgumentInRange(symbol, 0, AlphabetSize-1, "symbol"); | ||||
m_states[s1].transitions[symbol] = s2; | ||||
} | ||||
public void Optimize<TA>(IDFADefinition minimalDFA,IAlphabet<TA> sourceAlphabet, IAlphabet<TA> minimalAlphabet) { | ||||
Safe.ArgumentNotNull(minimalDFA, "minimalDFA"); | ||||
Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet"); | ||||
var setComparer = new CustomEqualityComparer<HashSet<int>>( | ||||
(x, y) => x.SetEquals(y), | ||||
(s) => s.Sum(x => x.GetHashCode()) | ||||
); | ||||
var arrayComparer = new CustomEqualityComparer<int[]>( | ||||
(x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)), | ||||
(a) => a.Sum(x => x.GetHashCode()) | ||||
); | ||||
var optimalStates = new HashSet<HashSet<int>>(setComparer); | ||||
var queue = new HashSet<HashSet<int>>(setComparer); | ||||
foreach (var g in Enumerable | ||||
.Range(INITIAL_STATE, m_states.Count-1) | ||||
.Select(i => new { | ||||
index = i, | ||||
descriptor = m_states[i] | ||||
}) | ||||
.Where(x => x.descriptor.final) | ||||
.GroupBy(x => x.descriptor.tag, arrayComparer) | ||||
) { | ||||
optimalStates.Add(new HashSet<int>(g.Select(x => x.index))); | ||||
} | ||||
var state = new HashSet<int>( | ||||
Enumerable | ||||
.Range(INITIAL_STATE, m_states.Count - 1) | ||||
.Where(i => !m_states[i].final) | ||||
); | ||||
optimalStates.Add(state); | ||||
queue.Add(state); | ||||
while (queue.Count > 0) { | ||||
var stateA = queue.First(); | ||||
queue.Remove(stateA); | ||||
for (int c = 0; c < AlphabetSize; c++) { | ||||
var stateX = new HashSet<int>(); | ||||
for(int s = 1; s < m_states.Count; s++) { | ||||
if (stateA.Contains(m_states[s].transitions[c])) | ||||
stateX.Add(s); | ||||
} | ||||
foreach (var stateY in optimalStates.ToArray()) { | ||||
if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { | ||||
var stateR1 = new HashSet<int>(stateY); | ||||
var stateR2 = new HashSet<int>(stateY); | ||||
stateR1.IntersectWith(stateX); | ||||
stateR2.ExceptWith(stateX); | ||||
optimalStates.Remove(stateY); | ||||
optimalStates.Add(stateR1); | ||||
optimalStates.Add(stateR2); | ||||
if (queue.Contains(stateY)) { | ||||
queue.Remove(stateY); | ||||
queue.Add(stateR1); | ||||
queue.Add(stateR2); | ||||
} else { | ||||
queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); | ||||
} | ||||
} | ||||
} | ||||
} | ||||
} | ||||
// строим карты соотвествия оптимальных состояний с оригинальными | ||||
var initialState = optimalStates.Single(x => x.Contains(INITIAL_STATE)); | ||||
// карта получения оптимального состояния по соотвествующему ему простому состоянию | ||||
int[] reveseOptimalMap = new int[m_states.Count]; | ||||
// карта с индексами оптимальных состояний | ||||
HashSet<int>[] optimalMap = new HashSet<int>[optimalStates.Count + 1]; | ||||
{ | ||||
optimalMap[0] = new HashSet<int>(); // unreachable state | ||||
optimalMap[1] = initialState; // initial state | ||||
foreach (var ss in initialState) | ||||
reveseOptimalMap[ss] = 1; | ||||
int i = 2; | ||||
foreach (var s in optimalStates) { | ||||
if (s.SetEquals(initialState)) | ||||
continue; | ||||
optimalMap[i] = s; | ||||
foreach (var ss in s) | ||||
reveseOptimalMap[ss] = i; | ||||
i++; | ||||
} | ||||
} | ||||
// получаем минимальный алфавит | ||||
var minClasses = new HashSet<HashSet<int>>(setComparer); | ||||
var alphaQueue = new Queue<HashSet<int>>(); | ||||
alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize))); | ||||
for (int s = 1 ; s < optimalMap.Length; s++) { | ||||
var newQueue = new Queue<HashSet<int>>(); | ||||
foreach (var A in alphaQueue) { | ||||
if (A.Count == 1) { | ||||
minClasses.Add(A); | ||||
continue; | ||||
} | ||||
// различаем классы символов, которые переводят в различные оптимальные состояния | ||||
// optimalState -> alphaClass | ||||
var classes = new Dictionary<int, HashSet<int>>(); | ||||
foreach (var term in A) { | ||||
// ищем все переходы класса по символу term | ||||
var s2 = reveseOptimalMap[ | ||||
optimalMap[s].Select(x => m_states[x].transitions[term]).FirstOrDefault(x => x != 0) // первое допустимое элементарное состояние, если есть | ||||
]; | ||||
HashSet<int> A2; | ||||
if (!classes.TryGetValue(s2, out A2)) { | ||||
A2 = new HashSet<int>(); | ||||
newQueue.Enqueue(A2); | ||||
classes[s2] = A2; | ||||
} | ||||
A2.Add(term); | ||||
} | ||||
} | ||||
if (newQueue.Count == 0) | ||||
break; | ||||
alphaQueue = newQueue; | ||||
} | ||||
foreach (var A in alphaQueue) | ||||
minClasses.Add(A); | ||||
var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses); | ||||
// построение автомата | ||||
var states = new int[ optimalMap.Length ]; | ||||
states[0] = UNREACHEBLE_STATE; | ||||
for(var s = INITIAL_STATE; s < states.Length; s++) { | ||||
var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty<int>()).Distinct().ToArray(); | ||||
if (tags.Length > 0) | ||||
states[s] = minimalDFA.AddState(tags); | ||||
else | ||||
states[s] = minimalDFA.AddState(); | ||||
} | ||||
Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE); | ||||
for (int s1 = 1; s1 < m_states.Count; s1++) { | ||||
for (int c = 0; c < AlphabetSize; c++) { | ||||
var s2 = m_states[s1].transitions[c]; | ||||
if (s2 != UNREACHEBLE_STATE) { | ||||
minimalDFA.DefineTransition( | ||||
reveseOptimalMap[s1], | ||||
reveseOptimalMap[s2], | ||||
alphabetMap[c] | ||||
); | ||||
} | ||||
} | ||||
} | ||||
} | ||||
public void PrintDFA<TA>(IAlphabet<TA> alphabet) { | ||||
var reverseMap = alphabet.CreateReverseMap(); | ||||
for (int i = 1; i < reverseMap.Length; i++) { | ||||
Console.WriteLine("C{0}: {1}", i, String.Join(",", reverseMap[i])); | ||||
} | ||||
for (int i = 1; i < m_states.Count; i++) { | ||||
var s = m_states[i]; | ||||
for (int c = 0; c < AlphabetSize; c++) | ||||
if (s.transitions[c] != UNREACHEBLE_STATE) | ||||
Console.WriteLine("S{0} -{1}-> S{2}{3}", i, String.Join(",", reverseMap[c]), s.transitions[c], m_states[s.transitions[c]].final ? "$" : ""); | ||||
} | ||||
} | ||||
public int AlphabetSize { | ||||
get; | ||||
} | ||||
} | ||||
} | ||||