using Implab; using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; namespace Implab.Parsing { public class DFADefinition : IDFADefinition { readonly List> m_states; public const int INITIAL_STATE = 1; public const int UNREACHEBLE_STATE = 0; DFAStateDescriptior[] m_statesArray; readonly int m_alpabetSize; public DFADefinition(int alphabetSize) { m_states = new List>(); m_alpabetSize = alphabetSize; m_states.Add(new DFAStateDescriptior()); } public bool InitialStateIsFinal { get { return m_states[INITIAL_STATE].final; } } public int AddState() { var index = m_states.Count; m_states.Add(new DFAStateDescriptior { final = false, transitions = new int[AlphabetSize] }); m_statesArray = null; return index; } public int AddState(TTag[] tag) { var index = m_states.Count; bool final = tag != null && tag.Length != 0; m_states.Add(new DFAStateDescriptior { final = final, transitions = new int[AlphabetSize], tag = final ? tag : null }); m_statesArray = null; return index; } public void DefineTransition(TState s1, TState s2, TInput symbol) { int is1 = StateAlphabet.Translate(s1); int is2 = StateAlphabet.Translate(s2); int isym = InputAlphabet.Translate(symbol); Safe.ArgumentAssert(is1 != 0, "s1"); Safe.ArgumentAssert(is2 != 0, "s2"); Safe.ArgumentAssert(isym != 0, "symbol"); m_states[is1].transitions[isym] = is2; } #region IDFADefinition implementation public DFAStateDescriptior[] GetTransitionTable() { if (m_statesArray == null) m_statesArray = m_states.ToArray(); return m_statesArray; } public IAlphabet InputAlphabet { get { throw new NotImplementedException(); } } public IAlphabet StateAlphabet { get { throw new NotImplementedException(); } } #endregion protected IDFADefinition<> Optimize(Func, IDFADefinition> dfaFactory,IAlphabet sourceAlphabet, IAlphabet minimalAlphabet) { Safe.ArgumentNotNull(dfaFactory, "dfaFactory"); Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet"); var setComparer = new CustomEqualityComparer>( (x, y) => x.SetEquals(y), (s) => s.Sum(x => x.GetHashCode()) ); var arrayComparer = new CustomEqualityComparer( (x,y) => (new HashSet(x)).SetEquals(new HashSet(y)), (a) => a.Sum(x => x.GetHashCode()) ); var optimalStates = new HashSet>(setComparer); var queue = new HashSet>(setComparer); foreach (var g in Enumerable .Range(INITIAL_STATE, m_states.Count-1) .Select(i => new { index = i, descriptor = m_states[i] }) .Where(x => x.descriptor.final) .GroupBy(x => x.descriptor.tag, arrayComparer) ) { optimalStates.Add(new HashSet(g.Select(x => x.index))); } var state = new HashSet( Enumerable .Range(INITIAL_STATE, m_states.Count - 1) .Where(i => !m_states[i].final) ); optimalStates.Add(state); queue.Add(state); while (queue.Count > 0) { var stateA = queue.First(); queue.Remove(stateA); for (int c = 0; c < AlphabetSize; c++) { var stateX = new HashSet(); for(int s = 1; s < m_states.Count; s++) { if (stateA.Contains(m_states[s].transitions[c])) stateX.Add(s); } foreach (var stateY in optimalStates.ToArray()) { if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { var stateR1 = new HashSet(stateY); var stateR2 = new HashSet(stateY); stateR1.IntersectWith(stateX); stateR2.ExceptWith(stateX); optimalStates.Remove(stateY); optimalStates.Add(stateR1); optimalStates.Add(stateR2); if (queue.Contains(stateY)) { queue.Remove(stateY); queue.Add(stateR1); queue.Add(stateR2); } else { queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); } } } } } // строим карты соотвествия оптимальных состояний с оригинальными var initialState = optimalStates.Single(x => x.Contains(INITIAL_STATE)); // карта получения оптимального состояния по соотвествующему ему простому состоянию int[] reveseOptimalMap = new int[m_states.Count]; // карта с индексами оптимальных состояний HashSet[] optimalMap = new HashSet[optimalStates.Count + 1]; { optimalMap[0] = new HashSet(); // unreachable state optimalMap[1] = initialState; // initial state foreach (var ss in initialState) reveseOptimalMap[ss] = 1; int i = 2; foreach (var s in optimalStates) { if (s.SetEquals(initialState)) continue; optimalMap[i] = s; foreach (var ss in s) reveseOptimalMap[ss] = i; i++; } } // получаем минимальный алфавит var minClasses = new HashSet>(setComparer); var alphaQueue = new Queue>(); alphaQueue.Enqueue(new HashSet(Enumerable.Range(0,AlphabetSize))); for (int s = 1 ; s < optimalMap.Length; s++) { var newQueue = new Queue>(); foreach (var A in alphaQueue) { if (A.Count == 1) { minClasses.Add(A); continue; } // различаем классы символов, которые переводят в различные оптимальные состояния // optimalState -> alphaClass var classes = new Dictionary>(); foreach (var term in A) { // ищем все переходы класса по символу term var s2 = reveseOptimalMap[ optimalMap[s].Select(x => m_states[x].transitions[term]).FirstOrDefault(x => x != 0) // первое допустимое элементарное состояние, если есть ]; HashSet A2; if (!classes.TryGetValue(s2, out A2)) { A2 = new HashSet(); newQueue.Enqueue(A2); classes[s2] = A2; } A2.Add(term); } } if (newQueue.Count == 0) break; alphaQueue = newQueue; } foreach (var A in alphaQueue) minClasses.Add(A); var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses); // построение автомата var minimalDFA = dfaFactory(minimalAlphabet); var states = new int[ optimalMap.Length ]; states[0] = UNREACHEBLE_STATE; for(var s = INITIAL_STATE; s < states.Length; s++) { var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty()).Distinct().ToArray(); if (tags.Length > 0) states[s] = minimalDFA.AddState(tags); else states[s] = minimalDFA.AddState(); } Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE); for (int s1 = 1; s1 < m_states.Count; s1++) { for (int c = 0; c < AlphabetSize; c++) { var s2 = m_states[s1].transitions[c]; if (s2 != UNREACHEBLE_STATE) { minimalDFA.DefineTransition( reveseOptimalMap[s1], reveseOptimalMap[s2], alphabetMap[c] ); } } } return minimalDFA; } public void PrintDFA(IAlphabet alphabet) { var reverseMap = alphabet.CreateReverseMap(); for (int i = 1; i < reverseMap.Length; i++) { Console.WriteLine("C{0}: {1}", i, String.Join(",", reverseMap[i])); } for (int i = 1; i < m_states.Count; i++) { var s = m_states[i]; for (int c = 0; c < AlphabetSize; c++) if (s.transitions[c] != UNREACHEBLE_STATE) Console.WriteLine("S{0} -{1}-> S{2}{3}", i, String.Join(",", reverseMap[c]), s.transitions[c], m_states[s.transitions[c]].final ? "$" : ""); } } public int AlphabetSize { get { return m_alpabetSize; } } } }