using Implab; using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Text; using System.Threading.Tasks; namespace Implab.Parsing { /// /// Используется для построения ДКА по регулярному выражению, сначала обходит /// регулярное выражение и вычисляет followpos, затем используется метод /// для построения автомата. /// public class DFABuilder : IVisitor { int m_idx = 0; Token m_root; HashSet m_firstpos; HashSet m_lastpos; Dictionary> m_followpos = new Dictionary>(); Dictionary m_indexes = new Dictionary(); Dictionary m_ends = new Dictionary(); public Dictionary> FollowposMap { get { return m_followpos; } } public HashSet Followpos(int pos) { HashSet set; if (m_followpos.TryGetValue(pos, out set)) return set; return m_followpos[pos] = new HashSet(); } bool Nullable(object n) { if (n is EmptyToken || n is StarToken) return true; if (n is AltToken) return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right); if (n is CatToken) return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right); return false; } public void Visit(AltToken token) { if (m_root == null) m_root = token; var firtspos = new HashSet(); var lastpos = new HashSet(); token.Left.Accept(this); firtspos.UnionWith(m_firstpos); lastpos.UnionWith(m_lastpos); token.Right.Accept(this); firtspos.UnionWith(m_firstpos); lastpos.UnionWith(m_lastpos); m_firstpos = firtspos; m_lastpos = lastpos; } public void Visit(StarToken token) { if (m_root == null) m_root = token; token.Token.Accept(this); foreach (var i in m_lastpos) Followpos(i).UnionWith(m_firstpos); } public void Visit(CatToken token) { if (m_root == null) m_root = token; var firtspos = new HashSet(); var lastpos = new HashSet(); token.Left.Accept(this); firtspos.UnionWith(m_firstpos); var leftLastpos = m_lastpos; token.Right.Accept(this); lastpos.UnionWith(m_lastpos); var rightFirstpos = m_firstpos; if (Nullable(token.Left)) firtspos.UnionWith(rightFirstpos); if (Nullable(token.Right)) lastpos.UnionWith(leftLastpos); m_firstpos = firtspos; m_lastpos = lastpos; foreach (var i in leftLastpos) Followpos(i).UnionWith(rightFirstpos); } public void Visit(EmptyToken token) { if (m_root == null) m_root = token; ; } public void Visit(SymbolToken token) { if (m_root == null) m_root = token; m_idx++; m_indexes[m_idx] = token.Value; m_firstpos = new HashSet(new[] { m_idx }); m_lastpos = new HashSet(new[] { m_idx }); } public void Visit(EndToken token) { if (m_root == null) m_root = token; m_idx++; m_indexes[m_idx] = Alphabet.UNCLASSIFIED; m_firstpos = new HashSet(new[] { m_idx }); m_lastpos = new HashSet(new[] { m_idx }); Followpos(m_idx); m_ends.Add(m_idx, token.Tag); } public void BuildDFA(IDFADefinition dfa) { Safe.ArgumentNotNull(dfa,"dfa"); var stateMap = new Dictionary, int>(new CustomEqualityComparer>( (x, y) => x.SetEquals(y), (x) => x.Sum(n => n.GetHashCode()) )); stateMap[m_firstpos] = DefineState( dfa, m_firstpos); Debug.Assert(stateMap[m_firstpos] == DFADefinitionBase.INITIAL_STATE); var queue = new Queue>(); queue.Enqueue(m_firstpos); while (queue.Count > 0) { var state = queue.Dequeue(); var s1 = stateMap[state]; for (int a = 0; a < dfa.AlphabetSize; a++) { var next = new HashSet(); foreach (var p in state) { if (m_indexes[p] == a) { next.UnionWith(Followpos(p)); } } if (next.Count > 0) { int s2; if (!stateMap.TryGetValue(next, out s2)) { stateMap[next] = s2 = DefineState(dfa, next); queue.Enqueue(next); } dfa.DefineTransition(s1, s2, a); } } } } int[] GetStateTags(HashSet state) { Debug.Assert(state != null); return state.Where(pos => m_ends.ContainsKey(pos)).Select(pos => m_ends[pos]).ToArray(); } int DefineState(IDFADefinition automa, HashSet state) { Debug.Assert(automa != null); Debug.Assert(state != null); var tags = GetStateTags(state); return tags.Length > 0 ? automa.AddState(tags) : automa.AddState(); } } }