DFABuilder.cs
182 lines
| 6.1 KiB
| text/x-csharp
|
CSharpLexer
|
|
r55 | using Implab; | ||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Diagnostics; | ||||
| using System.Linq; | ||||
| using System.Text; | ||||
| using System.Threading.Tasks; | ||||
| namespace Implab.Parsing { | ||||
| /// <summary> | ||||
| /// Используется для построения ДКА по регулярному выражению, сначала обходит | ||||
| /// регулярное выражение и вычисляет followpos, затем используется метод | ||||
| /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | ||||
| /// </summary> | ||||
| public class DFABuilder : IVisitor { | ||||
| int m_idx = 0; | ||||
| Token m_root; | ||||
| HashSet<int> m_firstpos; | ||||
| HashSet<int> m_lastpos; | ||||
| Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | ||||
| Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | ||||
| Dictionary<int, int> m_ends = new Dictionary<int, int>(); | ||||
| public Dictionary<int, HashSet<int>> FollowposMap { | ||||
| get { return m_followpos; } | ||||
| } | ||||
| public HashSet<int> Followpos(int pos) { | ||||
| HashSet<int> set; | ||||
| if (m_followpos.TryGetValue(pos, out set)) | ||||
| return set; | ||||
| return m_followpos[pos] = new HashSet<int>(); | ||||
| } | ||||
| bool Nullable(object n) { | ||||
| if (n is EmptyToken || n is StarToken) | ||||
| return true; | ||||
| if (n is AltToken) | ||||
| return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right); | ||||
| if (n is CatToken) | ||||
| return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right); | ||||
| return false; | ||||
| } | ||||
| public void Visit(AltToken token) { | ||||
| if (m_root == null) | ||||
| m_root = token; | ||||
| var firtspos = new HashSet<int>(); | ||||
| var lastpos = new HashSet<int>(); | ||||
| token.Left.Accept(this); | ||||
| firtspos.UnionWith(m_firstpos); | ||||
| lastpos.UnionWith(m_lastpos); | ||||
| token.Right.Accept(this); | ||||
| firtspos.UnionWith(m_firstpos); | ||||
| lastpos.UnionWith(m_lastpos); | ||||
| m_firstpos = firtspos; | ||||
| m_lastpos = lastpos; | ||||
| } | ||||
| public void Visit(StarToken token) { | ||||
| if (m_root == null) | ||||
| m_root = token; | ||||
| token.Token.Accept(this); | ||||
| foreach (var i in m_lastpos) | ||||
| Followpos(i).UnionWith(m_firstpos); | ||||
| } | ||||
| public void Visit(CatToken token) { | ||||
| if (m_root == null) | ||||
| m_root = token; | ||||
| var firtspos = new HashSet<int>(); | ||||
| var lastpos = new HashSet<int>(); | ||||
| token.Left.Accept(this); | ||||
| firtspos.UnionWith(m_firstpos); | ||||
| var leftLastpos = m_lastpos; | ||||
| token.Right.Accept(this); | ||||
| lastpos.UnionWith(m_lastpos); | ||||
| var rightFirstpos = m_firstpos; | ||||
| if (Nullable(token.Left)) | ||||
| firtspos.UnionWith(rightFirstpos); | ||||
| if (Nullable(token.Right)) | ||||
| lastpos.UnionWith(leftLastpos); | ||||
| m_firstpos = firtspos; | ||||
| m_lastpos = lastpos; | ||||
| foreach (var i in leftLastpos) | ||||
| Followpos(i).UnionWith(rightFirstpos); | ||||
| } | ||||
| public void Visit(EmptyToken token) { | ||||
| if (m_root == null) | ||||
| m_root = token; | ||||
| ; | ||||
| } | ||||
| public void Visit(SymbolToken token) { | ||||
| if (m_root == null) | ||||
| m_root = token; | ||||
| m_idx++; | ||||
| m_indexes[m_idx] = token.Value; | ||||
| m_firstpos = new HashSet<int>(new[] { m_idx }); | ||||
| m_lastpos = new HashSet<int>(new[] { m_idx }); | ||||
| } | ||||
| public void Visit(EndToken token) { | ||||
| if (m_root == null) | ||||
| m_root = token; | ||||
| m_idx++; | ||||
| m_indexes[m_idx] = Alphabet.UNCLASSIFIED; | ||||
| m_firstpos = new HashSet<int>(new[] { m_idx }); | ||||
| m_lastpos = new HashSet<int>(new[] { m_idx }); | ||||
| Followpos(m_idx); | ||||
| m_ends.Add(m_idx, token.Tag); | ||||
| } | ||||
| public void BuildDFA(IDFADefinition dfa) { | ||||
| Safe.ArgumentNotNull(dfa,"dfa"); | ||||
| var stateMap = new Dictionary<HashSet<int>, int>(new CustomEqualityComparer<HashSet<int>>( | ||||
| (x, y) => x.SetEquals(y), | ||||
| (x) => x.Sum(n => n.GetHashCode()) | ||||
| )); | ||||
| stateMap[m_firstpos] = DefineState( dfa, m_firstpos); | ||||
| Debug.Assert(stateMap[m_firstpos] == DFADefinitionBase.INITIAL_STATE); | ||||
| var queue = new Queue<HashSet<int>>(); | ||||
| queue.Enqueue(m_firstpos); | ||||
| while (queue.Count > 0) { | ||||
| var state = queue.Dequeue(); | ||||
| var s1 = stateMap[state]; | ||||
| for (int a = 0; a < dfa.AlphabetSize; a++) { | ||||
| var next = new HashSet<int>(); | ||||
| foreach (var p in state) { | ||||
| if (m_indexes[p] == a) { | ||||
| next.UnionWith(Followpos(p)); | ||||
| } | ||||
| } | ||||
| if (next.Count > 0) { | ||||
| int s2; | ||||
| if (!stateMap.TryGetValue(next, out s2)) { | ||||
| stateMap[next] = s2 = DefineState(dfa, next); | ||||
| queue.Enqueue(next); | ||||
| } | ||||
| dfa.DefineTransition(s1, s2, a); | ||||
| } | ||||
| } | ||||
| } | ||||
| } | ||||
| int[] GetStateTags(HashSet<int> state) { | ||||
| Debug.Assert(state != null); | ||||
| return state.Where(pos => m_ends.ContainsKey(pos)).Select(pos => m_ends[pos]).ToArray(); | ||||
| } | ||||
| int DefineState(IDFADefinition automa, HashSet<int> state) { | ||||
| Debug.Assert(automa != null); | ||||
| Debug.Assert(state != null); | ||||
| var tags = GetStateTags(state); | ||||
| return tags.Length > 0 ? automa.AddState(tags) : automa.AddState(); | ||||
| } | ||||
| } | ||||
| } | ||||
