RegularExpressionVisitor.cs
212 lines
| 6.8 KiB
| text/x-csharp
|
CSharpLexer
|
|
r172 | using Implab; | ||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Diagnostics; | ||||
| using System.Linq; | ||||
| namespace Implab.Automaton.RegularExpressions { | ||||
| /// <summary> | ||||
| /// Используется для построения ДКА по регулярному выражению, сначала обходит | ||||
| /// регулярное выражение и вычисляет followpos, затем используется метод | ||||
| /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | ||||
| /// </summary> | ||||
|
|
r178 | public class RegularExpressionVisitor : IVisitor { | ||
|
|
r172 | int m_idx; | ||
|
|
r177 | Token m_root; | ||
|
|
r172 | HashSet<int> m_firstpos; | ||
| HashSet<int> m_lastpos; | ||||
| readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | ||||
| readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | ||||
|
|
r177 | readonly HashSet<int> m_ends = new HashSet<int>(); | ||
|
|
r172 | |||
|
|
r178 | readonly IDFATableBuilder m_builder; | ||
| readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( | ||||
| false, | ||||
| new CustomEqualityComparer<HashSet<int>>( | ||||
| (x, y) => x.SetEquals(y), | ||||
| x => x.Sum(n => n.GetHashCode()) | ||||
| ) | ||||
| ); | ||||
| public RegularExpressionVisitor(IDFATableBuilder builder) { | ||||
| Safe.ArgumentNotNull(builder, "builder"); | ||||
| m_builder = builder; | ||||
|
|
r172 | } | ||
|
|
r178 | HashSet<int> Followpos(int pos) { | ||
|
|
r172 | HashSet<int> set; | ||
| return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | ||||
| } | ||||
| bool Nullable(object n) { | ||||
|
|
r177 | if (n is EmptyToken || n is StarToken) | ||
|
|
r172 | return true; | ||
|
|
r177 | var altToken = n as AltToken; | ||
|
|
r172 | if (altToken != null) | ||
| return Nullable(altToken.Left) || Nullable(altToken.Right); | ||||
|
|
r177 | var catToken = n as CatToken; | ||
|
|
r172 | if (catToken != null) | ||
| return Nullable(catToken.Left) && Nullable(catToken.Right); | ||||
| return false; | ||||
| } | ||||
|
|
r178 | protected int Index { | ||
| get { return m_idx; } | ||||
| } | ||||
|
|
r172 | |||
|
|
r177 | public void Visit(AltToken token) { | ||
|
|
r172 | if (m_root == null) | ||
| m_root = token; | ||||
| var firtspos = new HashSet<int>(); | ||||
| var lastpos = new HashSet<int>(); | ||||
| token.Left.Accept(this); | ||||
| firtspos.UnionWith(m_firstpos); | ||||
| lastpos.UnionWith(m_lastpos); | ||||
| token.Right.Accept(this); | ||||
| firtspos.UnionWith(m_firstpos); | ||||
| lastpos.UnionWith(m_lastpos); | ||||
| m_firstpos = firtspos; | ||||
| m_lastpos = lastpos; | ||||
| } | ||||
|
|
r177 | public void Visit(StarToken token) { | ||
|
|
r172 | if (m_root == null) | ||
| m_root = token; | ||||
| token.Token.Accept(this); | ||||
| foreach (var i in m_lastpos) | ||||
| Followpos(i).UnionWith(m_firstpos); | ||||
| } | ||||
|
|
r177 | public void Visit(CatToken token) { | ||
|
|
r172 | if (m_root == null) | ||
| m_root = token; | ||||
| var firtspos = new HashSet<int>(); | ||||
| var lastpos = new HashSet<int>(); | ||||
| token.Left.Accept(this); | ||||
| firtspos.UnionWith(m_firstpos); | ||||
| var leftLastpos = m_lastpos; | ||||
| token.Right.Accept(this); | ||||
| lastpos.UnionWith(m_lastpos); | ||||
| var rightFirstpos = m_firstpos; | ||||
| if (Nullable(token.Left)) | ||||
| firtspos.UnionWith(rightFirstpos); | ||||
| if (Nullable(token.Right)) | ||||
| lastpos.UnionWith(leftLastpos); | ||||
| m_firstpos = firtspos; | ||||
| m_lastpos = lastpos; | ||||
| foreach (var i in leftLastpos) | ||||
| Followpos(i).UnionWith(rightFirstpos); | ||||
| } | ||||
|
|
r177 | public void Visit(EmptyToken token) { | ||
|
|
r172 | if (m_root == null) | ||
| m_root = token; | ||||
| } | ||||
|
|
r177 | public void Visit(SymbolToken token) { | ||
|
|
r172 | if (m_root == null) | ||
| m_root = token; | ||||
| m_idx++; | ||||
| m_indexes[m_idx] = token.Value; | ||||
| m_firstpos = new HashSet<int>(new[] { m_idx }); | ||||
| m_lastpos = new HashSet<int>(new[] { m_idx }); | ||||
| } | ||||
|
|
r178 | public virtual void Visit(EndToken token) { | ||
|
|
r172 | if (m_root == null) | ||
| m_root = token; | ||||
| m_idx++; | ||||
|
|
r178 | m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; | ||
|
|
r177 | m_firstpos = new HashSet<int>(new[] { m_idx }); | ||
| m_lastpos = new HashSet<int>(new[] { m_idx }); | ||||
| Followpos(m_idx); | ||||
| m_ends.Add(m_idx); | ||||
|
|
r172 | } | ||
|
|
r178 | public void BuildDFA() { | ||
| AddState(m_firstpos); | ||||
| SetInitialState(m_firstpos); | ||||
|
|
r172 | |||
|
|
r178 | if(IsFinal(m_firstpos)) | ||
| MarkFinalState(m_firstpos); | ||||
|
|
r172 | |||
| var inputMax = m_indexes.Values.Max(); | ||||
| var queue = new Queue<HashSet<int>>(); | ||||
| queue.Enqueue(m_firstpos); | ||||
| while (queue.Count > 0) { | ||||
|
|
r178 | var s1 = queue.Dequeue(); | ||
|
|
r172 | |||
| for (int a = 0; a <= inputMax; a++) { | ||||
|
|
r178 | var s2 = new HashSet<int>(); | ||
| foreach (var p in s1) { | ||||
|
|
r172 | if (m_indexes[p] == a) { | ||
|
|
r178 | s2.UnionWith(Followpos(p)); | ||
|
|
r172 | } | ||
| } | ||||
|
|
r178 | if (s2.Count > 0) { | ||
| if (!HasState(s2)) { | ||||
| AddState(s2); | ||||
| if (IsFinal(s2)) | ||||
| MarkFinalState(s2); | ||||
| queue.Enqueue(s2); | ||||
| } | ||||
|
|
r172 | |||
|
|
r178 | DefineTransition(s1, s2, a); | ||
|
|
r172 | } | ||
|
|
r178 | |||
|
|
r172 | } | ||
| } | ||||
| } | ||||
|
|
r178 | protected bool HasState(HashSet<int> state) { | ||
| return m_states.Contains(state); | ||||
| } | ||||
| protected void AddState(HashSet<int> state) { | ||||
| Debug.Assert(!HasState(state)); | ||||
| m_states.DefineSymbol(state); | ||||
| } | ||||
| protected int Translate(HashSet<int> state) { | ||||
| Debug.Assert(HasState(state)); | ||||
| return m_states.Translate(state); | ||||
| } | ||||
| protected virtual void SetInitialState(HashSet<int> state) { | ||||
| m_builder.SetInitialState(Translate(state)); | ||||
| } | ||||
| protected virtual void MarkFinalState(HashSet<int> state) { | ||||
| m_builder.MarkFinalState(Translate(state)); | ||||
| } | ||||
| protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { | ||||
| m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); | ||||
| } | ||||
|
|
r177 | bool IsFinal(IEnumerable<int> state) { | ||
| Debug.Assert(state != null); | ||||
| return state.Any(m_ends.Contains); | ||||
| } | ||||
|
|
r172 | } | ||
| } | ||||
