RegularExpressionVisitor.cs
184 lines
| 6.5 KiB
| text/x-csharp
|
CSharpLexer
cin
|
r172 | using Implab; | ||
using System; | ||||
using System.Collections.Generic; | ||||
using System.Diagnostics; | ||||
using System.Linq; | ||||
namespace Implab.Automaton.RegularExpressions { | ||||
/// <summary> | ||||
/// Используется для построения ДКА по регулярному выражению, сначала обходит | ||||
/// регулярное выражение и вычисляет followpos, затем используется метод | ||||
/// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | ||||
/// </summary> | ||||
public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { | ||||
int m_idx; | ||||
Token<TTag> m_root; | ||||
HashSet<int> m_firstpos; | ||||
HashSet<int> m_lastpos; | ||||
readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | ||||
readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | ||||
readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); | ||||
public Dictionary<int, HashSet<int>> FollowposMap { | ||||
get { return m_followpos; } | ||||
} | ||||
public HashSet<int> Followpos(int pos) { | ||||
HashSet<int> set; | ||||
return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | ||||
} | ||||
bool Nullable(object n) { | ||||
if (n is EmptyToken<TTag> || n is StarToken<TTag>) | ||||
return true; | ||||
var altToken = n as AltToken<TTag>; | ||||
if (altToken != null) | ||||
return Nullable(altToken.Left) || Nullable(altToken.Right); | ||||
var catToken = n as CatToken<TTag>; | ||||
if (catToken != null) | ||||
return Nullable(catToken.Left) && Nullable(catToken.Right); | ||||
return false; | ||||
} | ||||
public void Visit(AltToken<TTag> token) { | ||||
if (m_root == null) | ||||
m_root = token; | ||||
var firtspos = new HashSet<int>(); | ||||
var lastpos = new HashSet<int>(); | ||||
token.Left.Accept(this); | ||||
firtspos.UnionWith(m_firstpos); | ||||
lastpos.UnionWith(m_lastpos); | ||||
token.Right.Accept(this); | ||||
firtspos.UnionWith(m_firstpos); | ||||
lastpos.UnionWith(m_lastpos); | ||||
m_firstpos = firtspos; | ||||
m_lastpos = lastpos; | ||||
} | ||||
public void Visit(StarToken<TTag> token) { | ||||
if (m_root == null) | ||||
m_root = token; | ||||
token.Token.Accept(this); | ||||
foreach (var i in m_lastpos) | ||||
Followpos(i).UnionWith(m_firstpos); | ||||
} | ||||
public void Visit(CatToken<TTag> token) { | ||||
if (m_root == null) | ||||
m_root = token; | ||||
var firtspos = new HashSet<int>(); | ||||
var lastpos = new HashSet<int>(); | ||||
token.Left.Accept(this); | ||||
firtspos.UnionWith(m_firstpos); | ||||
var leftLastpos = m_lastpos; | ||||
token.Right.Accept(this); | ||||
lastpos.UnionWith(m_lastpos); | ||||
var rightFirstpos = m_firstpos; | ||||
if (Nullable(token.Left)) | ||||
firtspos.UnionWith(rightFirstpos); | ||||
if (Nullable(token.Right)) | ||||
lastpos.UnionWith(leftLastpos); | ||||
m_firstpos = firtspos; | ||||
m_lastpos = lastpos; | ||||
foreach (var i in leftLastpos) | ||||
Followpos(i).UnionWith(rightFirstpos); | ||||
} | ||||
public void Visit(EmptyToken<TTag> token) { | ||||
if (m_root == null) | ||||
m_root = token; | ||||
} | ||||
public void Visit(SymbolToken<TTag> token) { | ||||
if (m_root == null) | ||||
m_root = token; | ||||
m_idx++; | ||||
m_indexes[m_idx] = token.Value; | ||||
m_firstpos = new HashSet<int>(new[] { m_idx }); | ||||
m_lastpos = new HashSet<int>(new[] { m_idx }); | ||||
} | ||||
public void Visit(EndToken<TTag> token) { | ||||
if (m_root == null) | ||||
m_root = token; | ||||
m_idx++; | ||||
m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | ||||
m_firstpos = new HashSet<int>(new[] { m_idx }); | ||||
m_lastpos = new HashSet<int>(new[] { m_idx }); | ||||
Followpos(m_idx); | ||||
m_ends.Add(m_idx, token.Tag); | ||||
} | ||||
public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { | ||||
Safe.ArgumentNotNull(dfa,"dfa"); | ||||
var states = new MapAlphabet<HashSet<int>>( | ||||
false, | ||||
new CustomEqualityComparer<HashSet<int>>( | ||||
(x, y) => x.SetEquals(y), | ||||
x => x.Sum(n => n.GetHashCode()) | ||||
)); | ||||
var initialState = states.DefineSymbol(m_firstpos); | ||||
dfa.SetInitialState(initialState); | ||||
var tags = GetStateTags(m_firstpos); | ||||
if (tags != null && tags.Length > 0) | ||||
dfa.MarkFinalState(initialState, tags); | ||||
var inputMax = m_indexes.Values.Max(); | ||||
var queue = new Queue<HashSet<int>>(); | ||||
queue.Enqueue(m_firstpos); | ||||
while (queue.Count > 0) { | ||||
var state = queue.Dequeue(); | ||||
var s1 = states.Translate(state); | ||||
Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); | ||||
for (int a = 0; a <= inputMax; a++) { | ||||
var next = new HashSet<int>(); | ||||
foreach (var p in state) { | ||||
if (m_indexes[p] == a) { | ||||
next.UnionWith(Followpos(p)); | ||||
} | ||||
} | ||||
if (next.Count > 0) { | ||||
int s2 = states.Translate(next); | ||||
if (s2 == DFAConst.UNCLASSIFIED_INPUT) { | ||||
s2 = states.DefineSymbol(next); | ||||
tags = GetStateTags(next); | ||||
if (tags != null && tags.Length > 0) { | ||||
dfa.MarkFinalState(s2); | ||||
dfa.SetStateTag(s2, tags); | ||||
} | ||||
queue.Enqueue(next); | ||||
} | ||||
dfa.Add(new AutomatonTransition(s1, s2, a)); | ||||
} | ||||
} | ||||
} | ||||
} | ||||
TTag[] GetStateTags(IEnumerable<int> state) { | ||||
Debug.Assert(state != null); | ||||
return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); | ||||
} | ||||
} | ||||
} | ||||