# HG changeset patch # User cin # Date 2016-02-24 23:11:13 # Node ID ec35731ae2994fd3cad658174049ec6ea9f98c7e # Parent 419aa51b04fd58b7b877848248539b48db04cf76 Almost complete DFA refactoring diff --git a/Implab/Automaton/ByteAlphabet.cs b/Implab/Automaton/ByteAlphabet.cs deleted file mode 100644 --- a/Implab/Automaton/ByteAlphabet.cs +++ /dev/null @@ -1,23 +0,0 @@ -using System; - -namespace Implab.Automaton { - public class ByteAlphabet : IndexedAlphabetBase { - public ByteAlphabet() : base(byte.MaxValue + 1){ - } - - #region implemented abstract members of IndexedAlphabetBase - - public override int GetSymbolIndex(byte symbol) { - return (int)symbol; - } - - public override System.Collections.Generic.IEnumerable InputSymbols { - get { - throw new NotImplementedException(); - } - } - - #endregion - } -} - diff --git a/Implab/Automaton/CDFADefinition.cs b/Implab/Automaton/CDFADefinition.cs deleted file mode 100644 --- a/Implab/Automaton/CDFADefinition.cs +++ /dev/null @@ -1,22 +0,0 @@ -namespace Implab.Automaton { - public class CDFADefinition : DFADefinition { - readonly CharAlphabet m_alphabet; - - public CharAlphabet Alphabet { - get { return m_alphabet; } - } - - public CDFADefinition(CharAlphabet alphabet): base(alphabet.Count) { - m_alphabet = alphabet; - } - - public CDFADefinition Optimize() { - - return (CDFADefinition)Optimize(alphabet => new CDFADefinition((CharAlphabet)alphabet), m_alphabet, new CharAlphabet()); - } - - public void PrintDFA() { - PrintDFA(m_alphabet); - } - } -} diff --git a/Implab/Automaton/CharAlphabet.cs b/Implab/Automaton/CharAlphabet.cs deleted file mode 100644 --- a/Implab/Automaton/CharAlphabet.cs +++ /dev/null @@ -1,23 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Implab.Automaton { - public class CharAlphabet: IndexedAlphabetBase { - - public CharAlphabet() - : base(char.MaxValue + 1) { - } - - public override int GetSymbolIndex(char symbol) { - return symbol; - } - - public override IEnumerable InputSymbols { - get { return Enumerable.Range(char.MinValue, char.MaxValue).Select(x => (char)x); } - } - } -} diff --git a/Implab/Automaton/DFADefinition.cs b/Implab/Automaton/DFADefinition.cs deleted file mode 100644 --- a/Implab/Automaton/DFADefinition.cs +++ /dev/null @@ -1,213 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Implab.Automaton { - public class DFADefinition : IDFADefinitionBuilder, IDFADefinition { - DFAStateDescriptior[] m_dfaTable; - readonly IAlphabet m_inputAlphabet; - readonly IAlphabet m_stateAlphabet; - - readonly Dictionary m_finalStates = new Dictionary(); - readonly HashSet m_transitions = new HashSet(); - - public DFADefinition(IAlphabet inputAlphabet, IAlphabet stateAlphabet) { - Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); - Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); - - m_inputAlphabet = inputAlphabet; - m_stateAlphabet = stateAlphabet; - } - - public void DefineTransition(int s1, int s2, int symbol) { - Safe.ArgumentInRange(s1, 0, m_stateAlphabet.Count-1, "s1"); - Safe.ArgumentInRange(s2, 0, m_stateAlphabet.Count-1, "s2"); - Safe.ArgumentInRange(symbol, 0, m_inputAlphabet.Count-1, "symbol"); - - m_transitions.Add(new AutomatonTransition(s1, s2, symbol)); - } - - - #region IDFADefinition implementation - - public DFAStateDescriptior[] GetTransitionTable() { - if (m_dfaTable == null) { - m_dfaTable = new DFAStateDescriptior[m_stateAlphabet.Count]; - - foreach (var pair in m_finalStates) { - var idx = pair.Key; - - m_dfaTable[idx].final = true; - m_dfaTable[idx].tag = m_dfaTable[idx].tag != null ? - m_dfaTable[idx].tag.Concat(pair.Value).Distinct().ToArray() : - pair.Value; - } - - foreach (var t in m_transitions) { - if (m_dfaTable[t.s1].transitions == null) { - m_dfaTable[t.s1].transitions = new int[m_inputAlphabet.Count]; - for (int i = 0; i < m_dfaTable[t.s1].transitions.Length; i++) - m_dfaTable[t.s1].transitions[i] = DFAConst.UNREACHABLE_STATE; - } - - m_dfaTable[t.s1].transitions[t.edge] = t.s2; - } - } - return m_dfaTable; - } - - public IAlphabet InputAlphabet { - get { - return m_inputAlphabet; - } - } - - public IAlphabet StateAlphabet { - get { - return m_stateAlphabet; - } - } - - #endregion - - #region IDFADefinitionBuilder - - public void DefineTransition(int s1, int s2, int symbol) { - Safe.ArgumentInRange(s1, 0, m_stateAlphabet.Count - 1, "s1"); - Safe.ArgumentInRange(s2, 0, m_stateAlphabet.Count - 1, "s2"); - Safe.ArgumentInRange(symbol, 0, m_inputAlphabet.Count - 1, "symbol"); - - m_transitions.Add(new AutomatonTransition(s1, s2, symbol)); - } - - public void MarkFinalState(int state, params TTag[] tags) { - m_finalStates[state] = tags; - } - - - #endregion - - protected void Optimize(IDFADefinitionBuilder optimalDFA,IAlphabetBuilder optimalInputAlphabet, IAlphabetBuilder optimalStateAlphabet) { - Safe.ArgumentNotNull(optimalDFA, "dfa"); - Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet"); - Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet"); - - var setComparer = new CustomEqualityComparer>( - (x, y) => x.SetEquals(y), - s => s.Sum(x => x.GetHashCode()) - ); - - var arrayComparer = new CustomEqualityComparer( - (x,y) => (new HashSet(x)).SetEquals(new HashSet(y)), - a => a.Sum(x => x.GetHashCode()) - ); - - var optimalStates = new HashSet>(setComparer); - var queue = new HashSet>(setComparer); - - // получаем конечные состояния, сгруппированные по маркерам - optimalStates.UnionWith( - m_finalStates - .GroupBy(pair => pair.Value, arrayComparer) - .Select( - g => new HashSet( - g.Select( pair => pair.Key) - ) - ) - ); - - var state = new HashSet( - Enumerable - .Range(0, m_stateAlphabet.Count - 1) - .Where(i => !m_finalStates.ContainsKey(i)) - ); - optimalStates.Add(state); - queue.Add(state); - - var rmap = m_transitions - .GroupBy(t => t.s2) - .ToLookup( - g => g.Key, // s2 - g => g.ToLookup(t => t.edge, t => t.s1) - ); - - while (queue.Count > 0) { - var stateA = queue.First(); - queue.Remove(stateA); - - for (int c = 0; c < m_inputAlphabet.Count; c++) { - var stateX = new HashSet(); - foreach(var a in stateA) - stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a' - - foreach (var stateY in optimalStates.ToArray()) { - if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { - var stateR1 = new HashSet(stateY); - var stateR2 = new HashSet(stateY); - - stateR1.IntersectWith(stateX); - stateR2.ExceptWith(stateX); - - optimalStates.Remove(stateY); - optimalStates.Add(stateR1); - optimalStates.Add(stateR2); - - if (queue.Contains(stateY)) { - queue.Remove(stateY); - queue.Add(stateR1); - queue.Add(stateR2); - } else { - queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); - } - } - } - } - } - - // карта получения оптимального состояния по соотвествующему ему простому состоянию - var statesMap = m_stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates); - - // получаем минимальный алфавит - // входные символы не различимы, если Move(s,a1) == Move(s,a2) - var optimalAlphabet = m_transitions - .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge); - - var alphabetMap = m_inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet); - - var optimalTags = m_finalStates - .GroupBy(pair => statesMap[pair.Key]) - .ToDictionary( - g => g.Key, - g => g.SelectMany(pair => pair.Value).ToArray() - ); - - // построение автомата - foreach (var pair in optimalTags) - optimalDFA.MarkFinalState(pair.Key, pair.Value); - - foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct()) - optimalDFA.DefineTransition(t.s1, t.s2, t.edge); - } - - public void PrintDFA() { - - var inputMap = InputAlphabet.CreateReverseMap(); - var stateMap = StateAlphabet.CreateReverseMap(); - - for (int i = 0; i < inputMap.Length; i++) { - Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i])); - } - - foreach(var t in m_transitions) - Console.WriteLine( - "S{0} -{1}-> S{2}{3}", - stateMap[t.s1], - String.Join(",", inputMap[t.edge]), - stateMap[t.s2], - m_finalStates.ContainsKey(t.s2) ? "$" : "" - ); - - } - } -} diff --git a/Implab/Automaton/DFAStateDescriptor.cs b/Implab/Automaton/DFAStateDescriptor.cs --- a/Implab/Automaton/DFAStateDescriptor.cs +++ b/Implab/Automaton/DFAStateDescriptor.cs @@ -1,10 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Implab.Automaton { +namespace Implab.Automaton { public struct DFAStateDescriptior { public bool final; public TTag[] tag; diff --git a/Implab/Automaton/DFATransitionTable.cs b/Implab/Automaton/DFATransitionTable.cs new file mode 100644 --- /dev/null +++ b/Implab/Automaton/DFATransitionTable.cs @@ -0,0 +1,251 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Implab.Automaton { + public class DFATransitionTable : IDFATransitionTableBuilder { + DFAStateDescriptior[] m_dfaTable; + + int m_stateCount; + int m_symbolCount; + int m_initialState; + + readonly Dictionary m_finalStates = new Dictionary(); + readonly HashSet m_transitions = new HashSet(); + + + #region IDFADefinition implementation + + public DFAStateDescriptior[] GetTransitionTable() { + if (m_dfaTable == null) { + if (m_stateCount <= 0) + throw new InvalidOperationException("Invalid automaton definition: states count = {0}", m_stateCount); + if (m_symbolCount <= 0) + throw new InvalidOperationException("Invalid automaton definition: symbols count = {0}", m_symbolCount); + + m_dfaTable = ConstructTransitionTable(); + } + return m_dfaTable; + } + + public bool IsFinalState(int s) { + Safe.ArgumentInRange(s, 0, m_stateCount, "s"); + + return m_finalStates.ContainsKey(s); + } + + public IEnumerable> FinalStates { + get { + return m_finalStates; + } + } + + public int StateCount { + get { return m_stateCount; } + } + + public int AlphabetSize { + get { return m_symbolCount; } + } + + public int InitialState { + get { return m_initialState; } + } + + #endregion + + protected virtual DFAStateDescriptior[] ConstructTransitionTable() { + var dfaTable = new DFAStateDescriptior[m_stateCount]; + + foreach (var pair in m_finalStates) { + var idx = pair.Key; + + dfaTable[idx].final = true; + dfaTable[idx].tag = pair.Value; + } + + foreach (var t in m_transitions) { + if (dfaTable[t.s1].transitions == null) { + dfaTable[t.s1].transitions = new int[m_symbolCount]; + for (int i = 0; i < dfaTable[t.s1].transitions.Length; i++) + dfaTable[t.s1].transitions[i] = DFAConst.UNREACHABLE_STATE; + } + + dfaTable[t.s1].transitions[t.edge] = t.s2; + } + } + + #region IDFADefinitionBuilder + + public void DefineTransition(int s1, int s2, int symbol) { + if (m_dfaTable != null) + throw new InvalidOperationException("The transition table is already built"); + + Safe.ArgumentAssert(s1 > 0, "s1"); + Safe.ArgumentAssert(s2 > 0, "s2"); + Safe.ArgumentAssert(symbol >= 0, "symbol"); + + m_stateCount = Math.Max(Math.Max(m_stateCount, s1 + 1), s2 + 1); + m_symbolCount = Math.Max(m_symbolCount, symbol + 1); + + m_transitions.Add(new AutomatonTransition(s1, s2, symbol)); + } + + public void MarkFinalState(int state, params TTag[] tags) { + if (m_dfaTable != null) + throw new InvalidOperationException("The transition table is already built"); + + m_finalStates[state] = tags; + } + + public void SetInitialState(int s) { + Safe.ArgumentAssert(s >= 0, "s"); + m_initialState = s; + } + + + #endregion + + protected void Optimize( + IDFATransitionTableBuilder optimalDFA, + IAlphabet inputAlphabet, + IAlphabetBuilder optimalInputAlphabet, + IAlphabet stateAlphabet, + IAlphabetBuilder optimalStateAlphabet + ) { + Safe.ArgumentNotNull(optimalDFA, "dfa"); + Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet"); + Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet"); + Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); + Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); + + if (inputAlphabet.Count != m_symbolCount) + throw new InvalidOperationException("The input symbols aphabet mismatch"); + if (stateAlphabet.Count != m_stateCount) + throw new InvalidOperationException("The states alphabet mismatch"); + + var setComparer = new CustomEqualityComparer>( + (x, y) => x.SetEquals(y), + s => s.Sum(x => x.GetHashCode()) + ); + + var arrayComparer = new CustomEqualityComparer( + (x,y) => (new HashSet(x)).SetEquals(new HashSet(y)), + a => a.Sum(x => x.GetHashCode()) + ); + + var optimalStates = new HashSet>(setComparer); + var queue = new HashSet>(setComparer); + + // получаем конечные состояния, сгруппированные по маркерам + optimalStates.UnionWith( + m_finalStates + .GroupBy(pair => pair.Value, arrayComparer) + .Select( + g => new HashSet( + g.Select( pair => pair.Key) + ) + ) + ); + + var state = new HashSet( + Enumerable + .Range(0, m_stateCount - 1) + .Where(i => !m_finalStates.ContainsKey(i)) + ); + optimalStates.Add(state); + queue.Add(state); + + var rmap = m_transitions + .GroupBy(t => t.s2) + .ToLookup( + g => g.Key, // s2 + g => g.ToLookup(t => t.edge, t => t.s1) + ); + + while (queue.Count > 0) { + var stateA = queue.First(); + queue.Remove(stateA); + + for (int c = 0; c < m_symbolCount; c++) { + var stateX = new HashSet(); + foreach(var a in stateA) + stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a' + + foreach (var stateY in optimalStates.ToArray()) { + if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { + var stateR1 = new HashSet(stateY); + var stateR2 = new HashSet(stateY); + + stateR1.IntersectWith(stateX); + stateR2.ExceptWith(stateX); + + optimalStates.Remove(stateY); + optimalStates.Add(stateR1); + optimalStates.Add(stateR2); + + if (queue.Contains(stateY)) { + queue.Remove(stateY); + queue.Add(stateR1); + queue.Add(stateR2); + } else { + queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); + } + } + } + } + } + + // карта получения оптимального состояния по соотвествующему ему простому состоянию + var statesMap = stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates); + + // получаем минимальный алфавит + // входные символы не различимы, если Move(s,a1) == Move(s,a2) + var optimalAlphabet = m_transitions + .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge); + + var alphabetMap = inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet); + + var optimalTags = m_finalStates + .GroupBy(pair => statesMap[pair.Key]) + .ToDictionary( + g => g.Key, + g => g.SelectMany(pair => pair.Value).ToArray() + ); + + // построение автомата + optimalDFA.SetInitialState(statesMap[m_initialState]); + + foreach (var pair in optimalTags) + optimalDFA.MarkFinalState(pair.Key, pair.Value); + + foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct()) + optimalDFA.DefineTransition(t.s1, t.s2, t.edge); + + } + + protected void PrintDFA(IAlphabet inputAlphabet, IAlphabet stateAlphabet) { + Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); + Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); + + var inputMap = inputAlphabet.CreateReverseMap(); + var stateMap = stateAlphabet.CreateReverseMap(); + + for (int i = 0; i < inputMap.Length; i++) + Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i])); + + + foreach(var t in m_transitions) + Console.WriteLine( + "[{0}] -{{{1}}}-> [{2}]{3}", + stateMap[t.s1], + String.Join(",", inputMap[t.edge]), + stateMap[t.s2], + m_finalStates.ContainsKey(t.s2) ? "$" : "" + ); + + } + + } +} diff --git a/Implab/Automaton/DummyAlphabet.cs b/Implab/Automaton/DummyAlphabet.cs --- a/Implab/Automaton/DummyAlphabet.cs +++ b/Implab/Automaton/DummyAlphabet.cs @@ -3,8 +3,16 @@ using System.Collections.Generic; using System.Linq; namespace Implab.Automaton { + /// + /// Dummy alphabet consists of integer numbers which are identical to their classes. + /// public class DummyAlphabet : IAlphabet { readonly int m_size; + + /// + /// Creates a new dummy alphabet with given size. + /// + /// The size of the alphabet, must be greater then zero. public DummyAlphabet(int size) { Safe.ArgumentAssert(size > 0); m_size = 0; @@ -21,6 +29,8 @@ namespace Implab.Automaton { Safe.ArgumentNotNull(classes, "classes"); var map = new int[m_size]; foreach (var cls in classes) { + if (cls.Contains(DFAConst.UNCLASSIFIED_INPUT)) + continue; var newid = newAlphabet.DefineClass(cls); foreach (var id in cls) map[id] = newid; diff --git a/Implab/Automaton/EDFADefinition.cs b/Implab/Automaton/EDFADefinition.cs deleted file mode 100644 --- a/Implab/Automaton/EDFADefinition.cs +++ /dev/null @@ -1,29 +0,0 @@ -using Implab; -using System; - -namespace Implab.Parsing { - public class EDFADefinition : DFADefinition where T : struct, IConvertible { - readonly EnumAlphabet m_alphabet; - - public EnumAlphabet Alphabet { - get { return m_alphabet; } - } - - public EDFADefinition(EnumAlphabet alphabet) : base(alphabet.Count) { - m_alphabet = alphabet; - } - - public void DefineTransition(int s1, int s2, T input) { - DefineTransition(s1, s2, m_alphabet.Translate(input)); - } - - public EDFADefinition Optimize() { - - return (EDFADefinition)Optimize(alphabet => new EDFADefinition((EnumAlphabet)alphabet), m_alphabet, new EnumAlphabet()); - } - - public void PrintDFA() { - PrintDFA(m_alphabet); - } - } -} diff --git a/Implab/Automaton/EnumAlphabet.cs b/Implab/Automaton/EnumAlphabet.cs --- a/Implab/Automaton/EnumAlphabet.cs +++ b/Implab/Automaton/EnumAlphabet.cs @@ -12,46 +12,49 @@ namespace Implab.Automaton { /// Тип перечислений public class EnumAlphabet : IndexedAlphabetBase where T : struct, IConvertible { [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")] - static readonly T[] _symbols; - static readonly EnumAlphabet _fullAlphabet; + static readonly Lazy _symbols = new Lazy(GetSymbols); + + [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")] + static readonly Lazy> _fullAlphabet = new Lazy>(CreateEnumAlphabet); + + static EnumAlphabet CreateEnumAlphabet() { + var symbols = _symbols.Value; - [SuppressMessage("Microsoft.Design", "CA1065:DoNotRaiseExceptionsInUnexpectedLocations")] - static EnumAlphabet() { + if ( + symbols[symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= symbols.Length + || symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0 + ) + throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered"); + + return new EnumAlphabet(symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray()); + } + + static T[] GetSymbols() { if (!typeof(T).IsEnum) throw new InvalidOperationException("Invalid generic parameter, enumeration is required"); if (Enum.GetUnderlyingType(typeof(T)) != typeof(Int32)) throw new InvalidOperationException("Only enums based on Int32 are supported"); - - _symbols = ((T[])Enum.GetValues(typeof(T))) + + return ((T[])Enum.GetValues(typeof(T))) .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture)) .ToArray(); - - if ( - _symbols[_symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= _symbols.Length - || _symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0 - ) - throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered"); - - _fullAlphabet = new EnumAlphabet(_symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray()); } - - public static EnumAlphabet FullAlphabet { get { - return _fullAlphabet; + return _fullAlphabet.Value; } } public EnumAlphabet() - : base(_symbols.Length) { + : base(_symbols.Value.Length) { } public EnumAlphabet(int[] map) : base(map) { - Debug.Assert(map.Length == _symbols.Length); + Debug.Assert(map.Length == _symbols.Value.Length); } @@ -60,7 +63,7 @@ namespace Implab.Automaton { } public override IEnumerable InputSymbols { - get { return _symbols; } + get { return _symbols.Value; } } } diff --git a/Implab/Automaton/IAlphabetBuilder.cs b/Implab/Automaton/IAlphabetBuilder.cs --- a/Implab/Automaton/IAlphabetBuilder.cs +++ b/Implab/Automaton/IAlphabetBuilder.cs @@ -17,9 +17,6 @@ namespace Implab.Automaton { /// Множестов исходных символов /// Идентификатор символа алфавита. int DefineClass(IEnumerable symbols); - - - } } diff --git a/Implab/Automaton/IDFADefinition.cs b/Implab/Automaton/IDFADefinition.cs deleted file mode 100644 --- a/Implab/Automaton/IDFADefinition.cs +++ /dev/null @@ -1,56 +0,0 @@ - -namespace Implab.Automaton { - /// - /// Полностью описывает DFA автомат, его поведение, состояние и входные символы. - /// - /// - /// class MyAutomaton { - /// int m_current; - /// readonly DFAStateDescriptor[] m_automaton; - /// readonly IAlphabet m_commands; - /// - /// public MyAutomaton(IDFADefinition<MyCommands,MyStates,string> definition) { - /// m_current = definition.StateAlphabet.Translate(MyStates.Initial); - /// m_automaton = definition.GetTransitionTable(); - /// m_commands = definition.InputAlphabet; - /// } - /// - /// // defined a method which will move the automaton to the next state - /// public void Move(MyCommands cmd) { - /// // use transition map to determine the next state - /// var next = m_automaton[m_current].transitions[m_commands.Translate(cmd)]; - /// - /// // validate that we aren't in the unreachable state - /// if (next == DFAConst.UNREACHABLE_STATE) - /// throw new InvalidOperationException("The specified command is invalid"); - /// - /// // if everything is ok - /// m_current = next; - /// } - /// } - /// - public interface IDFADefinition { - /// - /// Алфавит входных символов - /// - /// The input alphabet. - IAlphabet InputAlphabet { - get; - } - - /// - /// Алфавит состояний автомата - /// - /// The state alphabet. - IAlphabet StateAlphabet { - get; - } - - /// - /// Таблица переходов состояний автомата - /// - /// The transition table. - DFAStateDescriptior[] GetTransitionTable(); - - } -} diff --git a/Implab/Automaton/IDFADefinitionBuilder.cs b/Implab/Automaton/IDFADefinitionBuilder.cs deleted file mode 100644 --- a/Implab/Automaton/IDFADefinitionBuilder.cs +++ /dev/null @@ -1,23 +0,0 @@ -using System; - -namespace Implab.Automaton { - public interface IDFADefinitionBuilder { - /// - /// Marks the state as final and assings tags. - /// - /// State. - /// Tags. - void MarkFinalState(int state, params TTag[] tags); - - /// - /// Defines the transition from to - /// with input . - /// - /// S1. - /// S2. - /// Symbol. - void DefineTransition(int s1, int s2, int symbol); - - } -} - diff --git a/Implab/Automaton/IDFATransitionTable.cs b/Implab/Automaton/IDFATransitionTable.cs new file mode 100644 --- /dev/null +++ b/Implab/Automaton/IDFATransitionTable.cs @@ -0,0 +1,59 @@ +using System.Collections.Generic; + + +namespace Implab.Automaton { + /// + /// Полностью описывает DFA автомат, его поведение, состояние и входные символы. + /// + /// + /// class MyAutomaton { + /// int m_current; + /// readonly DFAStateDescriptor[] m_automaton; + /// readonly IAlphabet m_commands; + /// + /// public MyAutomaton(IDFADefinition<MyCommands,MyStates,string> definition) { + /// m_current = definition.StateAlphabet.Translate(MyStates.Initial); + /// m_automaton = definition.GetTransitionTable(); + /// m_commands = definition.InputAlphabet; + /// } + /// + /// // defined a method which will move the automaton to the next state + /// public void Move(MyCommands cmd) { + /// // use transition map to determine the next state + /// var next = m_automaton[m_current].transitions[m_commands.Translate(cmd)]; + /// + /// // validate that we aren't in the unreachable state + /// if (next == DFAConst.UNREACHABLE_STATE) + /// throw new InvalidOperationException("The specified command is invalid"); + /// + /// // if everything is ok + /// m_current = next; + /// } + /// } + /// + public interface IDFATransitionTable { + /// + /// Таблица переходов состояний автомата + /// + /// The transition table. + DFAStateDescriptior[] GetTransitionTable(); + + int StateCount { + get; + } + + int AlphabetSize { + get; + } + + int InitialState { + get; + } + + bool IsFinalState(int s); + + IEnumerable> FinalStates { + get; + } + } +} diff --git a/Implab/Automaton/IDFATransitionTableBuilder.cs b/Implab/Automaton/IDFATransitionTableBuilder.cs new file mode 100644 --- /dev/null +++ b/Implab/Automaton/IDFATransitionTableBuilder.cs @@ -0,0 +1,25 @@ +using System; + +namespace Implab.Automaton { + public interface IDFATransitionTableBuilder : IDFATransitionTable { + /// + /// Marks the state as final and assings tags. + /// + /// State. + /// Tags. + void MarkFinalState(int state, params TTag[] tags); + + /// + /// Defines the transition from to + /// with input . + /// + /// S1. + /// S2. + /// Symbol. + void DefineTransition(int s1, int s2, int symbol); + + void SetInitialState(int s); + + } +} + diff --git a/Implab/Automaton/IndexedAlphabetBase.cs b/Implab/Automaton/IndexedAlphabetBase.cs --- a/Implab/Automaton/IndexedAlphabetBase.cs +++ b/Implab/Automaton/IndexedAlphabetBase.cs @@ -9,8 +9,6 @@ namespace Implab.Automaton { /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index. /// public abstract class IndexedAlphabetBase : IAlphabetBuilder { - public const int UNCLASSIFIED = 0; - int m_nextId = 1; readonly int[] m_map; @@ -31,7 +29,7 @@ namespace Implab.Automaton { public int DefineSymbol(T symbol) { var index = GetSymbolIndex(symbol); - if (m_map[index] == UNCLASSIFIED) + if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT) m_map[index] = m_nextId++; return m_map[index]; } @@ -42,7 +40,7 @@ namespace Implab.Automaton { foreach (var symbol in symbols) { var index = GetSymbolIndex(symbol); - if (m_map[index] == UNCLASSIFIED) + if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT) m_map[GetSymbolIndex(symbol)] = m_nextId; else throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol)); @@ -52,10 +50,10 @@ namespace Implab.Automaton { public List[] CreateReverseMap() { return - Enumerable.Range(UNCLASSIFIED, Count) + Enumerable.Range(0, Count) .Select( i => InputSymbols - .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i) + .Where(x => i != DFAConst.UNCLASSIFIED_INPUT && m_map[GetSymbolIndex(x)] == i) .ToList() ) .ToArray(); @@ -70,7 +68,7 @@ namespace Implab.Automaton { foreach (var scl in classes) { // skip if the supper class contains the unclassified element - if (scl.Contains(UNCLASSIFIED)) + if (scl.Contains(DFAConst.UNCLASSIFIED_INPUT)) continue; var range = new List(); foreach (var cl in scl) { diff --git a/Implab/Automaton/MapAlphabet.cs b/Implab/Automaton/MapAlphabet.cs --- a/Implab/Automaton/MapAlphabet.cs +++ b/Implab/Automaton/MapAlphabet.cs @@ -5,11 +5,14 @@ using System.Linq; namespace Implab.Automaton { public class MapAlphabet : IAlphabetBuilder { readonly Dictionary m_map; - int m_nextCls; + int m_nextCls = 1; + + public MapAlphabet() { + m_map = new Dictionary(); + } public MapAlphabet(IEqualityComparer comparer) { m_map = new Dictionary(comparer); - m_nextCls = 1; } #region IAlphabetBuilder implementation @@ -71,6 +74,9 @@ namespace Implab.Automaton { var map = new int[rmap.Length]; foreach (var cls in classes) { + if (cls.Contains(DFAConst.UNCLASSIFIED_INPUT)) + continue; + var symbols = new List(); foreach (var id in cls) { if (id < 0 || id >= rmap.Length) diff --git a/Implab/Automaton/RegularExpressions/Grammar.cs b/Implab/Automaton/RegularExpressions/Grammar.cs --- a/Implab/Automaton/RegularExpressions/Grammar.cs +++ b/Implab/Automaton/RegularExpressions/Grammar.cs @@ -71,14 +71,16 @@ namespace Implab.Automaton.RegularExpres protected CDFADefinition BuildDFA(Token lang) { Safe.ArgumentNotNull(lang, "lang"); - var dfa = new CDFADefinition(Alphabet); - + var dfa = new RegularDFADefinition(Alphabet, 0); + + var table = new DFATransitionTable(); + var builder = new RegularDFABuilder(); lang.Accept( builder ); - builder.BuildDFA(dfa); - if (dfa.InitialStateIsFinal) + var initialState = builder.BuildDFA(table); + if (table.IsFinalState(initialState)) throw new ApplicationException("The specified language contains empty token"); return dfa.Optimize(); diff --git a/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs b/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs --- a/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs +++ b/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs @@ -122,7 +122,7 @@ namespace Implab.Automaton.RegularExpres m_ends.Add(m_idx, token.Tag); } - public void BuildDFA(IDFADefinitionBuilder dfa) { + public void BuildDFA(IDFATransitionTableBuilder dfa) { Safe.ArgumentNotNull(dfa,"dfa"); var states = new MapAlphabet>(new CustomEqualityComparer>( @@ -131,7 +131,8 @@ namespace Implab.Automaton.RegularExpres )); var initialState = states.DefineSymbol(m_firstpos); - + dfa.SetInitialState(initialState); + var tags = GetStateTags(m_firstpos); if (tags != null && tags.Length > 0) dfa.MarkFinalState(initialState, tags); diff --git a/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs b/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs new file mode 100644 --- /dev/null +++ b/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs @@ -0,0 +1,46 @@ +using System; + +namespace Implab.Automaton.RegularExpressions { + public class RegularDFADefinition : DFATransitionTable, IDFATransitionTable { + + readonly IAlphabet m_alphabet; + readonly int m_initialState; + + public RegularDFADefinition(IAlphabet alphabet, int initialState) { + Safe.ArgumentNotNull(alphabet, "aplhabet"); + + m_alphabet = alphabet; + m_initialState = initialState; + } + + + public IAlphabet InputAlphabet { + get { + return m_alphabet; + } + } + + protected override DFAStateDescriptior[] ConstructTransitionTable() { + if (InputAlphabet.Count != m_alphabet.Count) + throw new InvalidOperationException("The alphabet doesn't match the transition table"); + + return base.ConstructTransitionTable(); + } + + /// + /// Optimize the specified alphabet. + /// + /// Пустой алфавит, который будет зполнен в процессе оптимизации. + public RegularDFADefinition Optimize(IAlphabetBuilder alphabet) { + Safe.ArgumentNotNull(alphabet, "alphabet"); + + var optimalDFA = new RegularDFADefinition(alphabet, m_initialState); + + Optimize(optimalDFA, InputAlphabet, alphabet, new DummyAlphabet(StateCount), new MapAlphabet()); + + } + + + } +} + diff --git a/Implab/Automaton/Scanner.cs b/Implab/Automaton/Scanner.cs --- a/Implab/Automaton/Scanner.cs +++ b/Implab/Automaton/Scanner.cs @@ -17,12 +17,14 @@ namespace Implab.Automaton { struct ScannerConfig { public DFAStateDescriptior[] states; public int[] alphabetMap; + public int initialState; } Stack m_defs = new Stack(); DFAStateDescriptior[] m_states; int[] m_alphabetMap; + int m_initialState; protected DFAStateDescriptior m_currentState; int m_previewCode; @@ -39,12 +41,13 @@ namespace Implab.Automaton { int m_chunkSize = 1024; // 1k int m_limit = 10 * 1024 * 1024; // 10Mb - protected Scanner(DFAStateDescriptior[] states, int[] alphabet) { + protected Scanner(DFAStateDescriptior[] states, int[] alphabet, int initialState) { Safe.ArgumentNotEmpty(states, "states"); Safe.ArgumentNotNull(alphabet, "alphabet"); m_states = states; m_alphabetMap = alphabet; + m_initialState = initialState; Feed(new char[0]); } @@ -130,7 +133,7 @@ namespace Implab.Automaton { if (m_pointer >= m_bufferSize) return false; - m_currentState = m_states[DFADefinition.INITIAL_STATE]; + m_currentState = m_states[m_initialState]; m_tokenLen = 0; m_tokenOffset = m_pointer; int nextState; @@ -217,16 +220,18 @@ namespace Implab.Automaton { /// /// Таблица состояний нового ДКА /// Таблица входных символов для нового ДКА - protected void Switch(DFAStateDescriptior[] states, int[] alphabet) { + protected void Switch(DFAStateDescriptior[] states, int[] alphabet, int initialState) { Safe.ArgumentNotNull(states, "dfa"); m_defs.Push(new ScannerConfig { states = m_states, - alphabetMap = m_alphabetMap + alphabetMap = m_alphabetMap, + initialState = m_initialState }); m_states = states; m_alphabetMap = alphabet; + m_initialState = initialState; m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; } @@ -240,6 +245,7 @@ namespace Implab.Automaton { var prev = m_defs.Pop(); m_states = prev.states; m_alphabetMap = prev.alphabetMap; + m_initialState = prev.initialState; m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; } diff --git a/Implab/Formats/ByteAlphabet.cs b/Implab/Formats/ByteAlphabet.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/ByteAlphabet.cs @@ -0,0 +1,25 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Implab.Automaton { + public class ByteAlphabet : IndexedAlphabetBase { + public ByteAlphabet() : base(byte.MaxValue + 1){ + } + + #region implemented abstract members of IndexedAlphabetBase + + public override int GetSymbolIndex(byte symbol) { + return (int)symbol; + } + + public IEnumerable InputSymbols { + get { + return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast(); + } + } + + #endregion + } +} + diff --git a/Implab/Formats/CharAlphabet.cs b/Implab/Formats/CharAlphabet.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/CharAlphabet.cs @@ -0,0 +1,23 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Automaton { + public class CharAlphabet: IndexedAlphabetBase { + + public CharAlphabet() + : base(char.MaxValue + 1) { + } + + public override int GetSymbolIndex(char symbol) { + return symbol; + } + + public override IEnumerable InputSymbols { + get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast(); } + } + } +} diff --git a/Implab/Implab.csproj b/Implab/Implab.csproj --- a/Implab/Implab.csproj +++ b/Implab/Implab.csproj @@ -8,6 +8,9 @@ Implab Implab v4.5 + 0.2 + 8.0.30703 + 2.0 true @@ -148,20 +151,14 @@ - - - - - - @@ -174,7 +171,6 @@ - @@ -188,6 +184,12 @@ + + + + + +