| @@ -0,0 +1,59 | |||||
|
|
1 | using System.Collections.Generic; | |||
|
|
2 | ||||
|
|
3 | ||||
|
|
4 | namespace Implab.Automaton { | |||
|
|
5 | /// <summary> | |||
|
|
6 | /// Полностью описывает DFA автомат, его поведение, состояние и входные символы. | |||
|
|
7 | /// </summary> | |||
|
|
8 | /// <example> | |||
|
|
9 | /// class MyAutomaton { | |||
|
|
10 | /// int m_current; | |||
|
|
11 | /// readonly DFAStateDescriptor<string>[] m_automaton; | |||
|
|
12 | /// readonly IAlphabet<MyCommands> m_commands; | |||
|
|
13 | /// | |||
|
|
14 | /// public MyAutomaton(IDFADefinition<MyCommands,MyStates,string> definition) { | |||
|
|
15 | /// m_current = definition.StateAlphabet.Translate(MyStates.Initial); | |||
|
|
16 | /// m_automaton = definition.GetTransitionTable(); | |||
|
|
17 | /// m_commands = definition.InputAlphabet; | |||
|
|
18 | /// } | |||
|
|
19 | /// | |||
|
|
20 | /// // defined a method which will move the automaton to the next state | |||
|
|
21 | /// public void Move(MyCommands cmd) { | |||
|
|
22 | /// // use transition map to determine the next state | |||
|
|
23 | /// var next = m_automaton[m_current].transitions[m_commands.Translate(cmd)]; | |||
|
|
24 | /// | |||
|
|
25 | /// // validate that we aren't in the unreachable state | |||
|
|
26 | /// if (next == DFAConst.UNREACHABLE_STATE) | |||
|
|
27 | /// throw new InvalidOperationException("The specified command is invalid"); | |||
|
|
28 | /// | |||
|
|
29 | /// // if everything is ok | |||
|
|
30 | /// m_current = next; | |||
|
|
31 | /// } | |||
|
|
32 | /// } | |||
|
|
33 | /// </example> | |||
|
|
34 | public interface IDFATable { | |||
|
|
35 | /// <summary> | |||
|
|
36 | /// Таблица переходов состояний автомата | |||
|
|
37 | /// </summary> | |||
|
|
38 | /// <returns>The transition table.</returns> | |||
|
|
39 | DFAStateDescriptior[] GetTransitionTable(); | |||
|
|
40 | ||||
|
|
41 | int StateCount { | |||
|
|
42 | get; | |||
|
|
43 | } | |||
|
|
44 | ||||
|
|
45 | int AlphabetSize { | |||
|
|
46 | get; | |||
|
|
47 | } | |||
|
|
48 | ||||
|
|
49 | int InitialState { | |||
|
|
50 | get; | |||
|
|
51 | } | |||
|
|
52 | ||||
|
|
53 | bool IsFinalState(int s); | |||
|
|
54 | ||||
|
|
55 | IEnumerable<int> FinalStates { | |||
|
|
56 | get; | |||
|
|
57 | } | |||
|
|
58 | } | |||
|
|
59 | } | |||
| @@ -0,0 +1,24 | |||||
|
|
1 | using System; | |||
|
|
2 | ||||
|
|
3 | namespace Implab.Automaton { | |||
|
|
4 | public interface IDFATableBuilder : IDFATable { | |||
|
|
5 | /// <summary> | |||
|
|
6 | /// Marks the state as final. | |||
|
|
7 | /// </summary> | |||
|
|
8 | /// <param name="state">State.</param> | |||
|
|
9 | void MarkFinalState(int state); | |||
|
|
10 | ||||
|
|
11 | /// <summary> | |||
|
|
12 | /// Defines the transition from <paramref name="s1"/> to | |||
|
|
13 | /// <paramref name="s2"/> with input <paramref name="symbol"/>. | |||
|
|
14 | /// </summary> | |||
|
|
15 | /// <param name="s1">S1.</param> | |||
|
|
16 | /// <param name="s2">S2.</param> | |||
|
|
17 | /// <param name="symbol">Symbol.</param> | |||
|
|
18 | void DefineTransition(int s1, int s2, int symbol); | |||
|
|
19 | ||||
|
|
20 | void SetInitialState(int s); | |||
|
|
21 | ||||
|
|
22 | } | |||
|
|
23 | } | |||
|
|
24 | ||||
| @@ -0,0 +1,17 | |||||
|
|
1 | using System; | |||
|
|
2 | using Implab.Automaton.RegularExpressions; | |||
|
|
3 | ||||
|
|
4 | namespace Implab.Formats { | |||
|
|
5 | public class RegularCharDFADefinition<TTag> : RegularDFADefinition<char,TTag> { | |||
|
|
6 | readonly CharAlphabet m_alphabet; | |||
|
|
7 | ||||
|
|
8 | public RegularCharDFADefinition(CharAlphabet alphabet) : base(alphabet) { | |||
|
|
9 | m_alphabet = alphabet; | |||
|
|
10 | } | |||
|
|
11 | ||||
|
|
12 | public new CharAlphabet InputAlphabet { | |||
|
|
13 | get { return m_alphabet; } | |||
|
|
14 | } | |||
|
|
15 | } | |||
|
|
16 | } | |||
|
|
17 | ||||
| @@ -1,7 +1,15 | |||||
| 1 | namespace Implab.Automaton { |
|
1 | namespace Implab.Automaton { | |
| 2 |
public struct DFAStateDescriptior |
|
2 | public struct DFAStateDescriptior { | |
| 3 | public bool final; |
|
3 | public readonly bool final; | |
| 4 |
public |
|
4 | public readonly int[] transitions; | |
| 5 | public int[] transitions; |
|
5 | ||
|
|
6 | ||||
|
|
7 | public DFAStateDescriptior(int[] transitions, bool final) { | |||
|
|
8 | this.transitions = transitions; | |||
|
|
9 | this.final = final; | |||
|
|
10 | } | |||
|
|
11 | ||||
|
|
12 | public DFAStateDescriptior(int[] transitions) : this(transitions, false) { | |||
|
|
13 | } | |||
| 6 | } |
|
14 | } | |
| 7 | } |
|
15 | } | |
| @@ -4,8 +4,8 using System.Collections.Generic; | |||||
| 4 | using System.Linq; |
|
4 | using System.Linq; | |
| 5 |
|
5 | |||
| 6 | namespace Implab.Automaton { |
|
6 | namespace Implab.Automaton { | |
| 7 |
public class DFATransitionTable<TTag> : IDFAT |
|
7 | public class DFATransitionTable<TTag> : IDFATableBuilder { | |
| 8 |
DFAStateDescriptior |
|
8 | DFAStateDescriptior[] m_dfaTable; | |
| 9 |
|
9 | |||
| 10 | int m_stateCount; |
|
10 | int m_stateCount; | |
| 11 | int m_symbolCount; |
|
11 | int m_symbolCount; | |
| @@ -17,7 +17,7 namespace Implab.Automaton { | |||||
| 17 |
|
17 | |||
| 18 | #region IDFADefinition implementation |
|
18 | #region IDFADefinition implementation | |
| 19 |
|
19 | |||
| 20 |
public DFAStateDescriptior |
|
20 | public DFAStateDescriptior[] GetTransitionTable() { | |
| 21 | if (m_dfaTable == null) { |
|
21 | if (m_dfaTable == null) { | |
| 22 | if (m_stateCount <= 0) |
|
22 | if (m_stateCount <= 0) | |
| 23 | throw new InvalidOperationException("Invalid automaton definition: states count = {0}", m_stateCount); |
|
23 | throw new InvalidOperationException("Invalid automaton definition: states count = {0}", m_stateCount); | |
| @@ -108,7 +108,7 namespace Implab.Automaton { | |||||
| 108 | #endregion |
|
108 | #endregion | |
| 109 |
|
109 | |||
| 110 | protected void Optimize<TInput, TState>( |
|
110 | protected void Optimize<TInput, TState>( | |
| 111 |
IDFAT |
|
111 | IDFATableBuilder<TTag> optimalDFA, | |
| 112 | IAlphabet<TInput> inputAlphabet, |
|
112 | IAlphabet<TInput> inputAlphabet, | |
| 113 | IAlphabetBuilder<TInput> optimalInputAlphabet, |
|
113 | IAlphabetBuilder<TInput> optimalInputAlphabet, | |
| 114 | IAlphabet<TState> stateAlphabet, |
|
114 | IAlphabet<TState> stateAlphabet, | |
| @@ -2,48 +2,46 | |||||
| 2 | using System; |
|
2 | using System; | |
| 3 | using System.Collections.Generic; |
|
3 | using System.Collections.Generic; | |
| 4 | using System.Linq; |
|
4 | using System.Linq; | |
| 5 | using System.Text; |
|
|||
| 6 | using System.Threading.Tasks; |
|
|||
| 7 |
|
5 | |||
| 8 | namespace Implab.Automaton.RegularExpressions { |
|
6 | namespace Implab.Automaton.RegularExpressions { | |
| 9 | /// <summary> |
|
7 | /// <summary> | |
| 10 | /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. |
|
8 | /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. | |
| 11 | /// </summary> |
|
9 | /// </summary> | |
| 12 |
|
|
10 | public abstract class Grammar<TSymbol, TTag> { | |
| 13 |
|
11 | |||
| 14 |
p |
|
12 | protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { | |
| 15 | get; |
|
13 | get; | |
| 16 | } |
|
14 | } | |
| 17 |
|
15 | |||
| 18 |
p |
|
16 | protected SymbolToken<TTag> UnclassifiedToken() { | |
| 19 | return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT); |
|
17 | return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT); | |
| 20 | } |
|
18 | } | |
| 21 |
|
19 | |||
| 22 |
p |
|
20 | protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { | |
| 23 | Safe.ArgumentNotNull(alphabet, "alphabet"); |
|
21 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |
| 24 |
|
22 | |||
| 25 | foreach (var ch in alphabet) |
|
23 | foreach (var ch in alphabet) | |
| 26 | Alphabet.DefineSymbol(ch); |
|
24 | AlphabetBuilder.DefineSymbol(ch); | |
| 27 | } |
|
25 | } | |
| 28 |
|
26 | |||
| 29 |
p |
|
27 | protected Token<TTag> SymbolToken(TSymbol symbol) { | |
| 30 | return Token<TTag>.New(TranslateOrAdd(symbol)); |
|
28 | return Token<TTag>.New(TranslateOrAdd(symbol)); | |
| 31 | } |
|
29 | } | |
| 32 |
|
30 | |||
| 33 |
p |
|
31 | protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) { | |
| 34 | Safe.ArgumentNotNull(symbols, "symbols"); |
|
32 | Safe.ArgumentNotNull(symbols, "symbols"); | |
| 35 |
|
33 | |||
| 36 | return Token<TTag>.New(TranslateOrAdd(symbols).ToArray()); |
|
34 | return Token<TTag>.New(TranslateOrAdd(symbols).ToArray()); | |
| 37 | } |
|
35 | } | |
| 38 |
|
36 | |||
| 39 |
p |
|
37 | protected Token<TTag> SymbolSetToken(params TSymbol[] set) { | |
| 40 | return SymbolToken(set); |
|
38 | return SymbolToken(set); | |
| 41 | } |
|
39 | } | |
| 42 |
|
40 | |||
| 43 | int TranslateOrAdd(TSymbol ch) { |
|
41 | int TranslateOrAdd(TSymbol ch) { | |
| 44 | var t = Alphabet.Translate(ch); |
|
42 | var t = AlphabetBuilder.Translate(ch); | |
| 45 | if (t == DFAConst.UNCLASSIFIED_INPUT) |
|
43 | if (t == DFAConst.UNCLASSIFIED_INPUT) | |
| 46 | t = Alphabet.DefineSymbol(ch); |
|
44 | t = AlphabetBuilder.DefineSymbol(ch); | |
| 47 | return t; |
|
45 | return t; | |
| 48 | } |
|
46 | } | |
| 49 |
|
47 | |||
| @@ -52,7 +50,7 namespace Implab.Automaton.RegularExpres | |||||
| 52 | } |
|
50 | } | |
| 53 |
|
51 | |||
| 54 | int TranslateOrDie(TSymbol ch) { |
|
52 | int TranslateOrDie(TSymbol ch) { | |
| 55 | var t = Alphabet.Translate(ch); |
|
53 | var t = AlphabetBuilder.Translate(ch); | |
| 56 | if (t == DFAConst.UNCLASSIFIED_INPUT) |
|
54 | if (t == DFAConst.UNCLASSIFIED_INPUT) | |
| 57 | throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); |
|
55 | throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); | |
| 58 | return t; |
|
56 | return t; | |
| @@ -62,33 +60,31 namespace Implab.Automaton.RegularExpres | |||||
| 62 | return symbols.Distinct().Select(TranslateOrDie); |
|
60 | return symbols.Distinct().Select(TranslateOrDie); | |
| 63 | } |
|
61 | } | |
| 64 |
|
62 | |||
| 65 |
p |
|
63 | protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) { | |
| 66 | Safe.ArgumentNotNull(symbols, "symbols"); |
|
64 | Safe.ArgumentNotNull(symbols, "symbols"); | |
| 67 |
|
65 | |||
| 68 | return Token<TTag>.New( Enumerable.Range(0, Alphabet.Count).Except(TranslateOrDie(symbols)).ToArray() ); |
|
66 | return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); | |
| 69 | } |
|
67 | } | |
| 70 |
|
68 | |||
| 71 | protected CDFADefinition BuildDFA(Token<TTag> lang) { |
|
69 | protected void BuildDFA(Token<TTag> lang, IDFATableBuilder<TTag> dfaTable, IAlphabetBuilder<TSymbol> dfaAlphabet) { | |
| 72 | Safe.ArgumentNotNull(lang, "lang"); |
|
70 | Safe.ArgumentNotNull(lang, "lang"); | |
|
|
71 | Safe.ArgumentNotNull(dfaAlphabet, "dfaAlphabet"); | |||
| 73 |
|
72 | |||
| 74 |
var dfa = new RegularDFADefinition<TSymbol, TTag>(Alphabet |
|
73 | var dfa = new RegularDFADefinition<TSymbol, TTag>(AlphabetBuilder); | |
| 75 |
|
||||
| 76 | var table = new DFATransitionTable<TTag>(); |
|
|||
| 77 |
|
74 | |||
| 78 | var builder = new RegularDFABuilder<TTag>(); |
|
75 | var builder = new RegularDFABuilder<TTag>(); | |
| 79 |
|
76 | |||
| 80 | lang.Accept( builder ); |
|
77 | lang.Accept( builder ); | |
| 81 |
|
78 | |||
| 82 |
|
|
79 | builder.BuildDFA(dfa); | |
| 83 | if (table.IsFinalState(initialState)) |
|
80 | ||
|
|
81 | if (dfa.IsFinalState(dfa.InitialState)) | |||
| 84 | throw new ApplicationException("The specified language contains empty token"); |
|
82 | throw new ApplicationException("The specified language contains empty token"); | |
| 85 |
|
83 | |||
| 86 |
|
|
84 | dfa.Optimize(dfaTable, dfaAlphabet); | |
|
|
85 | ||||
| 87 | } |
|
86 | } | |
| 88 |
|
87 | |||
| 89 |
|
||||
| 90 |
|
||||
| 91 | //protected abstract TGrammar CreateInstance(); |
|
|||
| 92 | } |
|
88 | } | |
| 93 |
|
89 | |||
| 94 |
|
90 | |||
| @@ -11,7 +11,7 namespace Implab.Automaton.RegularExpres | |||||
| 11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. |
|
11 | /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |
| 12 | /// </summary> |
|
12 | /// </summary> | |
| 13 | public class RegularDFABuilder<TTag> : IVisitor<TTag> { |
|
13 | public class RegularDFABuilder<TTag> : IVisitor<TTag> { | |
| 14 |
int m_idx |
|
14 | int m_idx; | |
| 15 | Token<TTag> m_root; |
|
15 | Token<TTag> m_root; | |
| 16 | HashSet<int> m_firstpos; |
|
16 | HashSet<int> m_firstpos; | |
| 17 | HashSet<int> m_lastpos; |
|
17 | HashSet<int> m_lastpos; | |
| @@ -26,18 +26,18 namespace Implab.Automaton.RegularExpres | |||||
| 26 |
|
26 | |||
| 27 | public HashSet<int> Followpos(int pos) { |
|
27 | public HashSet<int> Followpos(int pos) { | |
| 28 | HashSet<int> set; |
|
28 | HashSet<int> set; | |
| 29 |
|
|
29 | return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | |
| 30 | return set; |
|
|||
| 31 | return m_followpos[pos] = new HashSet<int>(); |
|
|||
| 32 | } |
|
30 | } | |
| 33 |
|
31 | |||
| 34 | bool Nullable(object n) { |
|
32 | bool Nullable(object n) { | |
| 35 | if (n is EmptyToken<TTag> || n is StarToken<TTag>) |
|
33 | if (n is EmptyToken<TTag> || n is StarToken<TTag>) | |
| 36 | return true; |
|
34 | return true; | |
| 37 |
|
|
35 | var altToken = n as AltToken<TTag>; | |
| 38 | return Nullable(((AltToken<TTag>)n).Left) || Nullable(((AltToken<TTag>)n).Right); |
|
36 | if (altToken != null) | |
| 39 | if (n is CatToken<TTag>) |
|
37 | return Nullable(altToken.Left) || Nullable(altToken.Right); | |
| 40 | return Nullable(((CatToken<TTag>)n).Left) && Nullable(((CatToken<TTag>)n).Right); |
|
38 | var catToken = n as CatToken<TTag>; | |
|
|
39 | if (catToken != null) | |||
|
|
40 | return Nullable(catToken.Left) && Nullable(catToken.Right); | |||
| 41 | return false; |
|
41 | return false; | |
| 42 | } |
|
42 | } | |
| 43 |
|
43 | |||
| @@ -122,7 +122,7 namespace Implab.Automaton.RegularExpres | |||||
| 122 | m_ends.Add(m_idx, token.Tag); |
|
122 | m_ends.Add(m_idx, token.Tag); | |
| 123 | } |
|
123 | } | |
| 124 |
|
124 | |||
| 125 |
public void BuildDFA(IDFAT |
|
125 | public void BuildDFA(IDFATableBuilder<TTag> dfa) { | |
| 126 | Safe.ArgumentNotNull(dfa,"dfa"); |
|
126 | Safe.ArgumentNotNull(dfa,"dfa"); | |
| 127 |
|
127 | |||
| 128 | var states = new MapAlphabet<HashSet<int>>(new CustomEqualityComparer<HashSet<int>>( |
|
128 | var states = new MapAlphabet<HashSet<int>>(new CustomEqualityComparer<HashSet<int>>( | |
| @@ -4,13 +4,11 namespace Implab.Automaton.RegularExpres | |||||
| 4 | public class RegularDFADefinition<TInput, TTag> : DFATransitionTable<TTag>, IDFATransitionTable<TTag> { |
|
4 | public class RegularDFADefinition<TInput, TTag> : DFATransitionTable<TTag>, IDFATransitionTable<TTag> { | |
| 5 |
|
5 | |||
| 6 | readonly IAlphabet<TInput> m_alphabet; |
|
6 | readonly IAlphabet<TInput> m_alphabet; | |
| 7 | readonly int m_initialState; |
|
|||
| 8 |
|
7 | |||
| 9 |
public RegularDFADefinition(IAlphabet<TInput> alphabet |
|
8 | public RegularDFADefinition(IAlphabet<TInput> alphabet) { | |
| 10 | Safe.ArgumentNotNull(alphabet, "aplhabet"); |
|
9 | Safe.ArgumentNotNull(alphabet, "aplhabet"); | |
| 11 |
|
10 | |||
| 12 | m_alphabet = alphabet; |
|
11 | m_alphabet = alphabet; | |
| 13 | m_initialState = initialState; |
|
|||
| 14 | } |
|
12 | } | |
| 15 |
|
13 | |||
| 16 |
|
14 | |||
| @@ -30,14 +28,13 namespace Implab.Automaton.RegularExpres | |||||
| 30 | /// <summary> |
|
28 | /// <summary> | |
| 31 | /// Optimize the specified alphabet. |
|
29 | /// Optimize the specified alphabet. | |
| 32 | /// </summary> |
|
30 | /// </summary> | |
|
|
31 | /// <param name = "dfaTable"></param> | |||
| 33 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> |
|
32 | /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> | |
| 34 |
public |
|
33 | public void Optimize(IDFATableBuilder<TTag> dfaTable, IAlphabetBuilder<TInput> alphabet) { | |
| 35 | Safe.ArgumentNotNull(alphabet, "alphabet"); |
|
34 | Safe.ArgumentNotNull(alphabet, "alphabet"); | |
|
|
35 | Safe.ArgumentNotNull(dfaTable, "dfaTable"); | |||
| 36 |
|
36 | |||
| 37 | var optimalDFA = new RegularDFADefinition<TInput,TTag>(alphabet, m_initialState); |
|
37 | Optimize(dfaTable, InputAlphabet, alphabet, new DummyAlphabet(StateCount), new MapAlphabet<int>()); | |
| 38 |
|
||||
| 39 | Optimize(optimalDFA, InputAlphabet, alphabet, new DummyAlphabet(StateCount), new MapAlphabet<int>()); |
|
|||
| 40 |
|
||||
| 41 | } |
|
38 | } | |
| 42 |
|
39 | |||
| 43 |
|
40 | |||
| @@ -10,7 +10,7 namespace Implab.Automaton.RegularExpres | |||||
| 10 | return Cat(new EndToken<TTag>()); |
|
10 | return Cat(new EndToken<TTag>()); | |
| 11 | } |
|
11 | } | |
| 12 |
|
12 | |||
| 13 |
public Token<TTag> Tag |
|
13 | public Token<TTag> Tag(TTag tag) { | |
| 14 | return Cat(new EndToken<TTag>(tag)); |
|
14 | return Cat(new EndToken<TTag>(tag)); | |
| 15 | } |
|
15 | } | |
| 16 |
|
16 | |||
| @@ -48,11 +48,11 namespace Implab.Automaton.RegularExpres | |||||
| 48 | var token = Repeat(min); |
|
48 | var token = Repeat(min); | |
| 49 |
|
49 | |||
| 50 | for (int i = min; i < max; i++) |
|
50 | for (int i = min; i < max; i++) | |
| 51 |
token = token.Cat( |
|
51 | token = token.Cat( Optional() ); | |
| 52 | return token; |
|
52 | return token; | |
| 53 | } |
|
53 | } | |
| 54 |
|
54 | |||
| 55 |
public static Token<TTag> New |
|
55 | public static Token<TTag> New(params int[] set) { | |
| 56 | Safe.ArgumentNotNull(set, "set"); |
|
56 | Safe.ArgumentNotNull(set, "set"); | |
| 57 | Token<TTag> token = null; |
|
57 | Token<TTag> token = null; | |
| 58 | foreach(var c in set.Distinct()) |
|
58 | foreach(var c in set.Distinct()) | |
| @@ -29,7 +29,7 namespace Implab.Automaton { | |||||
| 29 | protected DFAStateDescriptior<TTag> m_currentState; |
|
29 | protected DFAStateDescriptior<TTag> m_currentState; | |
| 30 | int m_previewCode; |
|
30 | int m_previewCode; | |
| 31 |
|
31 | |||
| 32 |
protected int m_tokenLen |
|
32 | protected int m_tokenLen; | |
| 33 | protected int m_tokenOffset; |
|
33 | protected int m_tokenOffset; | |
| 34 |
|
34 | |||
| 35 | protected char[] m_buffer; |
|
35 | protected char[] m_buffer; | |
| @@ -142,18 +142,17 namespace Implab.Automaton { | |||||
| 142 | if (nextState == DFAConst.UNREACHABLE_STATE) { |
|
142 | if (nextState == DFAConst.UNREACHABLE_STATE) { | |
| 143 | if (m_currentState.final) |
|
143 | if (m_currentState.final) | |
| 144 | return true; |
|
144 | return true; | |
| 145 |
|
|
145 | ||
| 146 |
|
|
146 | throw new ParserException( | |
| 147 |
|
|
147 | String.Format( | |
| 148 |
|
|
148 | "Unexpected symbol '{0}', at pos {1}", | |
| 149 |
|
|
149 | m_buffer[m_pointer], | |
| 150 |
|
|
150 | Position | |
| 151 |
|
|
151 | ) | |
| 152 |
|
|
152 | ); | |
| 153 | } else { |
|
|||
| 154 | m_currentState = m_states[nextState]; |
|
|||
| 155 | m_tokenLen++; |
|
|||
| 156 | } |
|
153 | } | |
|
|
154 | m_currentState = m_states[nextState]; | |||
|
|
155 | m_tokenLen++; | |||
| 157 |
|
156 | |||
| 158 | } while (Shift()); |
|
157 | } while (Shift()); | |
| 159 |
|
158 | |||
| @@ -220,6 +219,7 namespace Implab.Automaton { | |||||
| 220 | /// </summary> |
|
219 | /// </summary> | |
| 221 | /// <param name="states">Таблица состояний нового ДКА</param> |
|
220 | /// <param name="states">Таблица состояний нового ДКА</param> | |
| 222 | /// <param name="alphabet">Таблица входных символов для нового ДКА</param> |
|
221 | /// <param name="alphabet">Таблица входных символов для нового ДКА</param> | |
|
|
222 | /// <param name = "initialState"></param> | |||
| 223 | protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { |
|
223 | protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { | |
| 224 | Safe.ArgumentNotNull(states, "dfa"); |
|
224 | Safe.ArgumentNotNull(states, "dfa"); | |
| 225 |
|
225 | |||
| @@ -1,8 +1,8 | |||||
| 1 | using System; |
|
1 | using System.Collections.Generic; | |
| 2 | using System.Collections.Generic; |
|
|||
| 3 | using System.Linq; |
|
2 | using System.Linq; | |
|
|
3 | using Implab.Automaton; | |||
| 4 |
|
4 | |||
| 5 |
namespace Implab. |
|
5 | namespace Implab.Formats { | |
| 6 | public class ByteAlphabet : IndexedAlphabetBase<byte> { |
|
6 | public class ByteAlphabet : IndexedAlphabetBase<byte> { | |
| 7 | public ByteAlphabet() : base(byte.MaxValue + 1){ |
|
7 | public ByteAlphabet() : base(byte.MaxValue + 1){ | |
| 8 | } |
|
8 | } | |
| @@ -1,11 +1,8 | |||||
| 1 | using Implab; |
|
1 | using System.Collections.Generic; | |
| 2 | using System; |
|
|||
| 3 | using System.Collections.Generic; |
|
|||
| 4 | using System.Linq; |
|
2 | using System.Linq; | |
| 5 | using System.Text; |
|
3 | using Implab.Automaton; | |
| 6 | using System.Threading.Tasks; |
|
|||
| 7 |
|
4 | |||
| 8 |
namespace Implab. |
|
5 | namespace Implab.Formats { | |
| 9 | public class CharAlphabet: IndexedAlphabetBase<char> { |
|
6 | public class CharAlphabet: IndexedAlphabetBase<char> { | |
| 10 |
|
7 | |||
| 11 | public CharAlphabet() |
|
8 | public CharAlphabet() | |
| @@ -1,14 +1,8 | |||||
| 1 | using System; |
|
1 | namespace Implab.Formats.JSON { | |
| 2 | using System.Collections.Generic; |
|
|||
| 3 | using System.Linq; |
|
|||
| 4 | using System.Text; |
|
|||
| 5 | using System.Threading.Tasks; |
|
|||
| 6 |
|
||||
| 7 | namespace Implab.JSON { |
|
|||
| 8 | /// <summary> |
|
2 | /// <summary> | |
| 9 | /// internal |
|
3 | /// internal | |
| 10 | /// </summary> |
|
4 | /// </summary> | |
| 11 |
|
|
5 | enum JSONElementContext { | |
| 12 | None, |
|
6 | None, | |
| 13 | Object, |
|
7 | Object, | |
| 14 | Array, |
|
8 | Array, | |
| @@ -1,10 +1,4 | |||||
| 1 | using System; |
|
1 | namespace Implab.Formats.JSON { | |
| 2 | using System.Collections.Generic; |
|
|||
| 3 | using System.Linq; |
|
|||
| 4 | using System.Text; |
|
|||
| 5 | using System.Threading.Tasks; |
|
|||
| 6 |
|
||||
| 7 | namespace Implab.JSON { |
|
|||
| 8 | /// <summary> |
|
2 | /// <summary> | |
| 9 | /// Тип элемента на котором находится парсер |
|
3 | /// Тип элемента на котором находится парсер | |
| 10 | /// </summary> |
|
4 | /// </summary> | |
| @@ -1,8 +1,9 | |||||
| 1 | using System.Linq; |
|
1 | using System.Linq; | |
| 2 | using Implab.Automaton.RegularExpressions; |
|
2 | using Implab.Automaton.RegularExpressions; | |
|
|
3 | using System; | |||
| 3 |
|
4 | |||
| 4 | namespace Implab.Formats.JSON { |
|
5 | namespace Implab.Formats.JSON { | |
| 5 | class JSONGrammar : Grammar<JSONGrammar> { |
|
6 | class JSONGrammar : Grammar<char,JSONGrammar.TokenType> { | |
| 6 | public enum TokenType { |
|
7 | public enum TokenType { | |
| 7 | None, |
|
8 | None, | |
| 8 | BeginObject, |
|
9 | BeginObject, | |
| @@ -28,8 +29,14 namespace Implab.Formats.JSON { | |||||
| 28 | Exp |
|
29 | Exp | |
| 29 | } |
|
30 | } | |
| 30 |
|
31 | |||
| 31 | readonly CDFADefinition m_jsonDFA; |
|
32 | static Lazy<JSONGrammar> _instance = new Lazy<JSONGrammar>(); | |
| 32 | readonly CDFADefinition m_stringDFA; |
|
33 | ||
|
|
34 | public static JSONGrammar Instance { | |||
|
|
35 | get { return _instance.Value; } | |||
|
|
36 | } | |||
|
|
37 | ||||
|
|
38 | readonly RegularCharDFADefinition<TokenType> m_jsonDFA; | |||
|
|
39 | readonly RegularCharDFADefinition<TokenType> m_stringDFA; | |||
| 33 |
|
40 | |||
| 34 | public JSONGrammar() { |
|
41 | public JSONGrammar() { | |
| 35 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); |
|
42 | DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); | |
| @@ -80,20 +87,29 namespace Implab.Formats.JSON { | |||||
| 80 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); |
|
87 | .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); | |
| 81 |
|
88 | |||
| 82 |
|
89 | |||
| 83 | m_jsonDFA = BuildDFA(jsonExpression); |
|
90 | m_jsonDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet()); | |
| 84 | m_stringDFA = BuildDFA(jsonStringExpression); |
|
91 | BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); | |
|
|
92 | ||||
|
|
93 | ||||
|
|
94 | m_stringDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet()); | |||
|
|
95 | BuildDFA(jsonStringExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); | |||
| 85 | } |
|
96 | } | |
| 86 |
|
97 | |||
| 87 | public CDFADefinition JsonDFA { |
|
98 | public RegularCharDFADefinition<TokenType> JsonDFA { | |
| 88 | get { |
|
99 | get { | |
| 89 | return m_jsonDFA; |
|
100 | return m_jsonDFA; | |
| 90 | } |
|
101 | } | |
| 91 | } |
|
102 | } | |
| 92 |
|
103 | |||
| 93 |
public |
|
104 | public RegularDFADefinition<char,TokenType> JsonStringDFA { | |
| 94 | get { |
|
105 | get { | |
| 95 | return m_stringDFA; |
|
106 | return m_stringDFA; | |
| 96 | } |
|
107 | } | |
| 97 | } |
|
108 | } | |
|
|
109 | ||||
|
|
110 | Token<TokenType> SymbolRangeToken(char start, char stop) { | |||
|
|
111 | return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); | |||
|
|
112 | } | |||
|
|
113 | ||||
| 98 | } |
|
114 | } | |
| 99 | } |
|
115 | } | |
| @@ -1,9 +1,12 | |||||
| 1 | using Implab.Parsing; |
|
1 | using System; | |
| 2 | using System; |
|
|||
| 3 | using System.Diagnostics; |
|
2 | using System.Diagnostics; | |
| 4 | using System.IO; |
|
3 | using System.IO; | |
|
|
4 | using Implab.Automaton; | |||
|
|
5 | using Implab.Automaton.RegularExpressions; | |||
|
|
6 | using System.Linq; | |||
|
|
7 | using Implab.Components; | |||
| 5 |
|
8 | |||
| 6 | namespace Implab.JSON { |
|
9 | namespace Implab.Formats.JSON { | |
| 7 | /// <summary> |
|
10 | /// <summary> | |
| 8 | /// internal |
|
11 | /// internal | |
| 9 | /// </summary> |
|
12 | /// </summary> | |
| @@ -28,53 +31,65 namespace Implab.JSON { | |||||
| 28 | /// } // Level = 0 |
|
31 | /// } // Level = 0 | |
| 29 | /// </code> |
|
32 | /// </code> | |
| 30 | /// </remarks> |
|
33 | /// </remarks> | |
| 31 |
public class JSONParser : |
|
34 | public class JSONParser : Disposable { | |
| 32 |
|
35 | |||
| 33 | enum MemberContext { |
|
36 | enum MemberContext { | |
| 34 | MemberName, |
|
37 | MemberName, | |
| 35 | MemberValue |
|
38 | MemberValue | |
| 36 | } |
|
39 | } | |
| 37 |
|
40 | |||
|
|
41 | struct ParserContext { | |||
|
|
42 | DFAStateDescriptior<object> | |||
|
|
43 | } | |||
|
|
44 | ||||
| 38 | static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet; |
|
45 | static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet; | |
| 39 | static readonly DFAStateDescriptior[] _jsonDFA; |
|
46 | static readonly DFAStateDescriptior<object>[] _jsonDFA; | |
| 40 |
static readonly |
|
47 | static readonly int _jsonDFAInitialState; | |
| 41 |
static readonly DFAStateDescriptior[] _ |
|
48 | static readonly DFAStateDescriptior<object>[] _objectDFA; | |
|
|
49 | static readonly int _objectDFAInitialState; | |||
|
|
50 | static readonly DFAStateDescriptior<object>[] _arrayDFA; | |||
|
|
51 | static readonly int _arrayDFAInitialState; | |||
| 42 |
|
52 | |||
| 43 | static JSONParser() { |
|
53 | static JSONParser() { | |
| 44 |
|
54 | |||
| 45 |
|
55 | |||
| 46 |
var valueExpression = Token |
|
56 | var valueExpression = Token(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String); | |
| 47 |
var memberExpression = Token |
|
57 | var memberExpression = Token(JsonTokenType.String).Cat(Token(JsonTokenType.NameSeparator)).Cat(valueExpression); | |
| 48 |
|
58 | |||
| 49 | var objectExpression = memberExpression |
|
59 | var objectExpression = memberExpression | |
| 50 | .Cat( |
|
60 | .Cat( | |
| 51 |
Token |
|
61 | Token(JsonTokenType.ValueSeparator) | |
| 52 | .Cat(memberExpression) |
|
62 | .Cat(memberExpression) | |
| 53 | .EClosure() |
|
63 | .EClosure() | |
| 54 | ) |
|
64 | ) | |
| 55 | .Optional() |
|
65 | .Optional() | |
| 56 |
.Cat(Token |
|
66 | .Cat(Token(JsonTokenType.EndObject)) | |
| 57 |
.Tag( |
|
67 | .Tag(null); | |
| 58 | var arrayExpression = valueExpression |
|
68 | var arrayExpression = valueExpression | |
| 59 | .Cat( |
|
69 | .Cat( | |
| 60 |
Token |
|
70 | Token(JsonTokenType.ValueSeparator) | |
| 61 | .Cat(valueExpression) |
|
71 | .Cat(valueExpression) | |
| 62 | .EClosure() |
|
72 | .EClosure() | |
| 63 | ) |
|
73 | ) | |
| 64 | .Optional() |
|
74 | .Optional() | |
| 65 |
.Cat(Token |
|
75 | .Cat(Token(JsonTokenType.EndArray)) | |
| 66 |
.Tag( |
|
76 | .Tag(null); | |
| 67 |
|
77 | |||
| 68 |
var jsonExpression = valueExpression.Tag( |
|
78 | var jsonExpression = valueExpression.Tag(null); | |
| 69 |
|
79 | |||
| 70 |
_jsonDFA = |
|
80 | _jsonDFA = CreateDFA(jsonExpression).GetTransitionTable(); | |
| 71 |
_objectDFA = |
|
81 | _objectDFA = CreateDFA(objectExpression).GetTransitionTable(); | |
| 72 |
_arrayDFA = |
|
82 | _arrayDFA = CreateDFA(arrayExpression).GetTransitionTable(); | |
| 73 | } |
|
83 | } | |
| 74 |
|
84 | |||
| 75 | static EDFADefinition<JsonTokenType> BuildDFA(Token expr) { |
|
85 | static Token<object> Token(params JsonTokenType[] input) { | |
| 76 | var builder = new DFABuilder(); |
|
86 | return Token<object>.New(input.Select(t => _alphabet.Translate(t)).ToArray()); | |
| 77 | var dfa = new EDFADefinition<JsonTokenType>(_alphabet); |
|
87 | } | |
|
|
88 | ||||
|
|
89 | static RegularDFADefinition<JsonTokenType,object> CreateDFA(Token<object> expr) { | |||
|
|
90 | var builder = new RegularDFABuilder<object>(); | |||
|
|
91 | var dfa = new RegularDFADefinition<JsonTokenType,object>(_alphabet); | |||
|
|
92 | ||||
| 78 | expr.Accept(builder); |
|
93 | expr.Accept(builder); | |
| 79 |
|
94 | |||
| 80 | builder.BuildDFA(dfa); |
|
95 | builder.BuildDFA(dfa); | |
| @@ -243,25 +258,13 namespace Implab.JSON { | |||||
| 243 | } |
|
258 | } | |
| 244 | } |
|
259 | } | |
| 245 |
|
260 | |||
| 246 |
protected |
|
261 | protected override void Dispose(bool disposing) { | |
| 247 | if (disposing) { |
|
262 | if (disposing) { | |
| 248 | m_scanner.Dispose(); |
|
263 | m_scanner.Dispose(); | |
| 249 | } |
|
264 | } | |
| 250 | } |
|
265 | } | |
| 251 |
|
266 | |||
| 252 | /// <summary> |
|
267 | /// <summary> | |
| 253 | /// Освобождает парсер и связанный с ним сканнер. |
|
|||
| 254 | /// </summary> |
|
|||
| 255 | public void Dispose() { |
|
|||
| 256 | Dispose(true); |
|
|||
| 257 | GC.SuppressFinalize(this); |
|
|||
| 258 | } |
|
|||
| 259 |
|
||||
| 260 | ~JSONParser() { |
|
|||
| 261 | Dispose(false); |
|
|||
| 262 | } |
|
|||
| 263 |
|
||||
| 264 | /// <summary> |
|
|||
| 265 | /// Переходит в конец текущего объекта. |
|
268 | /// Переходит в конец текущего объекта. | |
| 266 | /// </summary> |
|
269 | /// </summary> | |
| 267 | public void SeekElementEnd() { |
|
270 | public void SeekElementEnd() { | |
| @@ -1,25 +1,21 | |||||
| 1 | using Implab.Parsing; |
|
1 | using System; | |
| 2 | using System; |
|
|||
| 3 | using System.Collections.Generic; |
|
|||
| 4 | using System.Globalization; |
|
2 | using System.Globalization; | |
| 5 | using System.Linq; |
|
3 | using Implab.Automaton; | |
| 6 | using System.Text; |
|
|||
| 7 | using System.Threading.Tasks; |
|
|||
| 8 |
|
4 | |||
| 9 | namespace Implab.JSON { |
|
5 | namespace Implab.Formats.JSON { | |
| 10 | /// <summary> |
|
6 | /// <summary> | |
| 11 | /// Сканнер (лексер), разбивающий поток символов на токены JSON. |
|
7 | /// Сканнер (лексер), разбивающий поток символов на токены JSON. | |
| 12 | /// </summary> |
|
8 | /// </summary> | |
| 13 | public class JSONScanner : Scanner { |
|
9 | public class JSONScanner : Scanner<object> { | |
| 14 | char[] m_stringBuffer; |
|
10 | char[] m_stringBuffer; | |
| 15 | DFAStateDescriptior[] m_stringDFA; |
|
11 | DFAStateDescriptior<>[] m_stringDFA; | |
| 16 | int[] m_stringAlphabet; |
|
12 | int[] m_stringAlphabet; | |
| 17 |
|
13 | |||
| 18 | /// <summary> |
|
14 | /// <summary> | |
| 19 | /// Создает новый экземпляр сканнера |
|
15 | /// Создает новый экземпляр сканнера | |
| 20 | /// </summary> |
|
16 | /// </summary> | |
| 21 | public JSONScanner() |
|
17 | public JSONScanner() | |
| 22 |
: base(JSONGrammar.Instance.JsonDFA. |
|
18 | : base(JSONGrammar.Instance.JsonDFA.GetTransitionTable(), JSONGrammar.Instance.JsonDFA.Alphabet.GetTranslationMap()) { | |
| 23 | m_stringBuffer = new char[1024]; |
|
19 | m_stringBuffer = new char[1024]; | |
| 24 | var dfa = JSONGrammar.Instance.JsonStringDFA; |
|
20 | var dfa = JSONGrammar.Instance.JsonStringDFA; | |
| 25 | m_stringAlphabet = dfa.Alphabet.GetTranslationMap(); |
|
21 | m_stringAlphabet = dfa.Alphabet.GetTranslationMap(); | |
| @@ -1,10 +1,4 | |||||
| 1 | using System; |
|
1 | namespace Implab.Formats.JSON { | |
| 2 | using System.Collections.Generic; |
|
|||
| 3 | using System.Linq; |
|
|||
| 4 | using System.Text; |
|
|||
| 5 | using System.Threading.Tasks; |
|
|||
| 6 |
|
||||
| 7 | namespace Implab.JSON { |
|
|||
| 8 | /// <summary> |
|
2 | /// <summary> | |
| 9 | /// Тип токенов, возвращаемых <see cref="JSONScanner"/>. |
|
3 | /// Тип токенов, возвращаемых <see cref="JSONScanner"/>. | |
| 10 | /// </summary> |
|
4 | /// </summary> | |
| @@ -152,7 +152,6 | |||||
| 152 | <Compile Include="Components\RunnableComponent.cs" /> |
|
152 | <Compile Include="Components\RunnableComponent.cs" /> | |
| 153 | <Compile Include="Components\IFactory.cs" /> |
|
153 | <Compile Include="Components\IFactory.cs" /> | |
| 154 | <Compile Include="Automaton\DFAStateDescriptor.cs" /> |
|
154 | <Compile Include="Automaton\DFAStateDescriptor.cs" /> | |
| 155 | <Compile Include="Automaton\DFAutomaton.cs" /> |
|
|||
| 156 | <Compile Include="Automaton\EnumAlphabet.cs" /> |
|
155 | <Compile Include="Automaton\EnumAlphabet.cs" /> | |
| 157 | <Compile Include="Automaton\IAlphabet.cs" /> |
|
156 | <Compile Include="Automaton\IAlphabet.cs" /> | |
| 158 | <Compile Include="Automaton\ParserException.cs" /> |
|
157 | <Compile Include="Automaton\ParserException.cs" /> | |
| @@ -185,11 +184,12 | |||||
| 185 | <Compile Include="Automaton\MapAlphabet.cs" /> |
|
184 | <Compile Include="Automaton\MapAlphabet.cs" /> | |
| 186 | <Compile Include="Automaton\DummyAlphabet.cs" /> |
|
185 | <Compile Include="Automaton\DummyAlphabet.cs" /> | |
| 187 | <Compile Include="Automaton\DFATransitionTable.cs" /> |
|
186 | <Compile Include="Automaton\DFATransitionTable.cs" /> | |
| 188 | <Compile Include="Automaton\IDFATransitionTableBuilder.cs" /> |
|
|||
| 189 | <Compile Include="Automaton\IDFATransitionTable.cs" /> |
|
|||
| 190 | <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" /> |
|
187 | <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" /> | |
| 191 | <Compile Include="Formats\CharAlphabet.cs" /> |
|
188 | <Compile Include="Formats\CharAlphabet.cs" /> | |
| 192 | <Compile Include="Formats\ByteAlphabet.cs" /> |
|
189 | <Compile Include="Formats\ByteAlphabet.cs" /> | |
|
|
190 | <Compile Include="Formats\RegularCharDFADefinition.cs" /> | |||
|
|
191 | <Compile Include="Automaton\IDFATable.cs" /> | |||
|
|
192 | <Compile Include="Automaton\IDFATableBuilder.cs" /> | |||
| 193 | </ItemGroup> |
|
193 | </ItemGroup> | |
| 194 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
194 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | |
| 195 | <ItemGroup /> |
|
195 | <ItemGroup /> | |
| 1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
| 1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
| 1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now
