@@ -0,0 +1,33 | |||||
|
1 | using Implab; | |||
|
2 | ||||
|
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// The end symbol of an extended regular expression; it is used to determine
    /// the final states when the DFA is built. This variant carries a tag.
    /// </summary>
    /// <typeparam name="TTag">The type of the tag stored in the token.</typeparam>
    public class EndToken<TTag>: Token {

        // assigned only by the constructor
        readonly TTag m_tag;

        /// <summary>
        /// Creates an end token which carries the specified tag.
        /// </summary>
        /// <param name="tag">The tag to store in the token.</param>
        public EndToken(TTag tag) {
            m_tag = tag;
        }

        /// <summary>
        /// Creates an end token whose tag is <c>default(TTag)</c>.
        /// </summary>
        public EndToken()
            : this(default(TTag)) {
        }

        /// <summary>
        /// The tag which was passed to the constructor.
        /// </summary>
        public TTag Tag {
            get { return m_tag; }
        }

        /// <summary>
        /// Casts the visitor to <see cref="IVisitor{TTag}"/> and passes this token to it.
        /// </summary>
        /// <param name="visitor">The visitor, it must also implement <see cref="IVisitor{TTag}"/>.</param>
        public override void Accept(IVisitor visitor) {
            // The null guard runs first so that the type guard only ever sees a real object.
            Safe.ArgumentNotNull(visitor, "visitor");
            Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor");
            ((IVisitor<TTag>)visitor).Visit(this);
        }

        /// <summary>
        /// Returns the textual form of the token, which is always <c>#</c>.
        /// </summary>
        public override string ToString() {
            return "#";
        }
    }
}
@@ -0,0 +1,8 | |||||
|
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// A visitor of the regular expression syntax tree which, in addition to the
    /// members of <see cref="IVisitor"/>, accepts tagged end tokens.
    /// </summary>
    /// <typeparam name="TTag">The type of the tag carried by the end token.</typeparam>
    public interface IVisitor<TTag> : IVisitor {
        /// <summary>
        /// Visits a tagged end token.
        /// </summary>
        /// <param name="token">The token being visited.</param>
        void Visit(EndToken<TTag> token);
    }
}
@@ -0,0 +1,100 | |||||
|
1 | using Implab; | |||
|
2 | using System; | |||
|
3 | using System.Collections.Generic; | |||
|
4 | using System.Linq; | |||
|
5 | using Implab.Automaton; | |||
|
6 | using Implab.Automaton.RegularExpressions; | |||
|
7 | ||||
|
namespace Implab.Formats {
    /// <summary>
    /// Abstract base class for grammars. Lets derived classes formulate regular
    /// expressions over an alphabet of <typeparamref name="TSymbol"/> symbols.
    /// </summary>
    /// <typeparam name="TSymbol">The type of the input symbols.</typeparam>
    /// <typeparam name="TTag">The type of the tags of the produced scanner context.</typeparam>
    public abstract class Grammar<TSymbol, TTag> {

        /// <summary>
        /// The alphabet in which the symbols used by the expressions are looked up and defined.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        /// <summary>
        /// Creates a symbol token for <see cref="DFAConst.UNCLASSIFIED_INPUT"/>.
        /// </summary>
        protected SymbolToken UnclassifiedToken() {
            return new SymbolToken(DFAConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Defines every symbol of the specified sequence in the alphabet.
        /// </summary>
        /// <param name="alphabet">The symbols to define, must not be null.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet)
                AlphabetBuilder.DefineSymbol(ch);
        }

        /// <summary>
        /// Creates a token for a single symbol; the symbol is defined in the
        /// alphabet when it is not classified yet.
        /// </summary>
        protected Token SymbolToken(TSymbol symbol) {
            return Token.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token for a set of symbols; symbols which are not classified
        /// yet are defined in the alphabet.
        /// </summary>
        /// <param name="symbols">The symbols of the set, must not be null.</param>
        protected Token SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            // ToArray forces the lazy translation, i.e. the symbols are defined here
            return Token.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Same as <see cref="SymbolToken(IEnumerable{TSymbol})"/>, the set is passed as arguments.
        /// </summary>
        protected Token SymbolSetToken(params TSymbol[] set) {
            return SymbolToken(set);
        }

        // Translates the symbol, defines it in the alphabet when it is unclassified.
        int TranslateOrAdd(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT)
                t = AlphabetBuilder.DefineSymbol(ch);
            return t;
        }

        // The result is lazy: symbols are translated (and defined) during the enumeration.
        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        // Translates the symbol, throws ApplicationException when it is unclassified.
        int TranslateOrDie(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT)
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            return t;
        }

        // The result is lazy: symbols are translated during the enumeration.
        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token for all symbol classes in the range
        /// <c>[0, AlphabetBuilder.Count)</c> except the classes of the specified symbols.
        /// </summary>
        /// <param name="symbols">The symbols to exclude, must not be null.</param>
        /// <exception cref="ApplicationException">One of the symbols is unclassified.</exception>
        protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
        }

        /// <summary>
        /// Creates the alphabet which is passed to the DFA optimization in
        /// <see cref="BuildScannerContext"/>.
        /// </summary>
        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        /// <summary>
        /// Builds a DFA for the specified expression, optimizes it and wraps the
        /// resulting tables into a scanner context.
        /// </summary>
        /// <param name="regexp">The expression to build the DFA from, must not be null.</param>
        /// <exception cref="ApplicationException">The initial state of the DFA is a final state.</exception>
        protected ScannerContext<TTag> BuildScannerContext(Token regexp) {
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>();
            regexp.Accept( visitor );

            visitor.BuildDFA(dfa);

            if (dfa.IsFinalState(dfa.InitialState))
                throw new ApplicationException("The specified language contains empty token");

            var ab = CreateAlphabet();
            var optimal = dfa.Optimize(ab);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                ab.GetTranslationMap()
            );
        }

    }


}
@@ -1,17 +1,17 | |||||
1 | using System; |
|
1 | using System; | |
2 |
|
2 | |||
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// A binary token whose textual form joins the operands with <c>|</c>.
    /// </summary>
    public class AltToken: BinaryToken {
        /// <summary>
        /// Creates the token from the left and the right operands.
        /// </summary>
        public AltToken(Token left, Token right)
            : base(left, right) {
        }

        /// <summary>
        /// Passes this token to the specified visitor.
        /// </summary>
        /// <param name="visitor">The visitor, must not be null.</param>
        public override void Accept(IVisitor visitor) {
            Safe.ArgumentNotNull(visitor, "visitor");
            visitor.Visit(this);
        }

        /// <summary>
        /// Formats the token as <c>left|right</c>; the right operand is enclosed
        /// in parentheses when it is a binary token.
        /// </summary>
        public override string ToString() {
            var format = "{0}|{1}";
            if (Right is BinaryToken)
                format = "{0}|({1})";
            return String.Format(format, Left, Right);
        }
    }
}
@@ -1,19 +1,19 | |||||
1 | using Implab; |
|
1 | using Implab; | |
2 |
|
2 | |||
3 | namespace Implab.Automaton.RegularExpressions { |
|
3 | namespace Implab.Automaton.RegularExpressions { | |
4 |
public abstract class BinaryToken |
|
4 | public abstract class BinaryToken: Token { | |
5 |
readonly Token |
|
5 | readonly Token m_left; | |
6 |
readonly Token |
|
6 | readonly Token m_right; | |
7 |
|
7 | |||
8 |
public Token |
|
8 | public Token Left { | |
9 | get { return m_left; } |
|
9 | get { return m_left; } | |
10 | } |
|
10 | } | |
11 |
|
11 | |||
12 |
public Token |
|
12 | public Token Right { | |
13 | get { return m_right; } |
|
13 | get { return m_right; } | |
14 | } |
|
14 | } | |
15 |
|
15 | |||
16 |
protected BinaryToken(Token |
|
16 | protected BinaryToken(Token left, Token right) { | |
17 | Safe.ArgumentNotNull(m_left = left, "left"); |
|
17 | Safe.ArgumentNotNull(m_left = left, "left"); | |
18 | Safe.ArgumentNotNull(m_right = right, "right"); |
|
18 | Safe.ArgumentNotNull(m_right = right, "right"); | |
19 | } |
|
19 | } |
@@ -1,12 +1,12 | |||||
1 | using System; |
|
1 | using System; | |
2 |
|
2 | |||
3 | namespace Implab.Automaton.RegularExpressions { |
|
3 | namespace Implab.Automaton.RegularExpressions { | |
4 |
public class CatToken |
|
4 | public class CatToken : BinaryToken { | |
5 |
public CatToken(Token |
|
5 | public CatToken(Token left, Token right) | |
6 | : base(left, right) { |
|
6 | : base(left, right) { | |
7 | } |
|
7 | } | |
8 |
|
8 | |||
9 |
public override void Accept(IVisitor |
|
9 | public override void Accept(IVisitor visitor) { | |
10 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
10 | Safe.ArgumentNotNull(visitor, "visitor"); | |
11 | visitor.Visit(this); |
|
11 | visitor.Visit(this); | |
12 | } |
|
12 | } | |
@@ -15,8 +15,8 namespace Implab.Automaton.RegularExpres | |||||
15 | return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right)); |
|
15 | return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right)); | |
16 | } |
|
16 | } | |
17 |
|
17 | |||
18 |
static string FormatToken(Token |
|
18 | static string FormatToken(Token token) { | |
19 |
return String.Format(token is AltToken |
|
19 | return String.Format(token is AltToken ? "({0})" : "{0}", token); | |
20 | } |
|
20 | } | |
21 | } |
|
21 | } | |
22 | } |
|
22 | } |
@@ -1,8 +1,8 | |||||
1 | using Implab; |
|
1 | using Implab; | |
2 |
|
2 | |||
3 | namespace Implab.Automaton.RegularExpressions { |
|
3 | namespace Implab.Automaton.RegularExpressions { | |
4 |
public class EmptyToken |
|
4 | public class EmptyToken: Token { | |
5 |
public override void Accept(IVisitor |
|
5 | public override void Accept(IVisitor visitor) { | |
6 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
6 | Safe.ArgumentNotNull(visitor, "visitor"); | |
7 | visitor.Visit(this); |
|
7 | visitor.Visit(this); | |
8 | } |
|
8 | } |
@@ -5,23 +5,9 namespace Implab.Automaton.RegularExpres | |||||
5 | /// Конечный символ расширенного регулярного выражения, при построении ДКА |
|
5 | /// Конечный символ расширенного регулярного выражения, при построении ДКА | |
6 | /// используется для определения конечных состояний. |
|
6 | /// используется для определения конечных состояний. | |
7 | /// </summary> |
|
7 | /// </summary> | |
8 |
public class EndToken |
|
8 | public class EndToken: Token { | |
9 |
|
||||
10 | TTag m_tag; |
|
|||
11 |
|
||||
12 | public EndToken(TTag tag) { |
|
|||
13 | m_tag = tag; |
|
|||
14 | } |
|
|||
15 |
|
9 | |||
16 | public EndToken() |
|
10 | public override void Accept(IVisitor visitor) { | |
17 | : this(default(TTag)) { |
|
|||
18 | } |
|
|||
19 |
|
||||
20 | public TTag Tag { |
|
|||
21 | get { return m_tag; } |
|
|||
22 | } |
|
|||
23 |
|
||||
24 | public override void Accept(IVisitor<TTag> visitor) { |
|
|||
25 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
11 | Safe.ArgumentNotNull(visitor, "visitor"); | |
26 | visitor.Visit(this); |
|
12 | visitor.Visit(this); | |
27 | } |
|
13 | } |
@@ -1,5 +1,4 | |||||
1 | using System; |
|
1 | ||
2 |
|
||||
3 | namespace Implab.Automaton.RegularExpressions { |
|
2 | namespace Implab.Automaton.RegularExpressions { | |
4 | public interface ITaggedDFABuilder<TTag> : IDFATableBuilder { |
|
3 | public interface ITaggedDFABuilder<TTag> : IDFATableBuilder { | |
5 | void SetStateTag(int s, TTag[] tags); |
|
4 | void SetStateTag(int s, TTag[] tags); |
@@ -2,12 +2,12 | |||||
2 | /// <summary> |
|
2 | /// <summary> | |
3 | /// Интерфейс обходчика синтаксического дерева регулярного выражения |
|
3 | /// Интерфейс обходчика синтаксического дерева регулярного выражения | |
4 | /// </summary> |
|
4 | /// </summary> | |
5 |
public interface IVisitor |
|
5 | public interface IVisitor { | |
6 |
void Visit(AltToken |
|
6 | void Visit(AltToken token); | |
7 |
void Visit(StarToken |
|
7 | void Visit(StarToken token); | |
8 |
void Visit(CatToken |
|
8 | void Visit(CatToken token); | |
9 |
void Visit(EmptyToken |
|
9 | void Visit(EmptyToken token); | |
10 |
void Visit(EndToken |
|
10 | void Visit(EndToken token); | |
11 |
void Visit(SymbolToken |
|
11 | void Visit(SymbolToken token); | |
12 | } |
|
12 | } | |
13 | } |
|
13 | } |
@@ -1,5 +1,4 | |||||
1 | using System; |
|
1 | using System.Collections.Generic; | |
2 | using System.Collections.Generic; |
|
|||
3 | using System.Linq; |
|
2 | using System.Linq; | |
4 |
|
3 | |||
5 | namespace Implab.Automaton.RegularExpressions { |
|
4 | namespace Implab.Automaton.RegularExpressions { |
@@ -12,13 +12,14 namespace Implab.Automaton.RegularExpres | |||||
12 | /// </summary> |
|
12 | /// </summary> | |
13 | public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { |
|
13 | public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { | |
14 | int m_idx; |
|
14 | int m_idx; | |
15 |
Token |
|
15 | Token m_root; | |
16 | HashSet<int> m_firstpos; |
|
16 | HashSet<int> m_firstpos; | |
17 | HashSet<int> m_lastpos; |
|
17 | HashSet<int> m_lastpos; | |
18 |
|
18 | |||
19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); |
|
19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |
20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); |
|
20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |
21 |
readonly |
|
21 | readonly HashSet<int> m_ends = new HashSet<int>(); | |
|
22 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | |||
22 |
|
23 | |||
23 | public Dictionary<int, HashSet<int>> FollowposMap { |
|
24 | public Dictionary<int, HashSet<int>> FollowposMap { | |
24 | get { return m_followpos; } |
|
25 | get { return m_followpos; } | |
@@ -30,19 +31,19 namespace Implab.Automaton.RegularExpres | |||||
30 | } |
|
31 | } | |
31 |
|
32 | |||
32 | bool Nullable(object n) { |
|
33 | bool Nullable(object n) { | |
33 |
if (n is EmptyToken |
|
34 | if (n is EmptyToken || n is StarToken) | |
34 | return true; |
|
35 | return true; | |
35 |
var altToken = n as AltToken |
|
36 | var altToken = n as AltToken; | |
36 | if (altToken != null) |
|
37 | if (altToken != null) | |
37 | return Nullable(altToken.Left) || Nullable(altToken.Right); |
|
38 | return Nullable(altToken.Left) || Nullable(altToken.Right); | |
38 |
var catToken = n as CatToken |
|
39 | var catToken = n as CatToken; | |
39 | if (catToken != null) |
|
40 | if (catToken != null) | |
40 | return Nullable(catToken.Left) && Nullable(catToken.Right); |
|
41 | return Nullable(catToken.Left) && Nullable(catToken.Right); | |
41 | return false; |
|
42 | return false; | |
42 | } |
|
43 | } | |
43 |
|
44 | |||
44 |
|
45 | |||
45 |
public void Visit(AltToken |
|
46 | public void Visit(AltToken token) { | |
46 | if (m_root == null) |
|
47 | if (m_root == null) | |
47 | m_root = token; |
|
48 | m_root = token; | |
48 | var firtspos = new HashSet<int>(); |
|
49 | var firtspos = new HashSet<int>(); | |
@@ -60,7 +61,7 namespace Implab.Automaton.RegularExpres | |||||
60 | m_lastpos = lastpos; |
|
61 | m_lastpos = lastpos; | |
61 | } |
|
62 | } | |
62 |
|
63 | |||
63 |
public void Visit(StarToken |
|
64 | public void Visit(StarToken token) { | |
64 | if (m_root == null) |
|
65 | if (m_root == null) | |
65 | m_root = token; |
|
66 | m_root = token; | |
66 | token.Token.Accept(this); |
|
67 | token.Token.Accept(this); | |
@@ -69,7 +70,7 namespace Implab.Automaton.RegularExpres | |||||
69 | Followpos(i).UnionWith(m_firstpos); |
|
70 | Followpos(i).UnionWith(m_firstpos); | |
70 | } |
|
71 | } | |
71 |
|
72 | |||
72 |
public void Visit(CatToken |
|
73 | public void Visit(CatToken token) { | |
73 | if (m_root == null) |
|
74 | if (m_root == null) | |
74 | m_root = token; |
|
75 | m_root = token; | |
75 |
|
76 | |||
@@ -97,12 +98,12 namespace Implab.Automaton.RegularExpres | |||||
97 |
|
98 | |||
98 | } |
|
99 | } | |
99 |
|
100 | |||
100 |
public void Visit(EmptyToken |
|
101 | public void Visit(EmptyToken token) { | |
101 | if (m_root == null) |
|
102 | if (m_root == null) | |
102 | m_root = token; |
|
103 | m_root = token; | |
103 | } |
|
104 | } | |
104 |
|
105 | |||
105 |
public void Visit(SymbolToken |
|
106 | public void Visit(SymbolToken token) { | |
106 | if (m_root == null) |
|
107 | if (m_root == null) | |
107 | m_root = token; |
|
108 | m_root = token; | |
108 | m_idx++; |
|
109 | m_idx++; | |
@@ -119,7 +120,19 namespace Implab.Automaton.RegularExpres | |||||
119 | m_firstpos = new HashSet<int>(new[] { m_idx }); |
|
120 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |
120 | m_lastpos = new HashSet<int>(new[] { m_idx }); |
|
121 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
121 | Followpos(m_idx); |
|
122 | Followpos(m_idx); | |
122 |
m_ends.Add(m_idx |
|
123 | m_ends.Add(m_idx); | |
|
124 | m_tags.Add(m_idx, token.Tag); | |||
|
125 | } | |||
|
126 | ||||
|
/// <summary>
/// Registers an untagged end token: gives it the next position, maps the
/// position to <c>DFAConst.UNCLASSIFIED_INPUT</c> and records it as an end position.
/// </summary>
public void Visit(EndToken token) {
    if (m_root == null)
        m_root = token;

    // the token gets the next free position
    var pos = ++m_idx;
    m_indexes[pos] = DFAConst.UNCLASSIFIED_INPUT;

    // firstpos and lastpos of the token consist of its own position only
    m_firstpos = new HashSet<int> { pos };
    m_lastpos = new HashSet<int> { pos };

    Followpos(pos);
    m_ends.Add(pos);
}
124 |
|
137 | |||
125 | public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { |
|
138 | public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { | |
@@ -157,14 +170,18 namespace Implab.Automaton.RegularExpres | |||||
157 | } |
|
170 | } | |
158 | } |
|
171 | } | |
159 | if (next.Count > 0) { |
|
172 | if (next.Count > 0) { | |
160 |
int s2 |
|
173 | int s2; | |
161 |
if ( |
|
174 | if (states.Contains(next)) { | |
|
175 | s2 = states.Translate(next); | |||
|
176 | } else { | |||
162 | s2 = states.DefineSymbol(next); |
|
177 | s2 = states.DefineSymbol(next); | |
163 |
|
178 | |||
164 |
|
|
179 | if (IsFinal(next)) { | |
165 |
|
|
180 | ||
166 | dfa.MarkFinalState(s2); |
|
181 | dfa.MarkFinalState(s2); | |
167 |
|
|
182 | tags = GetStateTags(next); | |
|
183 | if (tags != null && tags.Length > 0) | |||
|
184 | dfa.SetStateTag(s2, tags); | |||
168 | } |
|
185 | } | |
169 |
|
186 | |||
170 | queue.Enqueue(next); |
|
187 | queue.Enqueue(next); | |
@@ -175,9 +192,14 namespace Implab.Automaton.RegularExpres | |||||
175 | } |
|
192 | } | |
176 | } |
|
193 | } | |
177 |
|
194 | |||
|
// Tells whether the state contains at least one of the positions recorded in m_ends.
bool IsFinal(IEnumerable<int> state) {
    Debug.Assert(state != null);

    foreach (var pos in state) {
        if (m_ends.Contains(pos))
            return true;
    }
    return false;
}
|
199 | ||||
// Collects the tags registered in m_tags for the positions of the state,
// in the order in which the positions are enumerated.
TTag[] GetStateTags(IEnumerable<int> state) {
    Debug.Assert(state != null);

    var tags = new List<TTag>();
    foreach (var pos in state) {
        TTag tag;
        if (m_tags.TryGetValue(pos, out tag))
            tags.Add(tag);
    }
    return tags.ToArray();
}
182 |
|
204 | |||
183 | } |
|
205 | } |
@@ -1,28 +1,25 | |||||
1 | using Implab; |
|
1 | using Implab; | |
2 | using System; |
|
2 | using System; | |
3 | using System.Collections.Generic; |
|
3 | ||
4 | using System.Linq; |
|
|||
5 | using System.Text; |
|
|||
6 | using System.Threading.Tasks; |
|
|||
7 |
|
4 | |||
8 | namespace Implab.Automaton.RegularExpressions { |
|
5 | namespace Implab.Automaton.RegularExpressions { | |
9 | /// <summary> |
|
6 | /// <summary> | |
10 | /// Замыкание выражения с 0 и более повторов. |
|
7 | /// Замыкание выражения с 0 и более повторов. | |
11 | /// </summary> |
|
8 | /// </summary> | |
12 |
public class StarToken |
|
9 | public class StarToken: Token { | |
13 |
|
10 | |||
14 |
Token |
|
11 | Token m_token; | |
15 |
|
12 | |||
16 |
public Token |
|
13 | public Token Token { | |
17 | get { return m_token; } |
|
14 | get { return m_token; } | |
18 | } |
|
15 | } | |
19 |
|
16 | |||
20 |
public StarToken(Token |
|
17 | public StarToken(Token token) { | |
21 | Safe.ArgumentNotNull(token, "token"); |
|
18 | Safe.ArgumentNotNull(token, "token"); | |
22 | m_token = token; |
|
19 | m_token = token; | |
23 | } |
|
20 | } | |
24 |
|
21 | |||
25 |
public override void Accept(IVisitor |
|
22 | public override void Accept(IVisitor visitor) { | |
26 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
23 | Safe.ArgumentNotNull(visitor, "visitor"); | |
27 | visitor.Visit(this); |
|
24 | visitor.Visit(this); | |
28 | } |
|
25 | } |
@@ -4,7 +4,7 namespace Implab.Automaton.RegularExpres | |||||
4 | /// <summary> |
|
4 | /// <summary> | |
5 | /// Выражение, соответсвующее одному символу. |
|
5 | /// Выражение, соответсвующее одному символу. | |
6 | /// </summary> |
|
6 | /// </summary> | |
7 |
public class SymbolToken |
|
7 | public class SymbolToken: Token { | |
8 | int m_value; |
|
8 | int m_value; | |
9 |
|
9 | |||
10 | public int Value { |
|
10 | public int Value { | |
@@ -14,7 +14,7 namespace Implab.Automaton.RegularExpres | |||||
14 | public SymbolToken(int value) { |
|
14 | public SymbolToken(int value) { | |
15 | m_value = value; |
|
15 | m_value = value; | |
16 | } |
|
16 | } | |
17 |
public override void Accept(IVisitor |
|
17 | public override void Accept(IVisitor visitor) { | |
18 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
18 | Safe.ArgumentNotNull(visitor, "visitor"); | |
19 |
|
19 | |||
20 | visitor.Visit(this); |
|
20 | visitor.Visit(this); |
@@ -3,46 +3,46 using System; | |||||
3 | using System.Linq; |
|
3 | using System.Linq; | |
4 |
|
4 | |||
5 | namespace Implab.Automaton.RegularExpressions { |
|
5 | namespace Implab.Automaton.RegularExpressions { | |
6 |
public abstract class Token |
|
6 | public abstract class Token { | |
7 |
public abstract void Accept(IVisitor |
|
7 | public abstract void Accept(IVisitor visitor); | |
8 |
|
8 | |||
9 |
public Token |
|
9 | public Token Extend() { | |
10 |
return Cat(new EndToken |
|
10 | return Cat(new EndToken()); | |
11 | } |
|
11 | } | |
12 |
|
12 | |||
13 |
public Token<TTag> |
|
13 | public Token Tag<TTag>(TTag tag) { | |
14 | return Cat(new EndToken<TTag>(tag)); |
|
14 | return Cat(new EndToken<TTag>(tag)); | |
15 | } |
|
15 | } | |
16 |
|
16 | |||
17 |
public Token |
|
17 | public Token Cat(Token right) { | |
18 |
return new CatToken |
|
18 | return new CatToken(this, right); | |
19 | } |
|
19 | } | |
20 |
|
20 | |||
21 |
public Token |
|
21 | public Token Or(Token right) { | |
22 |
return new AltToken |
|
22 | return new AltToken(this, right); | |
23 | } |
|
23 | } | |
24 |
|
24 | |||
25 |
public Token |
|
25 | public Token Optional() { | |
26 |
return Or(new EmptyToken |
|
26 | return Or(new EmptyToken()); | |
27 | } |
|
27 | } | |
28 |
|
28 | |||
29 |
public Token |
|
29 | public Token EClosure() { | |
30 |
return new StarToken |
|
30 | return new StarToken(this); | |
31 | } |
|
31 | } | |
32 |
|
32 | |||
33 |
public Token |
|
33 | public Token Closure() { | |
34 |
return Cat(new StarToken |
|
34 | return Cat(new StarToken(this)); | |
35 | } |
|
35 | } | |
36 |
|
36 | |||
37 |
public Token |
|
37 | public Token Repeat(int count) { | |
38 |
Token |
|
38 | Token token = null; | |
39 |
|
39 | |||
40 | for (int i = 0; i < count; i++) |
|
40 | for (int i = 0; i < count; i++) | |
41 | token = token != null ? token.Cat(this) : this; |
|
41 | token = token != null ? token.Cat(this) : this; | |
42 |
return token ?? new EmptyToken |
|
42 | return token ?? new EmptyToken(); | |
43 | } |
|
43 | } | |
44 |
|
44 | |||
45 |
public Token |
|
45 | public Token Repeat(int min, int max) { | |
46 | if (min > max || min < 1) |
|
46 | if (min > max || min < 1) | |
47 | throw new ArgumentOutOfRangeException(); |
|
47 | throw new ArgumentOutOfRangeException(); | |
48 | var token = Repeat(min); |
|
48 | var token = Repeat(min); | |
@@ -52,11 +52,11 namespace Implab.Automaton.RegularExpres | |||||
52 | return token; |
|
52 | return token; | |
53 | } |
|
53 | } | |
54 |
|
54 | |||
55 |
public static Token |
|
55 | public static Token New(params int[] set) { | |
56 | Safe.ArgumentNotNull(set, "set"); |
|
56 | Safe.ArgumentNotNull(set, "set"); | |
57 |
Token |
|
57 | Token token = null; | |
58 | foreach(var c in set.Distinct()) |
|
58 | foreach(var c in set.Distinct()) | |
59 |
token = token == null ? new SymbolToken |
|
59 | token = token == null ? new SymbolToken(c) : token.Or(new SymbolToken(c)); | |
60 | return token; |
|
60 | return token; | |
61 | } |
|
61 | } | |
62 | } |
|
62 | } |
@@ -4,8 +4,6 using Implab.Automaton; | |||||
4 |
|
4 | |||
5 | namespace Implab.Formats { |
|
5 | namespace Implab.Formats { | |
6 | public class ByteAlphabet : IndexedAlphabetBase<byte> { |
|
6 | public class ByteAlphabet : IndexedAlphabetBase<byte> { | |
7 | public ByteAlphabet() { |
|
|||
8 | } |
|
|||
9 |
|
7 | |||
10 | #region implemented abstract members of IndexedAlphabetBase |
|
8 | #region implemented abstract members of IndexedAlphabetBase | |
11 |
|
9 |
@@ -5,9 +5,6 using Implab.Automaton; | |||||
5 | namespace Implab.Formats { |
|
5 | namespace Implab.Formats { | |
6 | public class CharAlphabet: IndexedAlphabetBase<char> { |
|
6 | public class CharAlphabet: IndexedAlphabetBase<char> { | |
7 |
|
7 | |||
8 | public CharAlphabet() { |
|
|||
9 | } |
|
|||
10 |
|
||||
11 | public override int GetSymbolIndex(char symbol) { |
|
8 | public override int GetSymbolIndex(char symbol) { | |
12 | return symbol; |
|
9 | return symbol; | |
13 | } |
|
10 | } |
@@ -4,7 +4,6 using Implab.Automaton; | |||||
4 | using System.Text; |
|
4 | using System.Text; | |
5 | using Implab.Components; |
|
5 | using Implab.Components; | |
6 | using System.IO; |
|
6 | using System.IO; | |
7 | using Implab.Automaton.RegularExpressions; |
|
|||
8 |
|
7 | |||
9 | namespace Implab.Formats.JSON { |
|
8 | namespace Implab.Formats.JSON { | |
10 | /// <summary> |
|
9 | /// <summary> | |
@@ -13,8 +12,8 namespace Implab.Formats.JSON { | |||||
13 | public class JSONScanner : Disposable { |
|
12 | public class JSONScanner : Disposable { | |
14 | readonly StringBuilder m_builder = new StringBuilder(); |
|
13 | readonly StringBuilder m_builder = new StringBuilder(); | |
15 |
|
14 | |||
16 |
readonly ScannerContext<JSONGrammar.TokenType> m_json |
|
15 | readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonDFA; | |
17 |
readonly ScannerContext<JSONGrammar.TokenType> m_string |
|
16 | readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringDFA; | |
18 |
|
17 | |||
19 |
|
18 | |||
20 | readonly TextScanner m_scanner; |
|
19 | readonly TextScanner m_scanner; | |
@@ -31,7 +30,7 namespace Implab.Formats.JSON { | |||||
31 | public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { |
|
30 | public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { | |
32 | Safe.ArgumentNotNull(reader, "reader"); |
|
31 | Safe.ArgumentNotNull(reader, "reader"); | |
33 |
|
32 | |||
34 | m_scanner = new ReaderScanner(reader); |
|
33 | m_scanner = new ReaderScanner(reader, bufferMax, chunkSize); | |
35 | } |
|
34 | } | |
36 |
|
35 | |||
37 | /// <summary> |
|
36 | /// <summary> | |
@@ -44,7 +43,7 namespace Implab.Formats.JSON { | |||||
44 | /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> |
|
43 | /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> | |
45 | public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { |
|
44 | public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { | |
46 | JSONGrammar.TokenType[] tag; |
|
45 | JSONGrammar.TokenType[] tag; | |
47 |
if (m_json |
|
46 | if (m_jsonContext.Execute(m_scanner, out tag)) { | |
48 | switch (tag[0]) { |
|
47 | switch (tag[0]) { | |
49 | case JSONGrammar.TokenType.StringBound: |
|
48 | case JSONGrammar.TokenType.StringBound: | |
50 | tokenValue = ReadString(); |
|
49 | tokenValue = ReadString(); | |
@@ -68,12 +67,12 namespace Implab.Formats.JSON { | |||||
68 |
|
67 | |||
69 | string ReadString() { |
|
68 | string ReadString() { | |
70 | int pos = 0; |
|
69 | int pos = 0; | |
71 |
|
|
70 | var buf = new char[6]; // the buffer for unescaping chars | |
72 |
|
71 | |||
73 | JSONGrammar.TokenType[] tag; |
|
72 | JSONGrammar.TokenType[] tag; | |
74 | m_builder.Clear(); |
|
73 | m_builder.Clear(); | |
75 |
|
74 | |||
76 |
while (m_string |
|
75 | while (m_stringContext.Execute(m_scanner, out tag)) { | |
77 | switch (tag[0]) { |
|
76 | switch (tag[0]) { | |
78 | case JSONGrammar.TokenType.StringBound: |
|
77 | case JSONGrammar.TokenType.StringBound: | |
79 | return m_builder.ToString(); |
|
78 | return m_builder.ToString(); | |
@@ -89,13 +88,17 namespace Implab.Formats.JSON { | |||||
89 | m_scanner.CopyTokenTo(buf, 0); |
|
88 | m_scanner.CopyTokenTo(buf, 0); | |
90 | m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); |
|
89 | m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); | |
91 | break; |
|
90 | break; | |
92 | default: |
|
|||
93 | break; |
|
|||
94 | } |
|
91 | } | |
95 |
|
92 | |||
96 | } |
|
93 | } | |
97 |
|
94 | |||
98 | throw new ParserException("Unexpected end of data"); |
|
95 | throw new ParserException("Unexpected end of data"); | |
99 | } |
|
96 | } | |
|
97 | ||||
|
/// <summary>
/// Disposes the underlying scanner when <paramref name="disposing"/> is
/// <c>true</c>, then calls the base implementation.
/// </summary>
protected override void Dispose(bool disposing) {
    if (disposing) {
        Safe.Dispose(m_scanner);
    }

    base.Dispose(disposing);
}
100 | } |
|
103 | } | |
101 | } |
|
104 | } |
@@ -1,11 +1,17 | |||||
1 | using System; |
|
1 | namespace Implab.Formats { | |
2 |
|
2 | /// <summary> | ||
3 | namespace Implab.Formats { |
|
3 | /// Represents a scanner configuration, based on the DFA, used to recognize tokens. | |
|
4 | /// </summary> | |||
4 | public class ScannerContext<TTag> { |
|
5 | public class ScannerContext<TTag> { | |
|
6 | ||||
5 | public int[,] Dfa { get; private set; } |
|
7 | public int[,] Dfa { get; private set; } | |
|
8 | ||||
6 | public bool[] Final { get; private set; } |
|
9 | public bool[] Final { get; private set; } | |
|
10 | ||||
7 | public TTag[][] Tags { get; private set; } |
|
11 | public TTag[][] Tags { get; private set; } | |
|
12 | ||||
8 | public int State { get; private set; } |
|
13 | public int State { get; private set; } | |
|
14 | ||||
9 | public int[] Alphabet { get; private set; } |
|
15 | public int[] Alphabet { get; private set; } | |
10 |
|
16 | |||
11 | public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) { |
|
17 | public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) { |
@@ -1,9 +1,7 | |||||
1 | using System; |
|
1 | using System; | |
2 | using Implab.Components; |
|
2 | using Implab.Components; | |
3 | using Implab.Automaton.RegularExpressions; |
|
|||
4 | using System.Diagnostics; |
|
3 | using System.Diagnostics; | |
5 | using Implab.Automaton; |
|
4 | using Implab.Automaton; | |
6 | using System.IO; |
|
|||
7 | using System.Text; |
|
5 | using System.Text; | |
8 |
|
6 | |||
9 | namespace Implab.Formats { |
|
7 | namespace Implab.Formats { | |
@@ -18,7 +16,7 namespace Implab.Formats { | |||||
18 | int m_tokenLength; |
|
16 | int m_tokenLength; | |
19 |
|
17 | |||
20 | /// <summary> |
|
18 | /// <summary> | |
21 |
/// Initializes a new instance of the <see cref="Implab.Formats.TextScanner |
|
19 | /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class. | |
22 | /// </summary> |
|
20 | /// </summary> | |
23 | /// <param name="bufferMax">Buffer max.</param> |
|
21 | /// <param name="bufferMax">Buffer max.</param> | |
24 | /// <param name="chunkSize">Chunk size.</param> |
|
22 | /// <param name="chunkSize">Chunk size.</param> | |
@@ -30,7 +28,7 namespace Implab.Formats { | |||||
30 | } |
|
28 | } | |
31 |
|
29 | |||
32 | /// <summary> |
|
30 | /// <summary> | |
33 |
/// Initializes a new instance of the <see cref="Implab.Formats.TextScanner |
|
31 | /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class. | |
34 | /// </summary> |
|
32 | /// </summary> | |
35 | /// <param name="buffer">Buffer.</param> |
|
33 | /// <param name="buffer">Buffer.</param> | |
36 | protected TextScanner(char[] buffer) { |
|
34 | protected TextScanner(char[] buffer) { | |
@@ -48,7 +46,9 namespace Implab.Formats { | |||||
48 | /// <param name="final">Final states of the automaton.</param> |
|
46 | /// <param name="final">Final states of the automaton.</param> | |
49 | /// <param name="tags">Tags.</param> |
|
47 | /// <param name="tags">Tags.</param> | |
50 | /// <param name="state">The initial state for the automaton.</param> |
|
48 | /// <param name="state">The initial state for the automaton.</param> | |
51 | internal bool ReadToken<TTag>(int[,] dfa, int[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { |
|
49 | /// <param name="alphabet"></param> | |
|
50 | /// <param name = "tag"></param> | |||
|
51 | internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { | |||
52 | Safe.ArgumentNotNull(); |
|
52 | Safe.ArgumentNotNull(); | |
53 | m_tokenLength = 0; |
|
53 | m_tokenLength = 0; | |
54 |
|
54 | |||
@@ -58,10 +58,10 namespace Implab.Formats { | |||||
58 | // after the next chunk is read the offset in the buffer may change |
|
58 | // after the next chunk is read the offset in the buffer may change | |
59 | int pos = m_bufferOffset + m_tokenLength; |
|
59 | int pos = m_bufferOffset + m_tokenLength; | |
60 |
|
60 | |||
61 | while(pos < m_bufferSize) { |
|
61 | while (pos < m_bufferSize) { | |
62 | var ch = m_buffer[pos]; |
|
62 | var ch = m_buffer[pos]; | |
63 |
|
63 | |||
64 | state = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; |
|
64 | state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
65 | if (state == DFAConst.UNREACHABLE_STATE) |
|
65 | if (state == DFAConst.UNREACHABLE_STATE) | |
66 | break; |
|
66 | break; | |
67 |
|
67 | |||
@@ -77,16 +77,17 namespace Implab.Formats { | |||||
77 | if (final[state]) { |
|
77 | if (final[state]) { | |
78 | tag = tags[state]; |
|
78 | tag = tags[state]; | |
79 | return true; |
|
79 | return true; | |
80 |
} |
|
80 | } | |
81 | if (m_bufferOffset == m_bufferSize) { |
|
81 | ||
82 | if (m_tokenLength == 0) //EOF |
|
82 | if (m_bufferOffset == m_bufferSize) { | |
|
83 | if (m_tokenLength == 0) //EOF | |||
83 | return false; |
|
84 | return false; | |
84 |
|
85 | |||
85 |
|
|
86 | throw new ParserException(); | |
86 |
|
|
87 | } | |
87 | throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); |
|
88 | ||
|
89 | throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); | |||
88 |
|
90 | |||
89 | } |
|
|||
90 | } |
|
91 | } | |
91 |
|
92 | |||
92 | protected void Feed(char[] buffer, int offset, int length) { |
|
93 | protected void Feed(char[] buffer, int offset, int length) { | |
@@ -108,7 +109,7 namespace Implab.Formats { | |||||
108 | var size = used + free; |
|
109 | var size = used + free; | |
109 |
|
110 | |||
110 | if (size > m_bufferMax) |
|
111 | if (size > m_bufferMax) | |
111 |
throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached" |
|
112 | throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024)); | |
112 |
|
113 | |||
113 | var temp = new char[size]; |
|
114 | var temp = new char[size]; | |
114 |
|
115 |
@@ -160,11 +160,9 | |||||
160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> |
|
160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> | |
161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> |
|
161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> | |
162 | <Compile Include="Automaton\DFAConst.cs" /> |
|
162 | <Compile Include="Automaton\DFAConst.cs" /> | |
163 | <Compile Include="Automaton\RegularExpressions\Grammar.cs" /> |
|
|||
164 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> |
|
163 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> | |
165 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> |
|
164 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> | |
166 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> |
|
165 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> | |
167 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> |
|
|||
168 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> |
|
166 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> | |
169 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> |
|
167 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> | |
170 | <Compile Include="Automaton\AutomatonTransition.cs" /> |
|
168 | <Compile Include="Automaton\AutomatonTransition.cs" /> | |
@@ -192,6 +190,10 | |||||
192 | <Compile Include="Formats\StringScanner.cs" /> |
|
190 | <Compile Include="Formats\StringScanner.cs" /> | |
193 | <Compile Include="Formats\ReaderScanner.cs" /> |
|
191 | <Compile Include="Formats\ReaderScanner.cs" /> | |
194 | <Compile Include="Formats\ScannerContext.cs" /> |
|
192 | <Compile Include="Formats\ScannerContext.cs" /> | |
|
193 | <Compile Include="Formats\Grammar.cs" /> | |||
|
194 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> | |||
|
195 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> | |||
|
196 | <Compile Include="Automaton\RegularExpressions\IVisitorT.cs" /> | |||
195 | </ItemGroup> |
|
197 | </ItemGroup> | |
196 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
198 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | |
197 | <ItemGroup /> |
|
199 | <ItemGroup /> |
@@ -41,6 +41,11 namespace Implab | |||||
41 | throw new ArgumentOutOfRangeException(paramName); |
|
41 | throw new ArgumentOutOfRangeException(paramName); | |
42 | } |
|
42 | } | |
43 |
|
43 | |||
|
44 | public static void ArgumentOfType(object value, Type type, string paramName) { | |||
|
45 | if (!type.IsInstanceOfType(value)) | |||
|
46 | throw new ArgumentException(String.Format("The parameter must be of type {0}", type), paramName); | |||
|
47 | } | |||
|
48 | ||||
44 | public static void Dispose(params IDisposable[] objects) { |
|
49 | public static void Dispose(params IDisposable[] objects) { | |
45 | foreach (var d in objects) |
|
50 | foreach (var d in objects) | |
46 | if (d != null) |
|
51 | if (d != null) |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now