@@ -0,0 +1,33 | |||
|
1 | using Implab; | |
|
2 | ||
|
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// The terminal symbol of an extended regular expression; it is used to
    /// determine the final states when the DFA is built.
    /// </summary>
    /// <typeparam name="TTag">The type of the tag carried by the token.</typeparam>
    public class EndToken<TTag>: Token {

        // Assigned only by the constructor, hence readonly.
        readonly TTag m_tag;

        /// <summary>
        /// Creates an end token that carries the specified tag.
        /// </summary>
        /// <param name="tag">The tag to store in the token.</param>
        public EndToken(TTag tag) {
            m_tag = tag;
        }

        /// <summary>
        /// Creates an end token that carries <c>default(TTag)</c>.
        /// </summary>
        public EndToken()
            : this(default(TTag)) {
        }

        /// <summary>
        /// The tag that was supplied when the token was created.
        /// </summary>
        public TTag Tag {
            get { return m_tag; }
        }

        /// <summary>
        /// Dispatches this token to <see cref="IVisitor{TTag}.Visit"/>.
        /// </summary>
        /// <param name="visitor">
        /// The visitor; it must not be null and must implement
        /// <see cref="IVisitor{TTag}"/> for the same <typeparamref name="TTag"/>.
        /// </param>
        public override void Accept(IVisitor visitor) {
            // The null guard has to run before the type guard: a null reference
            // is never an instance of any type, so checking the type first would
            // report a null visitor as a wrong-type argument and make the null
            // guard unreachable for that case.
            Safe.ArgumentNotNull(visitor, "visitor");
            Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor");
            ((IVisitor<TTag>)visitor).Visit(this);
        }

        /// <summary>
        /// Returns the textual form of the end marker, <c>"#"</c>.
        /// </summary>
        public override string ToString() {
            return "#";
        }
    }
}
@@ -0,0 +1,8 | |||
|
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// Visitor of a regular expression syntax tree that, in addition to the
    /// members inherited from <see cref="IVisitor"/>, accepts tagged end tokens.
    /// </summary>
    /// <typeparam name="T">The tag type of the visited <see cref="EndToken{T}"/>.</typeparam>
    public interface IVisitor<T> : IVisitor {
        /// <summary>
        /// Visits an end token that carries a tag of type <typeparamref name="T"/>.
        /// </summary>
        /// <param name="token">The token being visited.</param>
        void Visit(EndToken<T> token);
    }
}
@@ -0,0 +1,100 | |||
|
1 | using Implab; | |
|
2 | using System; | |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using Implab.Automaton; | |
|
6 | using Implab.Automaton.RegularExpressions; | |
|
7 | ||
|
namespace Implab.Formats {
    /// <summary>
    /// Abstract base class of a grammar; allows regular expressions to be
    /// formulated over an alphabet whose symbols are of type
    /// <typeparamref name="TSymbol"/>.
    /// </summary>
    /// <typeparam name="TSymbol">The type of the input symbols.</typeparam>
    /// <typeparam name="TTag">The type of the tags attached to recognized tokens.</typeparam>
    public abstract class Grammar<TSymbol, TTag> {

        // NOTE(review): this class refers to the generic forms Token<TTag> and
        // SymbolToken<TTag>; confirm that these types are still available in
        // Implab.Automaton.RegularExpressions.

        /// <summary>
        /// The alphabet builder used to translate symbols and to define new ones.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        /// <summary>
        /// Creates a symbol token for <see cref="DFAConst.UNCLASSIFIED_INPUT"/>.
        /// </summary>
        protected SymbolToken<TTag> UnclassifiedToken() {
            return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Defines every symbol of the specified sequence in the alphabet builder.
        /// </summary>
        /// <param name="alphabet">The symbols to define; must not be null.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet) {
                AlphabetBuilder.DefineSymbol(ch);
            }
        }

        /// <summary>
        /// Creates a token for a single symbol, defining the symbol in the
        /// alphabet when it is still unclassified.
        /// </summary>
        protected Token<TTag> SymbolToken(TSymbol symbol) {
            return Token<TTag>.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token for a set of symbols, defining in the alphabet those
        /// which are still unclassified.
        /// </summary>
        /// <param name="symbols">The symbols of the set; must not be null.</param>
        protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            // ToArray forces the lazy translation, so the symbols are defined here.
            return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Convenience overload of <see cref="SymbolToken(IEnumerable{TSymbol})"/>
        /// that takes the symbols as a parameter array.
        /// </summary>
        protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
            return SymbolToken(set);
        }

        // Translates the symbol through the alphabet builder; when the result is
        // UNCLASSIFIED_INPUT the symbol is defined and the value returned by
        // DefineSymbol is used instead.
        int TranslateOrAdd(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT)
                t = AlphabetBuilder.DefineSymbol(ch);
            return t;
        }

        // Lazily applies TranslateOrAdd to the distinct symbols of the sequence.
        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        // Translates the symbol through the alphabet builder and throws an
        // ApplicationException when the result is UNCLASSIFIED_INPUT.
        int TranslateOrDie(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT)
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            return t;
        }

        // Lazily applies TranslateOrDie to the distinct symbols of the sequence.
        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token from every value in the range
        /// <c>[0, AlphabetBuilder.Count)</c> except the translations of the
        /// specified symbols.
        /// </summary>
        /// <param name="symbols">The symbols to exclude; must not be null.</param>
        /// <exception cref="ApplicationException">
        /// One of the specified symbols is unclassified.
        /// </exception>
        protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            var excluded = TranslateOrDie(symbols);
            var remaining = Enumerable.Range(0, AlphabetBuilder.Count).Except(excluded);

            return Token<TTag>.New(remaining.ToArray());
        }

        /// <summary>
        /// Creates the alphabet that is passed to the DFA optimization in
        /// <see cref="BuildScannerContext"/>.
        /// </summary>
        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        /// <summary>
        /// Builds a DFA from the regular expression, optimizes it and packs the
        /// resulting tables into a <see cref="ScannerContext{TTag}"/>.
        /// </summary>
        /// <param name="regexp">The root of the regular expression; must not be null.</param>
        /// <exception cref="ApplicationException">
        /// The initial state of the DFA is final, i.e. the language contains
        /// the empty token.
        /// </exception>
        protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
            // Guard like the sibling methods do, instead of failing later on
            // the dereference in regexp.Accept.
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>();
            regexp.Accept(visitor);

            visitor.BuildDFA(dfa);

            if (dfa.IsFinalState(dfa.InitialState))
                throw new ApplicationException("The specified language contains empty token");

            var ab = CreateAlphabet();
            var optimal = dfa.Optimize(ab);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                ab.GetTranslationMap()
            );
        }

    }


}
@@ -1,17 +1,17 | |||
|
1 | 1 | using System; |
|
2 | 2 | |
|
3 | 3 | namespace Implab.Automaton.RegularExpressions { |
|
4 |
public class AltToken |
|
|
5 |
public AltToken(Token |
|
|
4 | public class AltToken: BinaryToken { | |
|
5 | public AltToken(Token left, Token right) | |
|
6 | 6 | : base(left, right) { |
|
7 | 7 | } |
|
8 | 8 | |
|
9 |
public override void Accept(IVisitor |
|
|
9 | public override void Accept(IVisitor visitor) { | |
|
10 | 10 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
11 | 11 | visitor.Visit(this); |
|
12 | 12 | } |
|
13 | 13 | public override string ToString() { |
|
14 |
return String.Format(Right is BinaryToken |
|
|
14 | return String.Format(Right is BinaryToken ? "{0}|({1})" : "{0}|{1}", Left, Right); | |
|
15 | 15 | } |
|
16 | 16 | } |
|
17 | 17 | } |
@@ -1,19 +1,19 | |||
|
1 | 1 | using Implab; |
|
2 | 2 | |
|
3 | 3 | namespace Implab.Automaton.RegularExpressions { |
|
4 |
public abstract class BinaryToken |
|
|
5 |
readonly Token |
|
|
6 |
readonly Token |
|
|
4 | public abstract class BinaryToken: Token { | |
|
5 | readonly Token m_left; | |
|
6 | readonly Token m_right; | |
|
7 | 7 | |
|
8 |
public Token |
|
|
8 | public Token Left { | |
|
9 | 9 | get { return m_left; } |
|
10 | 10 | } |
|
11 | 11 | |
|
12 |
public Token |
|
|
12 | public Token Right { | |
|
13 | 13 | get { return m_right; } |
|
14 | 14 | } |
|
15 | 15 | |
|
16 |
protected BinaryToken(Token |
|
|
16 | protected BinaryToken(Token left, Token right) { | |
|
17 | 17 | Safe.ArgumentNotNull(m_left = left, "left"); |
|
18 | 18 | Safe.ArgumentNotNull(m_right = right, "right"); |
|
19 | 19 | } |
@@ -1,12 +1,12 | |||
|
1 | 1 | using System; |
|
2 | 2 | |
|
3 | 3 | namespace Implab.Automaton.RegularExpressions { |
|
4 |
public class CatToken |
|
|
5 |
public CatToken(Token |
|
|
4 | public class CatToken : BinaryToken { | |
|
5 | public CatToken(Token left, Token right) | |
|
6 | 6 | : base(left, right) { |
|
7 | 7 | } |
|
8 | 8 | |
|
9 |
public override void Accept(IVisitor |
|
|
9 | public override void Accept(IVisitor visitor) { | |
|
10 | 10 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
11 | 11 | visitor.Visit(this); |
|
12 | 12 | } |
@@ -15,8 +15,8 namespace Implab.Automaton.RegularExpres | |||
|
15 | 15 | return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right)); |
|
16 | 16 | } |
|
17 | 17 | |
|
18 |
static string FormatToken(Token |
|
|
19 |
return String.Format(token is AltToken |
|
|
18 | static string FormatToken(Token token) { | |
|
19 | return String.Format(token is AltToken ? "({0})" : "{0}", token); | |
|
20 | 20 | } |
|
21 | 21 | } |
|
22 | 22 | } |
@@ -1,8 +1,8 | |||
|
1 | 1 | using Implab; |
|
2 | 2 | |
|
3 | 3 | namespace Implab.Automaton.RegularExpressions { |
|
4 |
public class EmptyToken |
|
|
5 |
public override void Accept(IVisitor |
|
|
4 | public class EmptyToken: Token { | |
|
5 | public override void Accept(IVisitor visitor) { | |
|
6 | 6 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
7 | 7 | visitor.Visit(this); |
|
8 | 8 | } |
@@ -5,23 +5,9 namespace Implab.Automaton.RegularExpres | |||
|
5 | 5 | /// Конечный символ расширенного регулярного выражения, при построении ДКА |
|
6 | 6 | /// используется для определения конечных состояний. |
|
7 | 7 | /// </summary> |
|
8 |
public class EndToken |
|
|
9 | ||
|
10 | TTag m_tag; | |
|
11 | ||
|
12 | public EndToken(TTag tag) { | |
|
13 | m_tag = tag; | |
|
14 | } | |
|
8 | public class EndToken: Token { | |
|
15 | 9 | |
|
16 | public EndToken() | |
|
17 | : this(default(TTag)) { | |
|
18 | } | |
|
19 | ||
|
20 | public TTag Tag { | |
|
21 | get { return m_tag; } | |
|
22 | } | |
|
23 | ||
|
24 | public override void Accept(IVisitor<TTag> visitor) { | |
|
10 | public override void Accept(IVisitor visitor) { | |
|
25 | 11 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
26 | 12 | visitor.Visit(this); |
|
27 | 13 | } |
@@ -1,5 +1,4 | |||
|
1 | using System; | |
|
2 | ||
|
1 | ||
|
3 | 2 | namespace Implab.Automaton.RegularExpressions { |
|
4 | 3 | public interface ITaggedDFABuilder<TTag> : IDFATableBuilder { |
|
5 | 4 | void SetStateTag(int s, TTag[] tags); |
@@ -2,12 +2,12 | |||
|
2 | 2 | /// <summary> |
|
3 | 3 | /// Интерфейс обходчика синтаксического дерева регулярного выражения |
|
4 | 4 | /// </summary> |
|
5 |
public interface IVisitor |
|
|
6 |
void Visit(AltToken |
|
|
7 |
void Visit(StarToken |
|
|
8 |
void Visit(CatToken |
|
|
9 |
void Visit(EmptyToken |
|
|
10 |
void Visit(EndToken |
|
|
11 |
void Visit(SymbolToken |
|
|
5 | public interface IVisitor { | |
|
6 | void Visit(AltToken token); | |
|
7 | void Visit(StarToken token); | |
|
8 | void Visit(CatToken token); | |
|
9 | void Visit(EmptyToken token); | |
|
10 | void Visit(EndToken token); | |
|
11 | void Visit(SymbolToken token); | |
|
12 | 12 | } |
|
13 | 13 | } |
@@ -1,5 +1,4 | |||
|
1 | using System; | |
|
2 | using System.Collections.Generic; | |
|
1 | using System.Collections.Generic; | |
|
3 | 2 | using System.Linq; |
|
4 | 3 | |
|
5 | 4 | namespace Implab.Automaton.RegularExpressions { |
@@ -12,13 +12,14 namespace Implab.Automaton.RegularExpres | |||
|
12 | 12 | /// </summary> |
|
13 | 13 | public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { |
|
14 | 14 | int m_idx; |
|
15 |
Token |
|
|
15 | Token m_root; | |
|
16 | 16 | HashSet<int> m_firstpos; |
|
17 | 17 | HashSet<int> m_lastpos; |
|
18 | 18 | |
|
19 | 19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); |
|
20 | 20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); |
|
21 |
readonly |
|
|
21 | readonly HashSet<int> m_ends = new HashSet<int>(); | |
|
22 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | |
|
22 | 23 | |
|
23 | 24 | public Dictionary<int, HashSet<int>> FollowposMap { |
|
24 | 25 | get { return m_followpos; } |
@@ -30,19 +31,19 namespace Implab.Automaton.RegularExpres | |||
|
30 | 31 | } |
|
31 | 32 | |
|
32 | 33 | bool Nullable(object n) { |
|
33 |
if (n is EmptyToken |
|
|
34 | if (n is EmptyToken || n is StarToken) | |
|
34 | 35 | return true; |
|
35 |
var altToken = n as AltToken |
|
|
36 | var altToken = n as AltToken; | |
|
36 | 37 | if (altToken != null) |
|
37 | 38 | return Nullable(altToken.Left) || Nullable(altToken.Right); |
|
38 |
var catToken = n as CatToken |
|
|
39 | var catToken = n as CatToken; | |
|
39 | 40 | if (catToken != null) |
|
40 | 41 | return Nullable(catToken.Left) && Nullable(catToken.Right); |
|
41 | 42 | return false; |
|
42 | 43 | } |
|
43 | 44 | |
|
44 | 45 | |
|
45 |
public void Visit(AltToken |
|
|
46 | public void Visit(AltToken token) { | |
|
46 | 47 | if (m_root == null) |
|
47 | 48 | m_root = token; |
|
48 | 49 | var firtspos = new HashSet<int>(); |
@@ -60,7 +61,7 namespace Implab.Automaton.RegularExpres | |||
|
60 | 61 | m_lastpos = lastpos; |
|
61 | 62 | } |
|
62 | 63 | |
|
63 |
public void Visit(StarToken |
|
|
64 | public void Visit(StarToken token) { | |
|
64 | 65 | if (m_root == null) |
|
65 | 66 | m_root = token; |
|
66 | 67 | token.Token.Accept(this); |
@@ -69,7 +70,7 namespace Implab.Automaton.RegularExpres | |||
|
69 | 70 | Followpos(i).UnionWith(m_firstpos); |
|
70 | 71 | } |
|
71 | 72 | |
|
72 |
public void Visit(CatToken |
|
|
73 | public void Visit(CatToken token) { | |
|
73 | 74 | if (m_root == null) |
|
74 | 75 | m_root = token; |
|
75 | 76 | |
@@ -97,12 +98,12 namespace Implab.Automaton.RegularExpres | |||
|
97 | 98 | |
|
98 | 99 | } |
|
99 | 100 | |
|
100 |
public void Visit(EmptyToken |
|
|
101 | public void Visit(EmptyToken token) { | |
|
101 | 102 | if (m_root == null) |
|
102 | 103 | m_root = token; |
|
103 | 104 | } |
|
104 | 105 | |
|
105 |
public void Visit(SymbolToken |
|
|
106 | public void Visit(SymbolToken token) { | |
|
106 | 107 | if (m_root == null) |
|
107 | 108 | m_root = token; |
|
108 | 109 | m_idx++; |
@@ -119,7 +120,19 namespace Implab.Automaton.RegularExpres | |||
|
119 | 120 | m_firstpos = new HashSet<int>(new[] { m_idx }); |
|
120 | 121 | m_lastpos = new HashSet<int>(new[] { m_idx }); |
|
121 | 122 | Followpos(m_idx); |
|
122 |
m_ends.Add(m_idx |
|
|
123 | m_ends.Add(m_idx); | |
|
124 | m_tags.Add(m_idx, token.Tag); | |
|
125 | } | |
|
126 | ||
|
127 | public void Visit(EndToken token) { | |
|
128 | if (m_root == null) | |
|
129 | m_root = token; | |
|
130 | m_idx++; | |
|
131 | m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | |
|
132 | m_firstpos = new HashSet<int>(new[] { m_idx }); | |
|
133 | m_lastpos = new HashSet<int>(new[] { m_idx }); | |
|
134 | Followpos(m_idx); | |
|
135 | m_ends.Add(m_idx); | |
|
123 | 136 | } |
|
124 | 137 | |
|
125 | 138 | public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { |
@@ -157,14 +170,18 namespace Implab.Automaton.RegularExpres | |||
|
157 | 170 | } |
|
158 | 171 | } |
|
159 | 172 | if (next.Count > 0) { |
|
160 |
int s2 |
|
|
161 |
if ( |
|
|
173 | int s2; | |
|
174 | if (states.Contains(next)) { | |
|
175 | s2 = states.Translate(next); | |
|
176 | } else { | |
|
162 | 177 | s2 = states.DefineSymbol(next); |
|
163 | 178 | |
|
164 |
|
|
|
165 |
|
|
|
179 | if (IsFinal(next)) { | |
|
180 | ||
|
166 | 181 | dfa.MarkFinalState(s2); |
|
167 |
|
|
|
182 | tags = GetStateTags(next); | |
|
183 | if (tags != null && tags.Length > 0) | |
|
184 | dfa.SetStateTag(s2, tags); | |
|
168 | 185 | } |
|
169 | 186 | |
|
170 | 187 | queue.Enqueue(next); |
@@ -175,9 +192,14 namespace Implab.Automaton.RegularExpres | |||
|
175 | 192 | } |
|
176 | 193 | } |
|
177 | 194 | |
|
195 | bool IsFinal(IEnumerable<int> state) { | |
|
196 | Debug.Assert(state != null); | |
|
197 | return state.Any(m_ends.Contains); | |
|
198 | } | |
|
199 | ||
|
178 | 200 | TTag[] GetStateTags(IEnumerable<int> state) { |
|
179 | 201 | Debug.Assert(state != null); |
|
180 |
return state.Where(m_ |
|
|
202 | return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); | |
|
181 | 203 | } |
|
182 | 204 | |
|
183 | 205 | } |
@@ -1,28 +1,25 | |||
|
1 | 1 | using Implab; |
|
2 | 2 | using System; |
|
3 | using System.Collections.Generic; | |
|
4 | using System.Linq; | |
|
5 | using System.Text; | |
|
6 | using System.Threading.Tasks; | |
|
3 | ||
|
7 | 4 | |
|
8 | 5 | namespace Implab.Automaton.RegularExpressions { |
|
9 | 6 | /// <summary> |
|
10 | 7 | /// Замыкание выражения с 0 и более повторов. |
|
11 | 8 | /// </summary> |
|
12 |
public class StarToken |
|
|
9 | public class StarToken: Token { | |
|
13 | 10 | |
|
14 |
Token |
|
|
11 | Token m_token; | |
|
15 | 12 | |
|
16 |
public Token |
|
|
13 | public Token Token { | |
|
17 | 14 | get { return m_token; } |
|
18 | 15 | } |
|
19 | 16 | |
|
20 |
public StarToken(Token |
|
|
17 | public StarToken(Token token) { | |
|
21 | 18 | Safe.ArgumentNotNull(token, "token"); |
|
22 | 19 | m_token = token; |
|
23 | 20 | } |
|
24 | 21 | |
|
25 |
public override void Accept(IVisitor |
|
|
22 | public override void Accept(IVisitor visitor) { | |
|
26 | 23 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
27 | 24 | visitor.Visit(this); |
|
28 | 25 | } |
@@ -4,7 +4,7 namespace Implab.Automaton.RegularExpres | |||
|
4 | 4 | /// <summary> |
|
5 | 5 | /// Выражение, соответсвующее одному символу. |
|
6 | 6 | /// </summary> |
|
7 |
public class SymbolToken |
|
|
7 | public class SymbolToken: Token { | |
|
8 | 8 | int m_value; |
|
9 | 9 | |
|
10 | 10 | public int Value { |
@@ -14,7 +14,7 namespace Implab.Automaton.RegularExpres | |||
|
14 | 14 | public SymbolToken(int value) { |
|
15 | 15 | m_value = value; |
|
16 | 16 | } |
|
17 |
public override void Accept(IVisitor |
|
|
17 | public override void Accept(IVisitor visitor) { | |
|
18 | 18 | Safe.ArgumentNotNull(visitor, "visitor"); |
|
19 | 19 | |
|
20 | 20 | visitor.Visit(this); |
@@ -3,46 +3,46 using System; | |||
|
3 | 3 | using System.Linq; |
|
4 | 4 | |
|
5 | 5 | namespace Implab.Automaton.RegularExpressions { |
|
6 |
public abstract class Token |
|
|
7 |
public abstract void Accept(IVisitor |
|
|
6 | public abstract class Token { | |
|
7 | public abstract void Accept(IVisitor visitor); | |
|
8 | 8 | |
|
9 |
public Token |
|
|
10 |
return Cat(new EndToken |
|
|
9 | public Token Extend() { | |
|
10 | return Cat(new EndToken()); | |
|
11 | 11 | } |
|
12 | 12 | |
|
13 |
public Token<TTag> |
|
|
13 | public Token Tag<TTag>(TTag tag) { | |
|
14 | 14 | return Cat(new EndToken<TTag>(tag)); |
|
15 | 15 | } |
|
16 | 16 | |
|
17 |
public Token |
|
|
18 |
return new CatToken |
|
|
17 | public Token Cat(Token right) { | |
|
18 | return new CatToken(this, right); | |
|
19 | 19 | } |
|
20 | 20 | |
|
21 |
public Token |
|
|
22 |
return new AltToken |
|
|
21 | public Token Or(Token right) { | |
|
22 | return new AltToken(this, right); | |
|
23 | 23 | } |
|
24 | 24 | |
|
25 |
public Token |
|
|
26 |
return Or(new EmptyToken |
|
|
25 | public Token Optional() { | |
|
26 | return Or(new EmptyToken()); | |
|
27 | 27 | } |
|
28 | 28 | |
|
29 |
public Token |
|
|
30 |
return new StarToken |
|
|
29 | public Token EClosure() { | |
|
30 | return new StarToken(this); | |
|
31 | 31 | } |
|
32 | 32 | |
|
33 |
public Token |
|
|
34 |
return Cat(new StarToken |
|
|
33 | public Token Closure() { | |
|
34 | return Cat(new StarToken(this)); | |
|
35 | 35 | } |
|
36 | 36 | |
|
37 |
public Token |
|
|
38 |
Token |
|
|
37 | public Token Repeat(int count) { | |
|
38 | Token token = null; | |
|
39 | 39 | |
|
40 | 40 | for (int i = 0; i < count; i++) |
|
41 | 41 | token = token != null ? token.Cat(this) : this; |
|
42 |
return token ?? new EmptyToken |
|
|
42 | return token ?? new EmptyToken(); | |
|
43 | 43 | } |
|
44 | 44 | |
|
45 |
public Token |
|
|
45 | public Token Repeat(int min, int max) { | |
|
46 | 46 | if (min > max || min < 1) |
|
47 | 47 | throw new ArgumentOutOfRangeException(); |
|
48 | 48 | var token = Repeat(min); |
@@ -52,11 +52,11 namespace Implab.Automaton.RegularExpres | |||
|
52 | 52 | return token; |
|
53 | 53 | } |
|
54 | 54 | |
|
55 |
public static Token |
|
|
55 | public static Token New(params int[] set) { | |
|
56 | 56 | Safe.ArgumentNotNull(set, "set"); |
|
57 |
Token |
|
|
57 | Token token = null; | |
|
58 | 58 | foreach(var c in set.Distinct()) |
|
59 |
token = token == null ? new SymbolToken |
|
|
59 | token = token == null ? new SymbolToken(c) : token.Or(new SymbolToken(c)); | |
|
60 | 60 | return token; |
|
61 | 61 | } |
|
62 | 62 | } |
@@ -4,8 +4,6 using Implab.Automaton; | |||
|
4 | 4 | |
|
5 | 5 | namespace Implab.Formats { |
|
6 | 6 | public class ByteAlphabet : IndexedAlphabetBase<byte> { |
|
7 | public ByteAlphabet() { | |
|
8 | } | |
|
9 | 7 | |
|
10 | 8 | #region implemented abstract members of IndexedAlphabetBase |
|
11 | 9 |
@@ -5,9 +5,6 using Implab.Automaton; | |||
|
5 | 5 | namespace Implab.Formats { |
|
6 | 6 | public class CharAlphabet: IndexedAlphabetBase<char> { |
|
7 | 7 | |
|
8 | public CharAlphabet() { | |
|
9 | } | |
|
10 | ||
|
11 | 8 | public override int GetSymbolIndex(char symbol) { |
|
12 | 9 | return symbol; |
|
13 | 10 | } |
@@ -4,7 +4,6 using Implab.Automaton; | |||
|
4 | 4 | using System.Text; |
|
5 | 5 | using Implab.Components; |
|
6 | 6 | using System.IO; |
|
7 | using Implab.Automaton.RegularExpressions; | |
|
8 | 7 | |
|
9 | 8 | namespace Implab.Formats.JSON { |
|
10 | 9 | /// <summary> |
@@ -13,8 +12,8 namespace Implab.Formats.JSON { | |||
|
13 | 12 | public class JSONScanner : Disposable { |
|
14 | 13 | readonly StringBuilder m_builder = new StringBuilder(); |
|
15 | 14 | |
|
16 |
readonly ScannerContext<JSONGrammar.TokenType> m_json |
|
|
17 |
readonly ScannerContext<JSONGrammar.TokenType> m_string |
|
|
15 | readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonDFA; | |
|
16 | readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringDFA; | |
|
18 | 17 | |
|
19 | 18 | |
|
20 | 19 | readonly TextScanner m_scanner; |
@@ -31,7 +30,7 namespace Implab.Formats.JSON { | |||
|
31 | 30 | public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { |
|
32 | 31 | Safe.ArgumentNotNull(reader, "reader"); |
|
33 | 32 | |
|
34 | m_scanner = new ReaderScanner(reader); | |
|
33 | m_scanner = new ReaderScanner(reader, bufferMax, chunkSize); | |
|
35 | 34 | } |
|
36 | 35 | |
|
37 | 36 | /// <summary> |
@@ -44,7 +43,7 namespace Implab.Formats.JSON { | |||
|
44 | 43 | /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> |
|
45 | 44 | public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { |
|
46 | 45 | JSONGrammar.TokenType[] tag; |
|
47 |
if (m_json |
|
|
46 | if (m_jsonContext.Execute(m_scanner, out tag)) { | |
|
48 | 47 | switch (tag[0]) { |
|
49 | 48 | case JSONGrammar.TokenType.StringBound: |
|
50 | 49 | tokenValue = ReadString(); |
@@ -68,12 +67,12 namespace Implab.Formats.JSON { | |||
|
68 | 67 | |
|
69 | 68 | string ReadString() { |
|
70 | 69 | int pos = 0; |
|
71 |
|
|
|
70 | var buf = new char[6]; // the buffer for unescaping chars | |
|
72 | 71 | |
|
73 | 72 | JSONGrammar.TokenType[] tag; |
|
74 | 73 | m_builder.Clear(); |
|
75 | 74 | |
|
76 |
while (m_string |
|
|
75 | while (m_stringContext.Execute(m_scanner, out tag)) { | |
|
77 | 76 | switch (tag[0]) { |
|
78 | 77 | case JSONGrammar.TokenType.StringBound: |
|
79 | 78 | return m_builder.ToString(); |
@@ -89,13 +88,17 namespace Implab.Formats.JSON { | |||
|
89 | 88 | m_scanner.CopyTokenTo(buf, 0); |
|
90 | 89 | m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); |
|
91 | 90 | break; |
|
92 | default: | |
|
93 | break; | |
|
94 | 91 | } |
|
95 | 92 | |
|
96 | 93 | } |
|
97 | 94 | |
|
98 | 95 | throw new ParserException("Unexpected end of data"); |
|
99 | 96 | } |
|
97 | ||
|
98 | protected override void Dispose(bool disposing) { | |
|
99 | if (disposing) | |
|
100 | Safe.Dispose(m_scanner); | |
|
101 | base.Dispose(disposing); | |
|
102 | } | |
|
100 | 103 | } |
|
101 | 104 | } |
@@ -1,11 +1,17 | |||
|
1 | using System; | |
|
2 | ||
|
3 | namespace Implab.Formats { | |
|
1 | namespace Implab.Formats { | |
|
2 | /// <summary> | |
|
3 | /// Represents a scanner configuration useful for recognizing tokens, based on the DFA. | |
|
4 | /// </summary> | |
|
4 | 5 | public class ScannerContext<TTag> { |
|
6 | ||
|
5 | 7 | public int[,] Dfa { get; private set; } |
|
8 | ||
|
6 | 9 | public bool[] Final { get; private set; } |
|
10 | ||
|
7 | 11 | public TTag[][] Tags { get; private set; } |
|
12 | ||
|
8 | 13 | public int State { get; private set; } |
|
14 | ||
|
9 | 15 | public int[] Alphabet { get; private set; } |
|
10 | 16 | |
|
11 | 17 | public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) { |
@@ -1,9 +1,7 | |||
|
1 | 1 | using System; |
|
2 | 2 | using Implab.Components; |
|
3 | using Implab.Automaton.RegularExpressions; | |
|
4 | 3 | using System.Diagnostics; |
|
5 | 4 | using Implab.Automaton; |
|
6 | using System.IO; | |
|
7 | 5 | using System.Text; |
|
8 | 6 | |
|
9 | 7 | namespace Implab.Formats { |
@@ -18,7 +16,7 namespace Implab.Formats { | |||
|
18 | 16 | int m_tokenLength; |
|
19 | 17 | |
|
20 | 18 | /// <summary> |
|
21 |
/// Initializes a new instance of the <see cref="Implab.Formats.TextScanner |
|
|
19 | /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class. | |
|
22 | 20 | /// </summary> |
|
23 | 21 | /// <param name="bufferMax">Buffer max.</param> |
|
24 | 22 | /// <param name="chunkSize">Chunk size.</param> |
@@ -30,7 +28,7 namespace Implab.Formats { | |||
|
30 | 28 | } |
|
31 | 29 | |
|
32 | 30 | /// <summary> |
|
33 |
/// Initializes a new instance of the <see cref="Implab.Formats.TextScanner |
|
|
31 | /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class. | |
|
34 | 32 | /// </summary> |
|
35 | 33 | /// <param name="buffer">Buffer.</param> |
|
36 | 34 | protected TextScanner(char[] buffer) { |
@@ -48,7 +46,9 namespace Implab.Formats { | |||
|
48 | 46 | /// <param name="final">Final states of the automaton.</param> |
|
49 | 47 | /// <param name="tags">Tags.</param> |
|
50 | 48 | /// <param name="state">The initial state for the automaton.</param> |
|
51 | internal bool ReadToken<TTag>(int[,] dfa, int[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { | |
|
49 | /// <param name="alphabet"></param> | |
|
50 | /// <param name = "tag"></param> | |
|
51 | internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { | |
|
52 | 52 | Safe.ArgumentNotNull(); |
|
53 | 53 | m_tokenLength = 0; |
|
54 | 54 | |
@@ -58,10 +58,10 namespace Implab.Formats { | |||
|
58 | 58 | // after the next chunk is read the offset in the buffer may change |
|
59 | 59 | int pos = m_bufferOffset + m_tokenLength; |
|
60 | 60 | |
|
61 | while(pos < m_bufferSize) { | |
|
61 | while (pos < m_bufferSize) { | |
|
62 | 62 | var ch = m_buffer[pos]; |
|
63 | 63 | |
|
64 | state = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
|
64 | state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
|
65 | 65 | if (state == DFAConst.UNREACHABLE_STATE) |
|
66 | 66 | break; |
|
67 | 67 | |
@@ -77,16 +77,17 namespace Implab.Formats { | |||
|
77 | 77 | if (final[state]) { |
|
78 | 78 | tag = tags[state]; |
|
79 | 79 | return true; |
|
80 |
} |
|
|
81 | if (m_bufferOffset == m_bufferSize) { | |
|
82 | if (m_tokenLength == 0) //EOF | |
|
80 | } | |
|
81 | ||
|
82 | if (m_bufferOffset == m_bufferSize) { | |
|
83 | if (m_tokenLength == 0) //EOF | |
|
83 | 84 | return false; |
|
84 | 85 | |
|
85 |
|
|
|
86 |
|
|
|
87 | throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); | |
|
86 | throw new ParserException(); | |
|
87 | } | |
|
88 | ||
|
89 | throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); | |
|
88 | 90 | |
|
89 | } | |
|
90 | 91 | } |
|
91 | 92 | |
|
92 | 93 | protected void Feed(char[] buffer, int offset, int length) { |
@@ -108,7 +109,7 namespace Implab.Formats { | |||
|
108 | 109 | var size = used + free; |
|
109 | 110 | |
|
110 | 111 | if (size > m_bufferMax) |
|
111 |
throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached" |
|
|
112 | throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024)); | |
|
112 | 113 | |
|
113 | 114 | var temp = new char[size]; |
|
114 | 115 |
@@ -160,11 +160,9 | |||
|
160 | 160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> |
|
161 | 161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> |
|
162 | 162 | <Compile Include="Automaton\DFAConst.cs" /> |
|
163 | <Compile Include="Automaton\RegularExpressions\Grammar.cs" /> | |
|
164 | 163 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> |
|
165 | 164 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> |
|
166 | 165 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> |
|
167 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> | |
|
168 | 166 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> |
|
169 | 167 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> |
|
170 | 168 | <Compile Include="Automaton\AutomatonTransition.cs" /> |
@@ -192,6 +190,10 | |||
|
192 | 190 | <Compile Include="Formats\StringScanner.cs" /> |
|
193 | 191 | <Compile Include="Formats\ReaderScanner.cs" /> |
|
194 | 192 | <Compile Include="Formats\ScannerContext.cs" /> |
|
193 | <Compile Include="Formats\Grammar.cs" /> | |
|
194 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> | |
|
195 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> | |
|
196 | <Compile Include="Automaton\RegularExpressions\IVisitorT.cs" /> | |
|
195 | 197 | </ItemGroup> |
|
196 | 198 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> |
|
197 | 199 | <ItemGroup /> |
@@ -41,6 +41,11 namespace Implab | |||
|
41 | 41 | throw new ArgumentOutOfRangeException(paramName); |
|
42 | 42 | } |
|
43 | 43 | |
|
44 | public static void ArgumentOfType(object value, Type type, string paramName) { | |
|
45 | if (!type.IsInstanceOfType(value)) | |
|
46 | throw new ArgumentException(String.Format("The parameter must be of type {0}", type), paramName); | |
|
47 | } | |
|
48 | ||
|
44 | 49 | public static void Dispose(params IDisposable[] objects) { |
|
45 | 50 | foreach (var d in objects) |
|
46 | 51 | if (d != null) |
|
1 | NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now