##// END OF EJS Templates
refactoring
cin -
r177:a0ff6a0e9c44 ref20160224
parent child
Show More
@@ -0,0 +1,33
1 using Implab;
2
3 namespace Implab.Automaton.RegularExpressions {
4 /// <summary>
5 /// End marker of an augmented regular expression that carries a tag; when the DFA
6 /// is built it is used to identify final states.
7 /// </summary>
8 public class EndToken<TTag>: Token {
9
10 readonly TTag m_tag; // assigned only by the constructor
11
12 public EndToken(TTag tag) {
13 m_tag = tag;
14 }
15
16 public EndToken()
17 : this(default(TTag)) {
18 }
19
20 public TTag Tag {
21 get { return m_tag; }
22 }
23
24 public override void Accept(IVisitor visitor) {
25 Safe.ArgumentNotNull(visitor, "visitor"); // null check first: ArgumentOfType rejects null as a type mismatch (IsInstanceOfType(null) is false)
26 Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor"); // the cast below requires a visitor that understands tagged end tokens
27 ((IVisitor<TTag>)visitor).Visit(this);
28 }
29 public override string ToString() {
30 return "#";
31 }
32 }
33 }
@@ -0,0 +1,8
1 namespace Implab.Automaton.RegularExpressions {
2 /// <summary>
3 /// Visitor of a regular expression syntax tree that, in addition to <see cref="IVisitor"/>, visits tagged end tokens.
4 /// </summary>
5 public interface IVisitor<T> : IVisitor {
6 void Visit(EndToken<T> token);
7 }
8 }
@@ -0,0 +1,100
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Linq;
5 using Implab.Automaton;
6 using Implab.Automaton.RegularExpressions;
7
8 namespace Implab.Formats {
9 /// <summary>
10 /// Abstract base class for grammars; lets derived classes formulate expressions over an alphabet of <c>TSymbol</c> symbols.
11 /// </summary>
12 public abstract class Grammar<TSymbol, TTag> {
13
14 protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { // alphabet used to translate and define the symbols of the grammar
15 get;
16 }
17
18 protected SymbolToken<TTag> UnclassifiedToken() { // NOTE(review): SymbolToken and Token are declared non-generic elsewhere in this changeset — confirm the <TTag> usages in this file
19 return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
20 }
21
22 protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { // registers every symbol of the sequence in AlphabetBuilder
23 Safe.ArgumentNotNull(alphabet, "alphabet");
24
25 foreach (var ch in alphabet)
26 AlphabetBuilder.DefineSymbol(ch);
27 }
28
29 protected Token<TTag> SymbolToken(TSymbol symbol) { // token for one symbol; the symbol is defined in the alphabet when it is still unclassified
30 return Token<TTag>.New(TranslateOrAdd(symbol));
31 }
32
33 protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) { // token built from the translated, de-duplicated symbols
34 Safe.ArgumentNotNull(symbols, "symbols");
35
36 return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
37 }
38
39 protected Token<TTag> SymbolSetToken(params TSymbol[] set) { // params-array convenience over SymbolToken(IEnumerable<TSymbol>)
40 return SymbolToken(set);
41 }
42
43 int TranslateOrAdd(TSymbol ch) { // translates ch, defining it first when Translate reports UNCLASSIFIED_INPUT
44 var t = AlphabetBuilder.Translate(ch);
45 if (t == DFAConst.UNCLASSIFIED_INPUT)
46 t = AlphabetBuilder.DefineSymbol(ch);
47 return t;
48 }
49
50 IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
51 return symbols.Distinct().Select(TranslateOrAdd);
52 }
53
54 int TranslateOrDie(TSymbol ch) { // translates ch; throws ApplicationException when the symbol is unclassified
55 var t = AlphabetBuilder.Translate(ch);
56 if (t == DFAConst.UNCLASSIFIED_INPUT)
57 throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
58 return t;
59 }
60
61 IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
62 return symbols.Distinct().Select(TranslateOrDie);
63 }
64
65 protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) { // token for every index in [0, AlphabetBuilder.Count) except those of the given, already classified, symbols
66 Safe.ArgumentNotNull(symbols, "symbols");
67
68 return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
69 }
70
71 protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); // supplies the alphabet handed to dfa.Optimize in BuildScannerContext
72
73 protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) { // builds a DFA from regexp, optimizes it and packs its tables into a ScannerContext
74
75 var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);
76
77 var visitor = new RegularExpressionVisitor<TTag>();
78 regexp.Accept( visitor );
79
80 visitor.BuildDFA(dfa);
81
82 if (dfa.IsFinalState(dfa.InitialState)) // a final initial state means the empty input would be accepted as a token
83 throw new ApplicationException("The specified language contains empty token");
84
85 var ab = CreateAlphabet();
86 var optimal = dfa.Optimize(ab);
87
88 return new ScannerContext<TTag>(
89 optimal.CreateTransitionTable(),
90 optimal.CreateFinalStateTable(),
91 optimal.CreateTagTable(),
92 optimal.InitialState,
93 ab.GetTranslationMap()
94 );
95 }
96
97 }
98
99
100 }
@@ -1,17 +1,17
1 using System;
1 using System;
2
2
3 namespace Implab.Automaton.RegularExpressions {
3 namespace Implab.Automaton.RegularExpressions {
4 public class AltToken<TTag>: BinaryToken<TTag> {
4 public class AltToken: BinaryToken {
5 public AltToken(Token<TTag> left, Token<TTag> right)
5 public AltToken(Token left, Token right)
6 : base(left, right) {
6 : base(left, right) {
7 }
7 }
8
8
9 public override void Accept(IVisitor<TTag> visitor) {
9 public override void Accept(IVisitor visitor) {
10 Safe.ArgumentNotNull(visitor, "visitor");
10 Safe.ArgumentNotNull(visitor, "visitor");
11 visitor.Visit(this);
11 visitor.Visit(this);
12 }
12 }
13 public override string ToString() {
13 public override string ToString() {
14 return String.Format(Right is BinaryToken<TTag> ? "{0}|({1})" : "{0}|{1}", Left, Right);
14 return String.Format(Right is BinaryToken ? "{0}|({1})" : "{0}|{1}", Left, Right);
15 }
15 }
16 }
16 }
17 }
17 }
@@ -1,19 +1,19
1 using Implab;
1 using Implab;
2
2
3 namespace Implab.Automaton.RegularExpressions {
3 namespace Implab.Automaton.RegularExpressions {
4 public abstract class BinaryToken<TTag> : Token<TTag> {
4 public abstract class BinaryToken: Token {
5 readonly Token<TTag> m_left;
5 readonly Token m_left;
6 readonly Token<TTag> m_right;
6 readonly Token m_right;
7
7
8 public Token<TTag> Left {
8 public Token Left {
9 get { return m_left; }
9 get { return m_left; }
10 }
10 }
11
11
12 public Token<TTag> Right {
12 public Token Right {
13 get { return m_right; }
13 get { return m_right; }
14 }
14 }
15
15
16 protected BinaryToken(Token<TTag> left, Token<TTag> right) {
16 protected BinaryToken(Token left, Token right) {
17 Safe.ArgumentNotNull(m_left = left, "left");
17 Safe.ArgumentNotNull(m_left = left, "left");
18 Safe.ArgumentNotNull(m_right = right, "right");
18 Safe.ArgumentNotNull(m_right = right, "right");
19 }
19 }
@@ -1,12 +1,12
1 using System;
1 using System;
2
2
3 namespace Implab.Automaton.RegularExpressions {
3 namespace Implab.Automaton.RegularExpressions {
4 public class CatToken<TTag> : BinaryToken<TTag> {
4 public class CatToken : BinaryToken {
5 public CatToken(Token<TTag> left, Token<TTag> right)
5 public CatToken(Token left, Token right)
6 : base(left, right) {
6 : base(left, right) {
7 }
7 }
8
8
9 public override void Accept(IVisitor<TTag> visitor) {
9 public override void Accept(IVisitor visitor) {
10 Safe.ArgumentNotNull(visitor, "visitor");
10 Safe.ArgumentNotNull(visitor, "visitor");
11 visitor.Visit(this);
11 visitor.Visit(this);
12 }
12 }
@@ -15,8 +15,8 namespace Implab.Automaton.RegularExpres
15 return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right));
15 return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right));
16 }
16 }
17
17
18 static string FormatToken(Token<TTag> token) {
18 static string FormatToken(Token token) {
19 return String.Format(token is AltToken<TTag> ? "({0})" : "{0}", token);
19 return String.Format(token is AltToken ? "({0})" : "{0}", token);
20 }
20 }
21 }
21 }
22 }
22 }
@@ -1,8 +1,8
1 using Implab;
1 using Implab;
2
2
3 namespace Implab.Automaton.RegularExpressions {
3 namespace Implab.Automaton.RegularExpressions {
4 public class EmptyToken<TTag> : Token<TTag> {
4 public class EmptyToken: Token {
5 public override void Accept(IVisitor<TTag> visitor) {
5 public override void Accept(IVisitor visitor) {
6 Safe.ArgumentNotNull(visitor, "visitor");
6 Safe.ArgumentNotNull(visitor, "visitor");
7 visitor.Visit(this);
7 visitor.Visit(this);
8 }
8 }
@@ -5,23 +5,9 namespace Implab.Automaton.RegularExpres
5 /// Конечный символ расширенного регулярного выражения, при построении ДКА
5 /// Конечный символ расширенного регулярного выражения, при построении ДКА
6 /// используется для определения конечных состояний.
6 /// используется для определения конечных состояний.
7 /// </summary>
7 /// </summary>
8 public class EndToken<TTag>: Token<TTag> {
8 public class EndToken: Token {
9
10 TTag m_tag;
11
12 public EndToken(TTag tag) {
13 m_tag = tag;
14 }
15
9
16 public EndToken()
10 public override void Accept(IVisitor visitor) {
17 : this(default(TTag)) {
18 }
19
20 public TTag Tag {
21 get { return m_tag; }
22 }
23
24 public override void Accept(IVisitor<TTag> visitor) {
25 Safe.ArgumentNotNull(visitor, "visitor");
11 Safe.ArgumentNotNull(visitor, "visitor");
26 visitor.Visit(this);
12 visitor.Visit(this);
27 }
13 }
@@ -1,5 +1,4
1 using System;
1
2
3 namespace Implab.Automaton.RegularExpressions {
2 namespace Implab.Automaton.RegularExpressions {
4 public interface ITaggedDFABuilder<TTag> : IDFATableBuilder {
3 public interface ITaggedDFABuilder<TTag> : IDFATableBuilder {
5 void SetStateTag(int s, TTag[] tags);
4 void SetStateTag(int s, TTag[] tags);
@@ -2,12 +2,12
2 /// <summary>
2 /// <summary>
3 /// Интерфейс обходчика синтаксического дерева регулярного выражения
3 /// Интерфейс обходчика синтаксического дерева регулярного выражения
4 /// </summary>
4 /// </summary>
5 public interface IVisitor<TTag> {
5 public interface IVisitor {
6 void Visit(AltToken<TTag> token);
6 void Visit(AltToken token);
7 void Visit(StarToken<TTag> token);
7 void Visit(StarToken token);
8 void Visit(CatToken<TTag> token);
8 void Visit(CatToken token);
9 void Visit(EmptyToken<TTag> token);
9 void Visit(EmptyToken token);
10 void Visit(EndToken<TTag> token);
10 void Visit(EndToken token);
11 void Visit(SymbolToken<TTag> token);
11 void Visit(SymbolToken token);
12 }
12 }
13 }
13 }
@@ -1,5 +1,4
1 using System;
1 using System.Collections.Generic;
2 using System.Collections.Generic;
3 using System.Linq;
2 using System.Linq;
4
3
5 namespace Implab.Automaton.RegularExpressions {
4 namespace Implab.Automaton.RegularExpressions {
@@ -12,13 +12,14 namespace Implab.Automaton.RegularExpres
12 /// </summary>
12 /// </summary>
13 public class RegularExpressionVisitor<TTag> : IVisitor<TTag> {
13 public class RegularExpressionVisitor<TTag> : IVisitor<TTag> {
14 int m_idx;
14 int m_idx;
15 Token<TTag> m_root;
15 Token m_root;
16 HashSet<int> m_firstpos;
16 HashSet<int> m_firstpos;
17 HashSet<int> m_lastpos;
17 HashSet<int> m_lastpos;
18
18
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
21 readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>();
21 readonly HashSet<int> m_ends = new HashSet<int>();
22 readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>();
22
23
23 public Dictionary<int, HashSet<int>> FollowposMap {
24 public Dictionary<int, HashSet<int>> FollowposMap {
24 get { return m_followpos; }
25 get { return m_followpos; }
@@ -30,19 +31,19 namespace Implab.Automaton.RegularExpres
30 }
31 }
31
32
32 bool Nullable(object n) {
33 bool Nullable(object n) {
33 if (n is EmptyToken<TTag> || n is StarToken<TTag>)
34 if (n is EmptyToken || n is StarToken)
34 return true;
35 return true;
35 var altToken = n as AltToken<TTag>;
36 var altToken = n as AltToken;
36 if (altToken != null)
37 if (altToken != null)
37 return Nullable(altToken.Left) || Nullable(altToken.Right);
38 return Nullable(altToken.Left) || Nullable(altToken.Right);
38 var catToken = n as CatToken<TTag>;
39 var catToken = n as CatToken;
39 if (catToken != null)
40 if (catToken != null)
40 return Nullable(catToken.Left) && Nullable(catToken.Right);
41 return Nullable(catToken.Left) && Nullable(catToken.Right);
41 return false;
42 return false;
42 }
43 }
43
44
44
45
45 public void Visit(AltToken<TTag> token) {
46 public void Visit(AltToken token) {
46 if (m_root == null)
47 if (m_root == null)
47 m_root = token;
48 m_root = token;
48 var firtspos = new HashSet<int>();
49 var firtspos = new HashSet<int>();
@@ -60,7 +61,7 namespace Implab.Automaton.RegularExpres
60 m_lastpos = lastpos;
61 m_lastpos = lastpos;
61 }
62 }
62
63
63 public void Visit(StarToken<TTag> token) {
64 public void Visit(StarToken token) {
64 if (m_root == null)
65 if (m_root == null)
65 m_root = token;
66 m_root = token;
66 token.Token.Accept(this);
67 token.Token.Accept(this);
@@ -69,7 +70,7 namespace Implab.Automaton.RegularExpres
69 Followpos(i).UnionWith(m_firstpos);
70 Followpos(i).UnionWith(m_firstpos);
70 }
71 }
71
72
72 public void Visit(CatToken<TTag> token) {
73 public void Visit(CatToken token) {
73 if (m_root == null)
74 if (m_root == null)
74 m_root = token;
75 m_root = token;
75
76
@@ -97,12 +98,12 namespace Implab.Automaton.RegularExpres
97
98
98 }
99 }
99
100
100 public void Visit(EmptyToken<TTag> token) {
101 public void Visit(EmptyToken token) {
101 if (m_root == null)
102 if (m_root == null)
102 m_root = token;
103 m_root = token;
103 }
104 }
104
105
105 public void Visit(SymbolToken<TTag> token) {
106 public void Visit(SymbolToken token) {
106 if (m_root == null)
107 if (m_root == null)
107 m_root = token;
108 m_root = token;
108 m_idx++;
109 m_idx++;
@@ -119,7 +120,19 namespace Implab.Automaton.RegularExpres
119 m_firstpos = new HashSet<int>(new[] { m_idx });
120 m_firstpos = new HashSet<int>(new[] { m_idx });
120 m_lastpos = new HashSet<int>(new[] { m_idx });
121 m_lastpos = new HashSet<int>(new[] { m_idx });
121 Followpos(m_idx);
122 Followpos(m_idx);
122 m_ends.Add(m_idx, token.Tag);
123 m_ends.Add(m_idx);
124 m_tags.Add(m_idx, token.Tag);
125 }
126
127 public void Visit(EndToken token) {
128 if (m_root == null)
129 m_root = token;
130 m_idx++;
131 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
132 m_firstpos = new HashSet<int>(new[] { m_idx });
133 m_lastpos = new HashSet<int>(new[] { m_idx });
134 Followpos(m_idx);
135 m_ends.Add(m_idx);
123 }
136 }
124
137
125 public void BuildDFA(ITaggedDFABuilder<TTag> dfa) {
138 public void BuildDFA(ITaggedDFABuilder<TTag> dfa) {
@@ -157,14 +170,18 namespace Implab.Automaton.RegularExpres
157 }
170 }
158 }
171 }
159 if (next.Count > 0) {
172 if (next.Count > 0) {
160 int s2 = states.Translate(next);
173 int s2;
161 if (s2 == DFAConst.UNCLASSIFIED_INPUT) {
174 if (states.Contains(next)) {
175 s2 = states.Translate(next);
176 } else {
162 s2 = states.DefineSymbol(next);
177 s2 = states.DefineSymbol(next);
163
178
164 tags = GetStateTags(next);
179 if (IsFinal(next)) {
165 if (tags != null && tags.Length > 0) {
180
166 dfa.MarkFinalState(s2);
181 dfa.MarkFinalState(s2);
167 dfa.SetStateTag(s2, tags);
182 tags = GetStateTags(next);
183 if (tags != null && tags.Length > 0)
184 dfa.SetStateTag(s2, tags);
168 }
185 }
169
186
170 queue.Enqueue(next);
187 queue.Enqueue(next);
@@ -175,9 +192,14 namespace Implab.Automaton.RegularExpres
175 }
192 }
176 }
193 }
177
194
195 bool IsFinal(IEnumerable<int> state) {
196 Debug.Assert(state != null);
197 return state.Any(m_ends.Contains);
198 }
199
178 TTag[] GetStateTags(IEnumerable<int> state) {
200 TTag[] GetStateTags(IEnumerable<int> state) {
179 Debug.Assert(state != null);
201 Debug.Assert(state != null);
180 return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray();
202 return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray();
181 }
203 }
182
204
183 }
205 }
@@ -1,28 +1,25
1 using Implab;
1 using Implab;
2 using System;
2 using System;
3 using System.Collections.Generic;
3
4 using System.Linq;
5 using System.Text;
6 using System.Threading.Tasks;
7
4
8 namespace Implab.Automaton.RegularExpressions {
5 namespace Implab.Automaton.RegularExpressions {
9 /// <summary>
6 /// <summary>
10 /// Замыкание выражения с 0 и более повторов.
7 /// Замыкание выражения с 0 и более повторов.
11 /// </summary>
8 /// </summary>
12 public class StarToken<TTag>: Token<TTag> {
9 public class StarToken: Token {
13
10
14 Token<TTag> m_token;
11 Token m_token;
15
12
16 public Token<TTag> Token {
13 public Token Token {
17 get { return m_token; }
14 get { return m_token; }
18 }
15 }
19
16
20 public StarToken(Token<TTag> token) {
17 public StarToken(Token token) {
21 Safe.ArgumentNotNull(token, "token");
18 Safe.ArgumentNotNull(token, "token");
22 m_token = token;
19 m_token = token;
23 }
20 }
24
21
25 public override void Accept(IVisitor<TTag> visitor) {
22 public override void Accept(IVisitor visitor) {
26 Safe.ArgumentNotNull(visitor, "visitor");
23 Safe.ArgumentNotNull(visitor, "visitor");
27 visitor.Visit(this);
24 visitor.Visit(this);
28 }
25 }
@@ -4,7 +4,7 namespace Implab.Automaton.RegularExpres
4 /// <summary>
4 /// <summary>
5 /// Выражение, соответствующее одному символу.
5 /// Выражение, соответствующее одному символу.
6 /// </summary>
6 /// </summary>
7 public class SymbolToken<TTag> : Token<TTag> {
7 public class SymbolToken: Token {
8 int m_value;
8 int m_value;
9
9
10 public int Value {
10 public int Value {
@@ -14,7 +14,7 namespace Implab.Automaton.RegularExpres
14 public SymbolToken(int value) {
14 public SymbolToken(int value) {
15 m_value = value;
15 m_value = value;
16 }
16 }
17 public override void Accept(IVisitor<TTag> visitor) {
17 public override void Accept(IVisitor visitor) {
18 Safe.ArgumentNotNull(visitor, "visitor");
18 Safe.ArgumentNotNull(visitor, "visitor");
19
19
20 visitor.Visit(this);
20 visitor.Visit(this);
@@ -3,46 +3,46 using System;
3 using System.Linq;
3 using System.Linq;
4
4
5 namespace Implab.Automaton.RegularExpressions {
5 namespace Implab.Automaton.RegularExpressions {
6 public abstract class Token<TTag> {
6 public abstract class Token {
7 public abstract void Accept(IVisitor<TTag> visitor);
7 public abstract void Accept(IVisitor visitor);
8
8
9 public Token<TTag> Extend() {
9 public Token Extend() {
10 return Cat(new EndToken<TTag>());
10 return Cat(new EndToken());
11 }
11 }
12
12
13 public Token<TTag> Tag(TTag tag) {
13 public Token Tag<TTag>(TTag tag) {
14 return Cat(new EndToken<TTag>(tag));
14 return Cat(new EndToken<TTag>(tag));
15 }
15 }
16
16
17 public Token<TTag> Cat(Token<TTag> right) {
17 public Token Cat(Token right) {
18 return new CatToken<TTag>(this, right);
18 return new CatToken(this, right);
19 }
19 }
20
20
21 public Token<TTag> Or(Token<TTag> right) {
21 public Token Or(Token right) {
22 return new AltToken<TTag>(this, right);
22 return new AltToken(this, right);
23 }
23 }
24
24
25 public Token<TTag> Optional() {
25 public Token Optional() {
26 return Or(new EmptyToken<TTag>());
26 return Or(new EmptyToken());
27 }
27 }
28
28
29 public Token<TTag> EClosure() {
29 public Token EClosure() {
30 return new StarToken<TTag>(this);
30 return new StarToken(this);
31 }
31 }
32
32
33 public Token<TTag> Closure() {
33 public Token Closure() {
34 return Cat(new StarToken<TTag>(this));
34 return Cat(new StarToken(this));
35 }
35 }
36
36
37 public Token<TTag> Repeat(int count) {
37 public Token Repeat(int count) {
38 Token<TTag> token = null;
38 Token token = null;
39
39
40 for (int i = 0; i < count; i++)
40 for (int i = 0; i < count; i++)
41 token = token != null ? token.Cat(this) : this;
41 token = token != null ? token.Cat(this) : this;
42 return token ?? new EmptyToken<TTag>();
42 return token ?? new EmptyToken();
43 }
43 }
44
44
45 public Token<TTag> Repeat(int min, int max) {
45 public Token Repeat(int min, int max) {
46 if (min > max || min < 1)
46 if (min > max || min < 1)
47 throw new ArgumentOutOfRangeException();
47 throw new ArgumentOutOfRangeException();
48 var token = Repeat(min);
48 var token = Repeat(min);
@@ -52,11 +52,11 namespace Implab.Automaton.RegularExpres
52 return token;
52 return token;
53 }
53 }
54
54
55 public static Token<TTag> New(params int[] set) {
55 public static Token New(params int[] set) {
56 Safe.ArgumentNotNull(set, "set");
56 Safe.ArgumentNotNull(set, "set");
57 Token<TTag> token = null;
57 Token token = null;
58 foreach(var c in set.Distinct())
58 foreach(var c in set.Distinct())
59 token = token == null ? new SymbolToken<TTag>(c) : token.Or(new SymbolToken<TTag>(c));
59 token = token == null ? new SymbolToken(c) : token.Or(new SymbolToken(c));
60 return token;
60 return token;
61 }
61 }
62 }
62 }
@@ -4,8 +4,6 using Implab.Automaton;
4
4
5 namespace Implab.Formats {
5 namespace Implab.Formats {
6 public class ByteAlphabet : IndexedAlphabetBase<byte> {
6 public class ByteAlphabet : IndexedAlphabetBase<byte> {
7 public ByteAlphabet() {
8 }
9
7
10 #region implemented abstract members of IndexedAlphabetBase
8 #region implemented abstract members of IndexedAlphabetBase
11
9
@@ -5,9 +5,6 using Implab.Automaton;
5 namespace Implab.Formats {
5 namespace Implab.Formats {
6 public class CharAlphabet: IndexedAlphabetBase<char> {
6 public class CharAlphabet: IndexedAlphabetBase<char> {
7
7
8 public CharAlphabet() {
9 }
10
11 public override int GetSymbolIndex(char symbol) {
8 public override int GetSymbolIndex(char symbol) {
12 return symbol;
9 return symbol;
13 }
10 }
@@ -4,7 +4,6 using Implab.Automaton;
4 using System.Text;
4 using System.Text;
5 using Implab.Components;
5 using Implab.Components;
6 using System.IO;
6 using System.IO;
7 using Implab.Automaton.RegularExpressions;
8
7
9 namespace Implab.Formats.JSON {
8 namespace Implab.Formats.JSON {
10 /// <summary>
9 /// <summary>
@@ -13,8 +12,8 namespace Implab.Formats.JSON {
13 public class JSONScanner : Disposable {
12 public class JSONScanner : Disposable {
14 readonly StringBuilder m_builder = new StringBuilder();
13 readonly StringBuilder m_builder = new StringBuilder();
15
14
16 readonly ScannerContext<JSONGrammar.TokenType> m_jsonScanner = JSONGrammar.Instance.JsonDFA;
15 readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonDFA;
17 readonly ScannerContext<JSONGrammar.TokenType> m_stringScanner = JSONGrammar.Instance.JsonStringDFA;
16 readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringDFA;
18
17
19
18
20 readonly TextScanner m_scanner;
19 readonly TextScanner m_scanner;
@@ -31,7 +30,7 namespace Implab.Formats.JSON {
31 public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
30 public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
32 Safe.ArgumentNotNull(reader, "reader");
31 Safe.ArgumentNotNull(reader, "reader");
33
32
34 m_scanner = new ReaderScanner(reader);
33 m_scanner = new ReaderScanner(reader, bufferMax, chunkSize);
35 }
34 }
36
35
37 /// <summary>
36 /// <summary>
@@ -44,7 +43,7 namespace Implab.Formats.JSON {
44 /// в строках обрабатываются экранированные символы, числа становятся типа double.</remarks>
43 /// в строках обрабатываются экранированные символы, числа становятся типа double.</remarks>
45 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
44 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
46 JSONGrammar.TokenType[] tag;
45 JSONGrammar.TokenType[] tag;
47 if (m_jsonScanner.Execute(m_scanner, out tag)) {
46 if (m_jsonContext.Execute(m_scanner, out tag)) {
48 switch (tag[0]) {
47 switch (tag[0]) {
49 case JSONGrammar.TokenType.StringBound:
48 case JSONGrammar.TokenType.StringBound:
50 tokenValue = ReadString();
49 tokenValue = ReadString();
@@ -68,12 +67,12 namespace Implab.Formats.JSON {
68
67
69 string ReadString() {
68 string ReadString() {
70 int pos = 0;
69 int pos = 0;
71 char[] buf = new char[6]; // the buffer for unescaping chars
70 var buf = new char[6]; // the buffer for unescaping chars
72
71
73 JSONGrammar.TokenType[] tag;
72 JSONGrammar.TokenType[] tag;
74 m_builder.Clear();
73 m_builder.Clear();
75
74
76 while (m_stringScanner.Execute(m_scanner, out tag)) {
75 while (m_stringContext.Execute(m_scanner, out tag)) {
77 switch (tag[0]) {
76 switch (tag[0]) {
78 case JSONGrammar.TokenType.StringBound:
77 case JSONGrammar.TokenType.StringBound:
79 return m_builder.ToString();
78 return m_builder.ToString();
@@ -89,13 +88,17 namespace Implab.Formats.JSON {
89 m_scanner.CopyTokenTo(buf, 0);
88 m_scanner.CopyTokenTo(buf, 0);
90 m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
89 m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
91 break;
90 break;
92 default:
93 break;
94 }
91 }
95
92
96 }
93 }
97
94
98 throw new ParserException("Unexpected end of data");
95 throw new ParserException("Unexpected end of data");
99 }
96 }
97
98 protected override void Dispose(bool disposing) {
99 if (disposing)
100 Safe.Dispose(m_scanner);
101 base.Dispose(disposing);
102 }
100 }
103 }
101 }
104 }
@@ -1,11 +1,17
1 using System;
1 namespace Implab.Formats {
2
2 /// <summary>
3 namespace Implab.Formats {
3 /// Represents a scanner configuration useful for recognizing tokens, based on the DFA.
4 /// </summary>
4 public class ScannerContext<TTag> {
5 public class ScannerContext<TTag> {
6
5 public int[,] Dfa { get; private set; }
7 public int[,] Dfa { get; private set; }
8
6 public bool[] Final { get; private set; }
9 public bool[] Final { get; private set; }
10
7 public TTag[][] Tags { get; private set; }
11 public TTag[][] Tags { get; private set; }
12
8 public int State { get; private set; }
13 public int State { get; private set; }
14
9 public int[] Alphabet { get; private set; }
15 public int[] Alphabet { get; private set; }
10
16
11 public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) {
17 public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) {
@@ -1,9 +1,7
1 using System;
1 using System;
2 using Implab.Components;
2 using Implab.Components;
3 using Implab.Automaton.RegularExpressions;
4 using System.Diagnostics;
3 using System.Diagnostics;
5 using Implab.Automaton;
4 using Implab.Automaton;
6 using System.IO;
7 using System.Text;
5 using System.Text;
8
6
9 namespace Implab.Formats {
7 namespace Implab.Formats {
@@ -18,7 +16,7 namespace Implab.Formats {
18 int m_tokenLength;
16 int m_tokenLength;
19
17
20 /// <summary>
18 /// <summary>
21 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class.
19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
22 /// </summary>
20 /// </summary>
23 /// <param name="bufferMax">Buffer max.</param>
21 /// <param name="bufferMax">Buffer max.</param>
24 /// <param name="chunkSize">Chunk size.</param>
22 /// <param name="chunkSize">Chunk size.</param>
@@ -30,7 +28,7 namespace Implab.Formats {
30 }
28 }
31
29
32 /// <summary>
30 /// <summary>
33 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class.
31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
34 /// </summary>
32 /// </summary>
35 /// <param name="buffer">Buffer.</param>
33 /// <param name="buffer">Buffer.</param>
36 protected TextScanner(char[] buffer) {
34 protected TextScanner(char[] buffer) {
@@ -48,7 +46,9 namespace Implab.Formats {
48 /// <param name="final">Final states of the automaton.</param>
46 /// <param name="final">Final states of the automaton.</param>
49 /// <param name="tags">Tags.</param>
47 /// <param name="tags">Tags.</param>
50 /// <param name="state">The initial state for the automaton.</param>
48 /// <param name="state">The initial state for the automaton.</param>
51 internal bool ReadToken<TTag>(int[,] dfa, int[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
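49 /// <param name="alphabet">Map from a character code to the input class used as the column index of <paramref name="dfa"/>.</param>
50 /// <param name="tag">Receives the tags of the final state that was reached, taken from <paramref name="tags"/>.</param>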
51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
52 Safe.ArgumentNotNull();
52 Safe.ArgumentNotNull();
53 m_tokenLength = 0;
53 m_tokenLength = 0;
54
54
@@ -58,10 +58,10 namespace Implab.Formats {
58 // after the next chunk is read the offset in the buffer may change
58 // after the next chunk is read the offset in the buffer may change
59 int pos = m_bufferOffset + m_tokenLength;
59 int pos = m_bufferOffset + m_tokenLength;
60
60
61 while(pos < m_bufferSize) {
61 while (pos < m_bufferSize) {
62 var ch = m_buffer[pos];
62 var ch = m_buffer[pos];
63
63
64 state = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]];
64 state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]];
65 if (state == DFAConst.UNREACHABLE_STATE)
65 if (state == DFAConst.UNREACHABLE_STATE)
66 break;
66 break;
67
67
@@ -77,16 +77,17 namespace Implab.Formats {
77 if (final[state]) {
77 if (final[state]) {
78 tag = tags[state];
78 tag = tags[state];
79 return true;
79 return true;
80 } else {
80 }
81 if (m_bufferOffset == m_bufferSize) {
81
82 if (m_tokenLength == 0) //EOF
82 if (m_bufferOffset == m_bufferSize) {
83 if (m_tokenLength == 0) //EOF
83 return false;
84 return false;
84
85
85 throw new ParserException();
86 throw new ParserException();
86 }
87 }
87 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
88
89 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
88
90
89 }
90 }
91 }
91
92
92 protected void Feed(char[] buffer, int offset, int length) {
93 protected void Feed(char[] buffer, int offset, int length) {
@@ -108,7 +109,7 namespace Implab.Formats {
108 var size = used + free;
109 var size = used + free;
109
110
110 if (size > m_bufferMax)
111 if (size > m_bufferMax)
111 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached"), m_bufferMax/1024);
112 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024));
112
113
113 var temp = new char[size];
114 var temp = new char[size];
114
115
@@ -160,11 +160,9
160 <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" />
160 <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" />
161 <Compile Include="Automaton\RegularExpressions\CatToken.cs" />
161 <Compile Include="Automaton\RegularExpressions\CatToken.cs" />
162 <Compile Include="Automaton\DFAConst.cs" />
162 <Compile Include="Automaton\DFAConst.cs" />
163 <Compile Include="Automaton\RegularExpressions\Grammar.cs" />
164 <Compile Include="Automaton\RegularExpressions\StarToken.cs" />
163 <Compile Include="Automaton\RegularExpressions\StarToken.cs" />
165 <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" />
164 <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" />
166 <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" />
165 <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" />
167 <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
168 <Compile Include="Automaton\RegularExpressions\Token.cs" />
166 <Compile Include="Automaton\RegularExpressions\Token.cs" />
169 <Compile Include="Automaton\RegularExpressions\IVisitor.cs" />
167 <Compile Include="Automaton\RegularExpressions\IVisitor.cs" />
170 <Compile Include="Automaton\AutomatonTransition.cs" />
168 <Compile Include="Automaton\AutomatonTransition.cs" />
@@ -192,6 +190,10
192 <Compile Include="Formats\StringScanner.cs" />
190 <Compile Include="Formats\StringScanner.cs" />
193 <Compile Include="Formats\ReaderScanner.cs" />
191 <Compile Include="Formats\ReaderScanner.cs" />
194 <Compile Include="Formats\ScannerContext.cs" />
192 <Compile Include="Formats\ScannerContext.cs" />
193 <Compile Include="Formats\Grammar.cs" />
194 <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" />
195 <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
196 <Compile Include="Automaton\RegularExpressions\IVisitorT.cs" />
195 </ItemGroup>
197 </ItemGroup>
196 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
198 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
197 <ItemGroup />
199 <ItemGroup />
@@ -41,6 +41,11 namespace Implab
41 throw new ArgumentOutOfRangeException(paramName);
41 throw new ArgumentOutOfRangeException(paramName);
42 }
42 }
43
43
44 public static void ArgumentOfType(object value, Type type, string paramName) {
45 if (!type.IsInstanceOfType(value))
46 throw new ArgumentException(String.Format("The parameter must be of type {0}", type), paramName);
47 }
48
44 public static void Dispose(params IDisposable[] objects) {
49 public static void Dispose(params IDisposable[] objects) {
45 foreach (var d in objects)
50 foreach (var d in objects)
46 if (d != null)
51 if (d != null)
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now