##// END OF EJS Templates
refactoring
cin -
r177:a0ff6a0e9c44 ref20160224
parent child
Show More
@@ -0,0 +1,33
1 using Implab;
2
3 namespace Implab.Automaton.RegularExpressions {
/// <summary>
/// End marker of an extended regular expression that carries a tag.
/// When the DFA is built, the marker is used to determine the final states.
/// </summary>
/// <typeparam name="TTag">Type of the tag attached to the end marker.</typeparam>
public class EndToken<TTag>: Token {

    // Assigned only in the constructor, hence readonly.
    readonly TTag m_tag;

    /// <summary>
    /// Creates an end marker carrying the specified tag.
    /// </summary>
    /// <param name="tag">The tag to attach to the marker.</param>
    public EndToken(TTag tag) {
        m_tag = tag;
    }

    /// <summary>
    /// Creates an end marker carrying <c>default(TTag)</c>.
    /// </summary>
    public EndToken()
        : this(default(TTag)) {
    }

    /// <summary>
    /// The tag attached to this end marker.
    /// </summary>
    public TTag Tag {
        get { return m_tag; }
    }

    /// <summary>
    /// Dispatches this token to the tag-aware overload <c>IVisitor&lt;TTag&gt;.Visit</c>.
    /// </summary>
    /// <param name="visitor">The visitor; must be non-null and implement <c>IVisitor&lt;TTag&gt;</c>.</param>
    /// <exception cref="System.ArgumentException">
    /// <paramref name="visitor"/> does not implement <c>IVisitor&lt;TTag&gt;</c>.
    /// </exception>
    public override void Accept(IVisitor visitor) {
        // The null check must come first: the type check also rejects null,
        // which would report a missing visitor as a type mismatch and leave
        // the null check unreachable.
        Safe.ArgumentNotNull(visitor, "visitor");
        Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor");
        ((IVisitor<TTag>)visitor).Visit(this);
    }

    /// <summary>
    /// Returns the textual form of the end marker, <c>#</c>.
    /// </summary>
    public override string ToString() {
        return "#";
    }
}
33 }
@@ -0,0 +1,8
1 namespace Implab.Automaton.RegularExpressions {
/// <summary>
/// Visitor over the syntax tree of a regular expression which, in addition to
/// the members of <c>IVisitor</c>, handles end tokens carrying a tag.
/// </summary>
/// <typeparam name="TTag">Type of the tag carried by the visited end tokens.</typeparam>
public interface IVisitor<TTag> : IVisitor {
    /// <summary>
    /// Visits an end token that carries a tag of type <typeparamref name="TTag"/>.
    /// </summary>
    /// <param name="token">The tagged end token being visited.</param>
    void Visit(EndToken<TTag> token);
}
8 }
@@ -0,0 +1,100
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Linq;
5 using Implab.Automaton;
6 using Implab.Automaton.RegularExpressions;
7
8 namespace Implab.Formats {
/// <summary>
/// Abstract base class of a grammar. Provides helpers to formulate regular
/// expressions over an alphabet of <typeparamref name="TSymbol"/> symbols and
/// to compile an expression into a <c>ScannerContext</c>.
/// </summary>
/// <typeparam name="TSymbol">Type of the input symbols of the alphabet.</typeparam>
/// <typeparam name="TTag">Type of the tags attached to the recognized tokens.</typeparam>
public abstract class Grammar<TSymbol, TTag> {

    /// <summary>
    /// The alphabet builder used to translate symbols to their indexes and
    /// to define symbols which are not known yet.
    /// </summary>
    protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
        get;
    }

    /// <summary>
    /// Creates a token for <c>DFAConst.UNCLASSIFIED_INPUT</c>.
    /// </summary>
    protected SymbolToken<TTag> UnclassifiedToken() {
        return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
    }

    /// <summary>
    /// Defines every symbol of the specified sequence in the alphabet builder.
    /// </summary>
    /// <param name="alphabet">The symbols to define; must not be null.</param>
    protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
        Safe.ArgumentNotNull(alphabet, "alphabet");

        foreach (var ch in alphabet) {
            AlphabetBuilder.DefineSymbol(ch);
        }
    }

    /// <summary>
    /// Creates a token matching the specified symbol; the symbol is added to
    /// the alphabet when it is still unclassified.
    /// </summary>
    /// <param name="symbol">The symbol to match.</param>
    protected Token<TTag> SymbolToken(TSymbol symbol) {
        return Token<TTag>.New(TranslateOrAdd(symbol));
    }

    /// <summary>
    /// Creates a token matching any of the specified symbols; unclassified
    /// symbols are added to the alphabet.
    /// </summary>
    /// <param name="symbols">The symbols to match; must not be null.</param>
    protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
    }

    /// <summary>
    /// Convenience overload of <c>SymbolToken</c> taking the symbols as a parameter list.
    /// </summary>
    /// <param name="set">The symbols to match.</param>
    protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
        return SymbolToken(set);
    }

    // Translates the symbol to its index, defining the symbol first
    // when the builder reports it as unclassified.
    int TranslateOrAdd(TSymbol ch) {
        var t = AlphabetBuilder.Translate(ch);
        if (t == DFAConst.UNCLASSIFIED_INPUT)
            t = AlphabetBuilder.DefineSymbol(ch);
        return t;
    }

    // Lazily translates the distinct symbols, defining the unknown ones.
    IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
        return symbols.Distinct().Select(TranslateOrAdd);
    }

    // Translates the symbol to its index and fails when the symbol is unclassified.
    int TranslateOrDie(TSymbol ch) {
        var t = AlphabetBuilder.Translate(ch);
        if (t == DFAConst.UNCLASSIFIED_INPUT)
            throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
        return t;
    }

    // Lazily translates the distinct symbols; the exception for an unclassified
    // symbol is raised when the sequence is enumerated, not when it is created.
    IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
        return symbols.Distinct().Select(TranslateOrDie);
    }

    /// <summary>
    /// Creates a token matching every symbol index in the range
    /// <c>0 .. AlphabetBuilder.Count - 1</c> except the indexes of the specified symbols.
    /// </summary>
    /// <param name="symbols">The symbols to exclude; must not be null and must all be classified.</param>
    /// <exception cref="System.ApplicationException">One of the symbols is unclassified.</exception>
    protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        var excluded = TranslateOrDie(symbols);
        var remaining = Enumerable.Range(0, AlphabetBuilder.Count).Except(excluded).ToArray();

        return Token<TTag>.New(remaining);
    }

    /// <summary>
    /// Creates the alphabet which receives the symbol classes of the optimized DFA.
    /// </summary>
    protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

    /// <summary>
    /// Builds a DFA from the specified regular expression, optimizes it and
    /// packs the resulting tables into a scanner context.
    /// </summary>
    /// <param name="regexp">The root of the regular expression; must not be null.</param>
    /// <exception cref="System.ApplicationException">
    /// The initial state of the DFA is final, i.e. the language contains the empty token.
    /// </exception>
    protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
        // Validate up front, like the other entry points of this class, instead of
        // failing later on regexp.Accept after the DFA has already been allocated.
        Safe.ArgumentNotNull(regexp, "regexp");

        var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

        var visitor = new RegularExpressionVisitor<TTag>();
        regexp.Accept(visitor);

        visitor.BuildDFA(dfa);

        if (dfa.IsFinalState(dfa.InitialState))
            throw new ApplicationException("The specified language contains empty token");

        // The optimization fills the freshly created alphabet, whose translation
        // map is then handed over to the scanner context.
        var ab = CreateAlphabet();
        var optimal = dfa.Optimize(ab);

        return new ScannerContext<TTag>(
            optimal.CreateTransitionTable(),
            optimal.CreateFinalStateTable(),
            optimal.CreateTagTable(),
            optimal.InitialState,
            ab.GetTranslationMap()
        );
    }

}
98
99
100 }
@@ -1,17 +1,17
1 1 using System;
2 2
3 3 namespace Implab.Automaton.RegularExpressions {
4 public class AltToken<TTag>: BinaryToken<TTag> {
5 public AltToken(Token<TTag> left, Token<TTag> right)
4 public class AltToken: BinaryToken {
5 public AltToken(Token left, Token right)
6 6 : base(left, right) {
7 7 }
8 8
9 public override void Accept(IVisitor<TTag> visitor) {
9 public override void Accept(IVisitor visitor) {
10 10 Safe.ArgumentNotNull(visitor, "visitor");
11 11 visitor.Visit(this);
12 12 }
13 13 public override string ToString() {
14 return String.Format(Right is BinaryToken<TTag> ? "{0}|({1})" : "{0}|{1}", Left, Right);
14 return String.Format(Right is BinaryToken ? "{0}|({1})" : "{0}|{1}", Left, Right);
15 15 }
16 16 }
17 17 }
@@ -1,19 +1,19
1 1 using Implab;
2 2
3 3 namespace Implab.Automaton.RegularExpressions {
4 public abstract class BinaryToken<TTag> : Token<TTag> {
5 readonly Token<TTag> m_left;
6 readonly Token<TTag> m_right;
4 public abstract class BinaryToken: Token {
5 readonly Token m_left;
6 readonly Token m_right;
7 7
8 public Token<TTag> Left {
8 public Token Left {
9 9 get { return m_left; }
10 10 }
11 11
12 public Token<TTag> Right {
12 public Token Right {
13 13 get { return m_right; }
14 14 }
15 15
16 protected BinaryToken(Token<TTag> left, Token<TTag> right) {
16 protected BinaryToken(Token left, Token right) {
17 17 Safe.ArgumentNotNull(m_left = left, "left");
18 18 Safe.ArgumentNotNull(m_right = right, "right");
19 19 }
@@ -1,12 +1,12
1 1 using System;
2 2
3 3 namespace Implab.Automaton.RegularExpressions {
4 public class CatToken<TTag> : BinaryToken<TTag> {
5 public CatToken(Token<TTag> left, Token<TTag> right)
4 public class CatToken : BinaryToken {
5 public CatToken(Token left, Token right)
6 6 : base(left, right) {
7 7 }
8 8
9 public override void Accept(IVisitor<TTag> visitor) {
9 public override void Accept(IVisitor visitor) {
10 10 Safe.ArgumentNotNull(visitor, "visitor");
11 11 visitor.Visit(this);
12 12 }
@@ -15,8 +15,8 namespace Implab.Automaton.RegularExpres
15 15 return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right));
16 16 }
17 17
18 static string FormatToken(Token<TTag> token) {
19 return String.Format(token is AltToken<TTag> ? "({0})" : "{0}", token);
18 static string FormatToken(Token token) {
19 return String.Format(token is AltToken ? "({0})" : "{0}", token);
20 20 }
21 21 }
22 22 }
@@ -1,8 +1,8
1 1 using Implab;
2 2
3 3 namespace Implab.Automaton.RegularExpressions {
4 public class EmptyToken<TTag> : Token<TTag> {
5 public override void Accept(IVisitor<TTag> visitor) {
4 public class EmptyToken: Token {
5 public override void Accept(IVisitor visitor) {
6 6 Safe.ArgumentNotNull(visitor, "visitor");
7 7 visitor.Visit(this);
8 8 }
@@ -5,23 +5,9 namespace Implab.Automaton.RegularExpres
5 5 /// Конечный символ расширенного регулярного выражения, при построении ДКА
6 6 /// используется для определения конечных состояний.
7 7 /// </summary>
8 public class EndToken<TTag>: Token<TTag> {
9
10 TTag m_tag;
11
12 public EndToken(TTag tag) {
13 m_tag = tag;
14 }
8 public class EndToken: Token {
15 9
16 public EndToken()
17 : this(default(TTag)) {
18 }
19
20 public TTag Tag {
21 get { return m_tag; }
22 }
23
24 public override void Accept(IVisitor<TTag> visitor) {
10 public override void Accept(IVisitor visitor) {
25 11 Safe.ArgumentNotNull(visitor, "visitor");
26 12 visitor.Visit(this);
27 13 }
@@ -1,5 +1,4
1 using System;
2
1
3 2 namespace Implab.Automaton.RegularExpressions {
4 3 public interface ITaggedDFABuilder<TTag> : IDFATableBuilder {
5 4 void SetStateTag(int s, TTag[] tags);
@@ -2,12 +2,12
2 2 /// <summary>
3 3 /// Интерфейс обходчика синтаксического дерева регулярного выражения
4 4 /// </summary>
5 public interface IVisitor<TTag> {
6 void Visit(AltToken<TTag> token);
7 void Visit(StarToken<TTag> token);
8 void Visit(CatToken<TTag> token);
9 void Visit(EmptyToken<TTag> token);
10 void Visit(EndToken<TTag> token);
11 void Visit(SymbolToken<TTag> token);
5 public interface IVisitor {
6 void Visit(AltToken token);
7 void Visit(StarToken token);
8 void Visit(CatToken token);
9 void Visit(EmptyToken token);
10 void Visit(EndToken token);
11 void Visit(SymbolToken token);
12 12 }
13 13 }
@@ -1,5 +1,4
1 using System;
2 using System.Collections.Generic;
1 using System.Collections.Generic;
3 2 using System.Linq;
4 3
5 4 namespace Implab.Automaton.RegularExpressions {
@@ -12,13 +12,14 namespace Implab.Automaton.RegularExpres
12 12 /// </summary>
13 13 public class RegularExpressionVisitor<TTag> : IVisitor<TTag> {
14 14 int m_idx;
15 Token<TTag> m_root;
15 Token m_root;
16 16 HashSet<int> m_firstpos;
17 17 HashSet<int> m_lastpos;
18 18
19 19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
20 20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
21 readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>();
21 readonly HashSet<int> m_ends = new HashSet<int>();
22 readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>();
22 23
23 24 public Dictionary<int, HashSet<int>> FollowposMap {
24 25 get { return m_followpos; }
@@ -30,19 +31,19 namespace Implab.Automaton.RegularExpres
30 31 }
31 32
32 33 bool Nullable(object n) {
33 if (n is EmptyToken<TTag> || n is StarToken<TTag>)
34 if (n is EmptyToken || n is StarToken)
34 35 return true;
35 var altToken = n as AltToken<TTag>;
36 var altToken = n as AltToken;
36 37 if (altToken != null)
37 38 return Nullable(altToken.Left) || Nullable(altToken.Right);
38 var catToken = n as CatToken<TTag>;
39 var catToken = n as CatToken;
39 40 if (catToken != null)
40 41 return Nullable(catToken.Left) && Nullable(catToken.Right);
41 42 return false;
42 43 }
43 44
44 45
45 public void Visit(AltToken<TTag> token) {
46 public void Visit(AltToken token) {
46 47 if (m_root == null)
47 48 m_root = token;
48 49 var firtspos = new HashSet<int>();
@@ -60,7 +61,7 namespace Implab.Automaton.RegularExpres
60 61 m_lastpos = lastpos;
61 62 }
62 63
63 public void Visit(StarToken<TTag> token) {
64 public void Visit(StarToken token) {
64 65 if (m_root == null)
65 66 m_root = token;
66 67 token.Token.Accept(this);
@@ -69,7 +70,7 namespace Implab.Automaton.RegularExpres
69 70 Followpos(i).UnionWith(m_firstpos);
70 71 }
71 72
72 public void Visit(CatToken<TTag> token) {
73 public void Visit(CatToken token) {
73 74 if (m_root == null)
74 75 m_root = token;
75 76
@@ -97,12 +98,12 namespace Implab.Automaton.RegularExpres
97 98
98 99 }
99 100
100 public void Visit(EmptyToken<TTag> token) {
101 public void Visit(EmptyToken token) {
101 102 if (m_root == null)
102 103 m_root = token;
103 104 }
104 105
105 public void Visit(SymbolToken<TTag> token) {
106 public void Visit(SymbolToken token) {
106 107 if (m_root == null)
107 108 m_root = token;
108 109 m_idx++;
@@ -119,7 +120,19 namespace Implab.Automaton.RegularExpres
119 120 m_firstpos = new HashSet<int>(new[] { m_idx });
120 121 m_lastpos = new HashSet<int>(new[] { m_idx });
121 122 Followpos(m_idx);
122 m_ends.Add(m_idx, token.Tag);
123 m_ends.Add(m_idx);
124 m_tags.Add(m_idx, token.Tag);
125 }
126
127 public void Visit(EndToken token) {
128 if (m_root == null)
129 m_root = token;
130 m_idx++;
131 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
132 m_firstpos = new HashSet<int>(new[] { m_idx });
133 m_lastpos = new HashSet<int>(new[] { m_idx });
134 Followpos(m_idx);
135 m_ends.Add(m_idx);
123 136 }
124 137
125 138 public void BuildDFA(ITaggedDFABuilder<TTag> dfa) {
@@ -157,14 +170,18 namespace Implab.Automaton.RegularExpres
157 170 }
158 171 }
159 172 if (next.Count > 0) {
160 int s2 = states.Translate(next);
161 if (s2 == DFAConst.UNCLASSIFIED_INPUT) {
173 int s2;
174 if (states.Contains(next)) {
175 s2 = states.Translate(next);
176 } else {
162 177 s2 = states.DefineSymbol(next);
163 178
164 tags = GetStateTags(next);
165 if (tags != null && tags.Length > 0) {
179 if (IsFinal(next)) {
180
166 181 dfa.MarkFinalState(s2);
167 dfa.SetStateTag(s2, tags);
182 tags = GetStateTags(next);
183 if (tags != null && tags.Length > 0)
184 dfa.SetStateTag(s2, tags);
168 185 }
169 186
170 187 queue.Enqueue(next);
@@ -175,9 +192,14 namespace Implab.Automaton.RegularExpres
175 192 }
176 193 }
177 194
195 bool IsFinal(IEnumerable<int> state) {
196 Debug.Assert(state != null);
197 return state.Any(m_ends.Contains);
198 }
199
178 200 TTag[] GetStateTags(IEnumerable<int> state) {
179 201 Debug.Assert(state != null);
180 return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray();
202 return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray();
181 203 }
182 204
183 205 }
@@ -1,28 +1,25
1 1 using Implab;
2 2 using System;
3 using System.Collections.Generic;
4 using System.Linq;
5 using System.Text;
6 using System.Threading.Tasks;
3
7 4
8 5 namespace Implab.Automaton.RegularExpressions {
9 6 /// <summary>
10 7 /// Замыкание выражения с 0 и более повторов.
11 8 /// </summary>
12 public class StarToken<TTag>: Token<TTag> {
9 public class StarToken: Token {
13 10
14 Token<TTag> m_token;
11 Token m_token;
15 12
16 public Token<TTag> Token {
13 public Token Token {
17 14 get { return m_token; }
18 15 }
19 16
20 public StarToken(Token<TTag> token) {
17 public StarToken(Token token) {
21 18 Safe.ArgumentNotNull(token, "token");
22 19 m_token = token;
23 20 }
24 21
25 public override void Accept(IVisitor<TTag> visitor) {
22 public override void Accept(IVisitor visitor) {
26 23 Safe.ArgumentNotNull(visitor, "visitor");
27 24 visitor.Visit(this);
28 25 }
@@ -4,7 +4,7 namespace Implab.Automaton.RegularExpres
4 4 /// <summary>
5 5 /// Выражение, соответствующее одному символу.
6 6 /// </summary>
7 public class SymbolToken<TTag> : Token<TTag> {
7 public class SymbolToken: Token {
8 8 int m_value;
9 9
10 10 public int Value {
@@ -14,7 +14,7 namespace Implab.Automaton.RegularExpres
14 14 public SymbolToken(int value) {
15 15 m_value = value;
16 16 }
17 public override void Accept(IVisitor<TTag> visitor) {
17 public override void Accept(IVisitor visitor) {
18 18 Safe.ArgumentNotNull(visitor, "visitor");
19 19
20 20 visitor.Visit(this);
@@ -3,46 +3,46 using System;
3 3 using System.Linq;
4 4
5 5 namespace Implab.Automaton.RegularExpressions {
6 public abstract class Token<TTag> {
7 public abstract void Accept(IVisitor<TTag> visitor);
6 public abstract class Token {
7 public abstract void Accept(IVisitor visitor);
8 8
9 public Token<TTag> Extend() {
10 return Cat(new EndToken<TTag>());
9 public Token Extend() {
10 return Cat(new EndToken());
11 11 }
12 12
13 public Token<TTag> Tag(TTag tag) {
13 public Token Tag<TTag>(TTag tag) {
14 14 return Cat(new EndToken<TTag>(tag));
15 15 }
16 16
17 public Token<TTag> Cat(Token<TTag> right) {
18 return new CatToken<TTag>(this, right);
17 public Token Cat(Token right) {
18 return new CatToken(this, right);
19 19 }
20 20
21 public Token<TTag> Or(Token<TTag> right) {
22 return new AltToken<TTag>(this, right);
21 public Token Or(Token right) {
22 return new AltToken(this, right);
23 23 }
24 24
25 public Token<TTag> Optional() {
26 return Or(new EmptyToken<TTag>());
25 public Token Optional() {
26 return Or(new EmptyToken());
27 27 }
28 28
29 public Token<TTag> EClosure() {
30 return new StarToken<TTag>(this);
29 public Token EClosure() {
30 return new StarToken(this);
31 31 }
32 32
33 public Token<TTag> Closure() {
34 return Cat(new StarToken<TTag>(this));
33 public Token Closure() {
34 return Cat(new StarToken(this));
35 35 }
36 36
37 public Token<TTag> Repeat(int count) {
38 Token<TTag> token = null;
37 public Token Repeat(int count) {
38 Token token = null;
39 39
40 40 for (int i = 0; i < count; i++)
41 41 token = token != null ? token.Cat(this) : this;
42 return token ?? new EmptyToken<TTag>();
42 return token ?? new EmptyToken();
43 43 }
44 44
45 public Token<TTag> Repeat(int min, int max) {
45 public Token Repeat(int min, int max) {
46 46 if (min > max || min < 1)
47 47 throw new ArgumentOutOfRangeException();
48 48 var token = Repeat(min);
@@ -52,11 +52,11 namespace Implab.Automaton.RegularExpres
52 52 return token;
53 53 }
54 54
55 public static Token<TTag> New(params int[] set) {
55 public static Token New(params int[] set) {
56 56 Safe.ArgumentNotNull(set, "set");
57 Token<TTag> token = null;
57 Token token = null;
58 58 foreach(var c in set.Distinct())
59 token = token == null ? new SymbolToken<TTag>(c) : token.Or(new SymbolToken<TTag>(c));
59 token = token == null ? new SymbolToken(c) : token.Or(new SymbolToken(c));
60 60 return token;
61 61 }
62 62 }
@@ -4,8 +4,6 using Implab.Automaton;
4 4
5 5 namespace Implab.Formats {
6 6 public class ByteAlphabet : IndexedAlphabetBase<byte> {
7 public ByteAlphabet() {
8 }
9 7
10 8 #region implemented abstract members of IndexedAlphabetBase
11 9
@@ -5,9 +5,6 using Implab.Automaton;
5 5 namespace Implab.Formats {
6 6 public class CharAlphabet: IndexedAlphabetBase<char> {
7 7
8 public CharAlphabet() {
9 }
10
11 8 public override int GetSymbolIndex(char symbol) {
12 9 return symbol;
13 10 }
@@ -4,7 +4,6 using Implab.Automaton;
4 4 using System.Text;
5 5 using Implab.Components;
6 6 using System.IO;
7 using Implab.Automaton.RegularExpressions;
8 7
9 8 namespace Implab.Formats.JSON {
10 9 /// <summary>
@@ -13,8 +12,8 namespace Implab.Formats.JSON {
13 12 public class JSONScanner : Disposable {
14 13 readonly StringBuilder m_builder = new StringBuilder();
15 14
16 readonly ScannerContext<JSONGrammar.TokenType> m_jsonScanner = JSONGrammar.Instance.JsonDFA;
17 readonly ScannerContext<JSONGrammar.TokenType> m_stringScanner = JSONGrammar.Instance.JsonStringDFA;
15 readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonDFA;
16 readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringDFA;
18 17
19 18
20 19 readonly TextScanner m_scanner;
@@ -31,7 +30,7 namespace Implab.Formats.JSON {
31 30 public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
32 31 Safe.ArgumentNotNull(reader, "reader");
33 32
34 m_scanner = new ReaderScanner(reader);
33 m_scanner = new ReaderScanner(reader, bufferMax, chunkSize);
35 34 }
36 35
37 36 /// <summary>
@@ -44,7 +43,7 namespace Implab.Formats.JSON {
44 43 /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks>
45 44 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
46 45 JSONGrammar.TokenType[] tag;
47 if (m_jsonScanner.Execute(m_scanner, out tag)) {
46 if (m_jsonContext.Execute(m_scanner, out tag)) {
48 47 switch (tag[0]) {
49 48 case JSONGrammar.TokenType.StringBound:
50 49 tokenValue = ReadString();
@@ -68,12 +67,12 namespace Implab.Formats.JSON {
68 67
69 68 string ReadString() {
70 69 int pos = 0;
71 char[] buf = new char[6]; // the buffer for unescaping chars
70 var buf = new char[6]; // the buffer for unescaping chars
72 71
73 72 JSONGrammar.TokenType[] tag;
74 73 m_builder.Clear();
75 74
76 while (m_stringScanner.Execute(m_scanner, out tag)) {
75 while (m_stringContext.Execute(m_scanner, out tag)) {
77 76 switch (tag[0]) {
78 77 case JSONGrammar.TokenType.StringBound:
79 78 return m_builder.ToString();
@@ -89,13 +88,17 namespace Implab.Formats.JSON {
89 88 m_scanner.CopyTokenTo(buf, 0);
90 89 m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
91 90 break;
92 default:
93 break;
94 91 }
95 92
96 93 }
97 94
98 95 throw new ParserException("Unexpected end of data");
99 96 }
97
98 protected override void Dispose(bool disposing) {
99 if (disposing)
100 Safe.Dispose(m_scanner);
101 base.Dispose(disposing);
102 }
100 103 }
101 104 }
@@ -1,11 +1,17
1 using System;
2
3 namespace Implab.Formats {
1 namespace Implab.Formats {
2 /// <summary>
3 /// Represents a scanner configuration, based on a DFA, that is used to recognize tokens.
4 /// </summary>
4 5 public class ScannerContext<TTag> {
6
5 7 public int[,] Dfa { get; private set; }
8
6 9 public bool[] Final { get; private set; }
10
7 11 public TTag[][] Tags { get; private set; }
12
8 13 public int State { get; private set; }
14
9 15 public int[] Alphabet { get; private set; }
10 16
11 17 public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) {
@@ -1,9 +1,7
1 1 using System;
2 2 using Implab.Components;
3 using Implab.Automaton.RegularExpressions;
4 3 using System.Diagnostics;
5 4 using Implab.Automaton;
6 using System.IO;
7 5 using System.Text;
8 6
9 7 namespace Implab.Formats {
@@ -18,7 +16,7 namespace Implab.Formats {
18 16 int m_tokenLength;
19 17
20 18 /// <summary>
21 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class.
19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
22 20 /// </summary>
23 21 /// <param name="bufferMax">Buffer max.</param>
24 22 /// <param name="chunkSize">Chunk size.</param>
@@ -30,7 +28,7 namespace Implab.Formats {
30 28 }
31 29
32 30 /// <summary>
33 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class.
31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
34 32 /// </summary>
35 33 /// <param name="buffer">Buffer.</param>
36 34 protected TextScanner(char[] buffer) {
@@ -48,7 +46,9 namespace Implab.Formats {
48 46 /// <param name="final">Final states of the automaton.</param>
49 47 /// <param name="tags">Tags.</param>
50 48 /// <param name="state">The initial state for the automaton.</param>
51 internal bool ReadToken<TTag>(int[,] dfa, int[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
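49 /// <param name="alphabet">Translation map from a character code to the input symbol index used to select the DFA transition.</param>
50 /// <param name="tag">Receives the tags of the reached final state when a token is recognized.</param>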
51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
52 52 Safe.ArgumentNotNull();
53 53 m_tokenLength = 0;
54 54
@@ -58,10 +58,10 namespace Implab.Formats {
58 58 // after the next chunk is read the offset in the buffer may change
59 59 int pos = m_bufferOffset + m_tokenLength;
60 60
61 while(pos < m_bufferSize) {
61 while (pos < m_bufferSize) {
62 62 var ch = m_buffer[pos];
63 63
64 state = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]];
64 state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]];
65 65 if (state == DFAConst.UNREACHABLE_STATE)
66 66 break;
67 67
@@ -77,16 +77,17 namespace Implab.Formats {
77 77 if (final[state]) {
78 78 tag = tags[state];
79 79 return true;
80 } else {
81 if (m_bufferOffset == m_bufferSize) {
82 if (m_tokenLength == 0) //EOF
80 }
81
82 if (m_bufferOffset == m_bufferSize) {
83 if (m_tokenLength == 0) //EOF
83 84 return false;
84 85
85 throw new ParserException();
86 }
87 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
86 throw new ParserException();
87 }
88
89 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
88 90
89 }
90 91 }
91 92
92 93 protected void Feed(char[] buffer, int offset, int length) {
@@ -108,7 +109,7 namespace Implab.Formats {
108 109 var size = used + free;
109 110
110 111 if (size > m_bufferMax)
111 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached"), m_bufferMax/1024);
112 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024));
112 113
113 114 var temp = new char[size];
114 115
@@ -160,11 +160,9
160 160 <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" />
161 161 <Compile Include="Automaton\RegularExpressions\CatToken.cs" />
162 162 <Compile Include="Automaton\DFAConst.cs" />
163 <Compile Include="Automaton\RegularExpressions\Grammar.cs" />
164 163 <Compile Include="Automaton\RegularExpressions\StarToken.cs" />
165 164 <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" />
166 165 <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" />
167 <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
168 166 <Compile Include="Automaton\RegularExpressions\Token.cs" />
169 167 <Compile Include="Automaton\RegularExpressions\IVisitor.cs" />
170 168 <Compile Include="Automaton\AutomatonTransition.cs" />
@@ -192,6 +190,10
192 190 <Compile Include="Formats\StringScanner.cs" />
193 191 <Compile Include="Formats\ReaderScanner.cs" />
194 192 <Compile Include="Formats\ScannerContext.cs" />
193 <Compile Include="Formats\Grammar.cs" />
194 <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" />
195 <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
196 <Compile Include="Automaton\RegularExpressions\IVisitorT.cs" />
195 197 </ItemGroup>
196 198 <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
197 199 <ItemGroup />
@@ -41,6 +41,11 namespace Implab
41 41 throw new ArgumentOutOfRangeException(paramName);
42 42 }
43 43
44 public static void ArgumentOfType(object value, Type type, string paramName) {
45 if (!type.IsInstanceOfType(value))
46 throw new ArgumentException(String.Format("The parameter must be of type {0}", type), paramName);
47 }
48
44 49 public static void Dispose(params IDisposable[] objects) {
45 50 foreach (var d in objects)
46 51 if (d != null)
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now