| @@ -0,0 +1,33 | |||
|  | 1 | using Implab; | |
|  | 2 | ||
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// The end symbol of an extended regular expression; it is used to
    /// identify final states while the DFA is being built. This variant
    /// carries a tag of type <typeparamref name="TTag"/>.
    /// </summary>
    public class EndToken<TTag>: Token {

        // The tag never changes after construction.
        readonly TTag m_tag;

        /// <summary>
        /// Creates an end token carrying the specified tag.
        /// </summary>
        /// <param name="tag">The tag stored in this token.</param>
        public EndToken(TTag tag) {
            m_tag = tag;
        }

        /// <summary>
        /// Creates an end token carrying <c>default(TTag)</c>.
        /// </summary>
        public EndToken()
            : this(default(TTag)) {
        }

        /// <summary>
        /// The tag passed to the constructor.
        /// </summary>
        public TTag Tag {
            get { return m_tag; }
        }

        /// <summary>
        /// Dispatches this token to <see cref="IVisitor{TTag}.Visit"/>.
        /// </summary>
        /// <param name="visitor">
        /// The visitor; it must implement <see cref="IVisitor{TTag}"/> for the
        /// same <typeparamref name="TTag"/> as this token.
        /// </param>
        public override void Accept(IVisitor visitor) {
            // The null check must come first: a null reference is never an
            // instance of any type, so running the type check first would
            // report a null visitor as a type mismatch and the null check
            // would never be reached for it.
            Safe.ArgumentNotNull(visitor, "visitor");
            Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor");
            ((IVisitor<TTag>)visitor).Visit(this);
        }

        /// <summary>
        /// Returns the textual form of the end marker, <c>"#"</c>.
        /// </summary>
        public override string ToString() {
            return "#";
        }
    }
}
| @@ -0,0 +1,8 | |||
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// A visitor of the regular-expression syntax tree that, in addition to
    /// the members inherited from <see cref="IVisitor"/>, accepts end tokens
    /// carrying a tag of type <typeparamref name="T"/>.
    /// </summary>
    public interface IVisitor<T> : IVisitor {
        /// <summary>
        /// Visits an end token that carries a tag.
        /// </summary>
        /// <param name="token">The tagged end token being visited.</param>
        void Visit(EndToken<T> token);
    }
}
| @@ -0,0 +1,100 | |||
|  | 1 | using Implab; | |
|  | 2 | using System; | |
|  | 3 | using System.Collections.Generic; | |
|  | 4 | using System.Linq; | |
|  | 5 | using Implab.Automaton; | |
|  | 6 | using Implab.Automaton.RegularExpressions; | |
|  | 7 | ||
namespace Implab.Formats {
    /// <summary>
    /// Abstract base class for grammars. Lets derived classes build regular
    /// expressions over an alphabet of <typeparamref name="TSymbol"/> and
    /// compile them into a <see cref="ScannerContext{TTag}"/>.
    /// </summary>
    // NOTE(review): this class refers to Token<TTag> and SymbolToken<TTag>;
    // confirm these generic token types are still available in the project.
    public abstract class Grammar<TSymbol, TTag> {

        /// <summary>
        /// The alphabet builder used to translate symbols into integer codes
        /// and to register symbols that are not known yet.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        /// <summary>
        /// Creates a token for <see cref="DFAConst.UNCLASSIFIED_INPUT"/>.
        /// </summary>
        protected SymbolToken<TTag> UnclassifiedToken() {
            return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Registers every symbol of the sequence in the alphabet builder.
        /// </summary>
        /// <param name="alphabet">The symbols to define; must not be null.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet)
                AlphabetBuilder.DefineSymbol(ch);
        }

        /// <summary>
        /// Creates a token for a single symbol, defining the symbol in the
        /// alphabet when it is not classified yet.
        /// </summary>
        protected Token<TTag> SymbolToken(TSymbol symbol) {
            return Token<TTag>.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token from the distinct symbols of the sequence, defining
        /// the symbols that are not classified yet.
        /// </summary>
        /// <param name="symbols">The symbols; must not be null.</param>
        protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Convenience overload of <c>SymbolToken(IEnumerable)</c> accepting
        /// the symbols as a parameter list.
        /// </summary>
        protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
            return SymbolToken(set);
        }

        // Translates the symbol, defining it first when the builder reports
        // it as unclassified.
        int TranslateOrAdd(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT)
                t = AlphabetBuilder.DefineSymbol(ch);
            return t;
        }

        // Lazy sequence: symbols are translated (and possibly defined) only
        // when the result is enumerated.
        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        // Translates the symbol and fails when it is unclassified.
        int TranslateOrDie(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT)
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            return t;
        }

        // Lazy sequence: the exception for an unclassified symbol is raised
        // during enumeration, not when this method is called.
        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token from every code in the range
        /// <c>[0, AlphabetBuilder.Count)</c> except the codes of the
        /// specified symbols.
        /// </summary>
        /// <param name="symbols">The symbols to exclude; must not be null.</param>
        /// <exception cref="ApplicationException">
        /// One of the symbols is not classified in the alphabet.
        /// </exception>
        protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
        }

        /// <summary>
        /// Creates the alphabet passed to the DFA optimization step in
        /// <see cref="BuildScannerContext"/>.
        /// </summary>
        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        /// <summary>
        /// Builds a DFA from the regular expression, optimizes it and packs
        /// the resulting tables into a scanner context.
        /// </summary>
        /// <param name="regexp">The root of the expression; must not be null.</param>
        /// <exception cref="ApplicationException">
        /// The initial state of the DFA is final, i.e. the language contains
        /// the empty token.
        /// </exception>
        protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
            // Fail early with a named argument instead of a bare null
            // dereference, consistent with the other methods of this class.
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>();
            regexp.Accept( visitor );

            visitor.BuildDFA(dfa);

            if (dfa.IsFinalState(dfa.InitialState))
                throw new ApplicationException("The specified language contains empty token");

            var ab = CreateAlphabet();
            var optimal = dfa.Optimize(ab);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                ab.GetTranslationMap()
            );
        }

    }


}
| @@ -1,17 +1,17 | |||
|  | 1 | 1 | using System; | 
|  | 2 | 2 | |
|  | 3 | 3 | namespace Implab.Automaton.RegularExpressions { | 
|  | 4 | public class AltToken | |
|  | 5 | public AltToken(Token | |
|  | 4 | public class AltToken: BinaryToken { | |
|  | 5 | public AltToken(Token left, Token right) | |
|  | 6 | 6 | : base(left, right) { | 
|  | 7 | 7 | } | 
|  | 8 | 8 | |
|  | 9 | public override void Accept(IVisitor | |
|  | 9 | public override void Accept(IVisitor visitor) { | |
|  | 10 | 10 | Safe.ArgumentNotNull(visitor, "visitor"); | 
|  | 11 | 11 | visitor.Visit(this); | 
|  | 12 | 12 | } | 
|  | 13 | 13 | public override string ToString() { | 
|  | 14 | return String.Format(Right is BinaryToken | |
|  | 14 | return String.Format(Right is BinaryToken ? "{0}|({1})" : "{0}|{1}", Left, Right); | |
|  | 15 | 15 | } | 
|  | 16 | 16 | } | 
|  | 17 | 17 | } | 
| @@ -1,19 +1,19 | |||
|  | 1 | 1 | using Implab; | 
|  | 2 | 2 | |
|  | 3 | 3 | namespace Implab.Automaton.RegularExpressions { | 
|  | 4 | public abstract class BinaryToken | |
|  | 5 | readonly Token | |
|  | 6 | readonly Token | |
|  | 4 | public abstract class BinaryToken: Token { | |
|  | 5 | readonly Token m_left; | |
|  | 6 | readonly Token m_right; | |
|  | 7 | 7 | |
|  | 8 | public Token | |
|  | 8 | public Token Left { | |
|  | 9 | 9 | get { return m_left; } | 
|  | 10 | 10 | } | 
|  | 11 | 11 | |
|  | 12 | public Token | |
|  | 12 | public Token Right { | |
|  | 13 | 13 | get { return m_right; } | 
|  | 14 | 14 | } | 
|  | 15 | 15 | |
|  | 16 | protected BinaryToken(Token | |
|  | 16 | protected BinaryToken(Token left, Token right) { | |
|  | 17 | 17 | Safe.ArgumentNotNull(m_left = left, "left"); | 
|  | 18 | 18 | Safe.ArgumentNotNull(m_right = right, "right"); | 
|  | 19 | 19 | } | 
| @@ -1,12 +1,12 | |||
|  | 1 | 1 | using System; | 
|  | 2 | 2 | |
|  | 3 | 3 | namespace Implab.Automaton.RegularExpressions { | 
|  | 4 | public class CatToken | |
|  | 5 | public CatToken(Token | |
|  | 4 | public class CatToken : BinaryToken { | |
|  | 5 | public CatToken(Token left, Token right) | |
|  | 6 | 6 | : base(left, right) { | 
|  | 7 | 7 | } | 
|  | 8 | 8 | |
|  | 9 | public override void Accept(IVisitor | |
|  | 9 | public override void Accept(IVisitor visitor) { | |
|  | 10 | 10 | Safe.ArgumentNotNull(visitor, "visitor"); | 
|  | 11 | 11 | visitor.Visit(this); | 
|  | 12 | 12 | } | 
| @@ -15,8 +15,8 namespace Implab.Automaton.RegularExpres | |||
|  | 15 | 15 | return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right)); | 
|  | 16 | 16 | } | 
|  | 17 | 17 | |
|  | 18 | static string FormatToken(Token | |
|  | 19 | return String.Format(token is AltToken | |
|  | 18 | static string FormatToken(Token token) { | |
|  | 19 | return String.Format(token is AltToken ? "({0})" : "{0}", token); | |
|  | 20 | 20 | } | 
|  | 21 | 21 | } | 
|  | 22 | 22 | } | 
| @@ -1,8 +1,8 | |||
|  | 1 | 1 | using Implab; | 
|  | 2 | 2 | |
|  | 3 | 3 | namespace Implab.Automaton.RegularExpressions { | 
|  | 4 | public class EmptyToken | |
|  | 5 | public override void Accept(IVisitor | |
|  | 4 | public class EmptyToken: Token { | |
|  | 5 | public override void Accept(IVisitor visitor) { | |
|  | 6 | 6 | Safe.ArgumentNotNull(visitor, "visitor"); | 
|  | 7 | 7 | visitor.Visit(this); | 
|  | 8 | 8 | } | 
| @@ -5,23 +5,9 namespace Implab.Automaton.RegularExpres | |||
|  | 5 | 5 | /// Конечный символ расширенного регулярного выражения, при построении ДКА | 
|  | 6 | 6 | /// используется для определения конечных состояний. | 
|  | 7 | 7 | /// </summary> | 
|  | 8 | public class EndToken | |
|  | 9 | ||
|  | 10 | TTag m_tag; | |
|  | 11 | ||
|  | 12 | public EndToken(TTag tag) { | |
|  | 13 | m_tag = tag; | |
|  | 14 | } | |
|  | 8 | public class EndToken: Token { | |
|  | 15 | 9 | |
|  | 16 | public EndToken() | |
|  | 17 | : this(default(TTag)) { | |
|  | 18 | } | |
|  | 19 | ||
|  | 20 | public TTag Tag { | |
|  | 21 | get { return m_tag; } | |
|  | 22 | } | |
|  | 23 | ||
|  | 24 | public override void Accept(IVisitor<TTag> visitor) { | |
|  | 10 | public override void Accept(IVisitor visitor) { | |
|  | 25 | 11 | Safe.ArgumentNotNull(visitor, "visitor"); | 
|  | 26 | 12 | visitor.Visit(this); | 
|  | 27 | 13 | } | 
| @@ -1,5 +1,4 | |||
|  | 1 | using System; | |
|  | 2 | ||
|  | 1 | ||
|  | 3 | 2 | namespace Implab.Automaton.RegularExpressions { | 
|  | 4 | 3 | public interface ITaggedDFABuilder<TTag> : IDFATableBuilder { | 
|  | 5 | 4 | void SetStateTag(int s, TTag[] tags); | 
| @@ -2,12 +2,12 | |||
|  | 2 | 2 | /// <summary> | 
|  | 3 | 3 | /// Интерфейс обходчика синтаксического дерева регулярного выражения | 
|  | 4 | 4 | /// </summary> | 
|  | 5 | public interface IVisitor | |
|  | 6 | void Visit(AltToken | |
|  | 7 | void Visit(StarToken | |
|  | 8 | void Visit(CatToken | |
|  | 9 | void Visit(EmptyToken | |
|  | 10 | void Visit(EndToken | |
|  | 11 | void Visit(SymbolToken | |
|  | 5 | public interface IVisitor { | |
|  | 6 | void Visit(AltToken token); | |
|  | 7 | void Visit(StarToken token); | |
|  | 8 | void Visit(CatToken token); | |
|  | 9 | void Visit(EmptyToken token); | |
|  | 10 | void Visit(EndToken token); | |
|  | 11 | void Visit(SymbolToken token); | |
|  | 12 | 12 | } | 
|  | 13 | 13 | } | 
| @@ -1,5 +1,4 | |||
|  | 1 | using System; | |
|  | 2 | using System.Collections.Generic; | |
|  | 1 | using System.Collections.Generic; | |
|  | 3 | 2 | using System.Linq; | 
|  | 4 | 3 | |
|  | 5 | 4 | namespace Implab.Automaton.RegularExpressions { | 
| @@ -12,13 +12,14 namespace Implab.Automaton.RegularExpres | |||
|  | 12 | 12 | /// </summary> | 
|  | 13 | 13 | public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { | 
|  | 14 | 14 | int m_idx; | 
|  | 15 | Token | |
|  | 15 | Token m_root; | |
|  | 16 | 16 | HashSet<int> m_firstpos; | 
|  | 17 | 17 | HashSet<int> m_lastpos; | 
|  | 18 | 18 | |
|  | 19 | 19 | readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | 
|  | 20 | 20 | readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | 
|  | 21 | readonly | |
|  | 21 | readonly HashSet<int> m_ends = new HashSet<int>(); | |
|  | 22 | readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | |
|  | 22 | 23 | |
|  | 23 | 24 | public Dictionary<int, HashSet<int>> FollowposMap { | 
|  | 24 | 25 | get { return m_followpos; } | 
| @@ -30,19 +31,19 namespace Implab.Automaton.RegularExpres | |||
|  | 30 | 31 | } | 
|  | 31 | 32 | |
|  | 32 | 33 | bool Nullable(object n) { | 
|  | 33 | if (n is EmptyToken | |
|  | 34 | if (n is EmptyToken || n is StarToken) | |
|  | 34 | 35 | return true; | 
|  | 35 | var altToken = n as AltToken | |
|  | 36 | var altToken = n as AltToken; | |
|  | 36 | 37 | if (altToken != null) | 
|  | 37 | 38 | return Nullable(altToken.Left) || Nullable(altToken.Right); | 
|  | 38 | var catToken = n as CatToken | |
|  | 39 | var catToken = n as CatToken; | |
|  | 39 | 40 | if (catToken != null) | 
|  | 40 | 41 | return Nullable(catToken.Left) && Nullable(catToken.Right); | 
|  | 41 | 42 | return false; | 
|  | 42 | 43 | } | 
|  | 43 | 44 | |
|  | 44 | 45 | |
|  | 45 | public void Visit(AltToken | |
|  | 46 | public void Visit(AltToken token) { | |
|  | 46 | 47 | if (m_root == null) | 
|  | 47 | 48 | m_root = token; | 
|  | 48 | 49 | var firtspos = new HashSet<int>(); | 
| @@ -60,7 +61,7 namespace Implab.Automaton.RegularExpres | |||
|  | 60 | 61 | m_lastpos = lastpos; | 
|  | 61 | 62 | } | 
|  | 62 | 63 | |
|  | 63 | public void Visit(StarToken | |
|  | 64 | public void Visit(StarToken token) { | |
|  | 64 | 65 | if (m_root == null) | 
|  | 65 | 66 | m_root = token; | 
|  | 66 | 67 | token.Token.Accept(this); | 
| @@ -69,7 +70,7 namespace Implab.Automaton.RegularExpres | |||
|  | 69 | 70 | Followpos(i).UnionWith(m_firstpos); | 
|  | 70 | 71 | } | 
|  | 71 | 72 | |
|  | 72 | public void Visit(CatToken | |
|  | 73 | public void Visit(CatToken token) { | |
|  | 73 | 74 | if (m_root == null) | 
|  | 74 | 75 | m_root = token; | 
|  | 75 | 76 | |
| @@ -97,12 +98,12 namespace Implab.Automaton.RegularExpres | |||
|  | 97 | 98 | |
|  | 98 | 99 | } | 
|  | 99 | 100 | |
|  | 100 | public void Visit(EmptyToken | |
|  | 101 | public void Visit(EmptyToken token) { | |
|  | 101 | 102 | if (m_root == null) | 
|  | 102 | 103 | m_root = token; | 
|  | 103 | 104 | } | 
|  | 104 | 105 | |
|  | 105 | public void Visit(SymbolToken | |
|  | 106 | public void Visit(SymbolToken token) { | |
|  | 106 | 107 | if (m_root == null) | 
|  | 107 | 108 | m_root = token; | 
|  | 108 | 109 | m_idx++; | 
| @@ -119,7 +120,19 namespace Implab.Automaton.RegularExpres | |||
|  | 119 | 120 | m_firstpos = new HashSet<int>(new[] { m_idx }); | 
|  | 120 | 121 | m_lastpos = new HashSet<int>(new[] { m_idx }); | 
|  | 121 | 122 | Followpos(m_idx); | 
|  | 122 | m_ends.Add(m_idx | |
|  | 123 | m_ends.Add(m_idx); | |
|  | 124 | m_tags.Add(m_idx, token.Tag); | |
|  | 125 | } | |
|  | 126 | ||
/// <summary>
/// Handles an untagged end token: allocates the next position, maps it to
/// <see cref="DFAConst.UNCLASSIFIED_INPUT"/>, makes it the only first and
/// last position, and records it as an end position.
/// </summary>
public void Visit(EndToken token) {
    // The first visited token becomes the root.
    m_root = m_root ?? token;

    var position = ++m_idx;
    m_indexes[position] = DFAConst.UNCLASSIFIED_INPUT;
    m_firstpos = new HashSet<int> { position };
    m_lastpos = new HashSet<int> { position };
    Followpos(position);
    m_ends.Add(position);
}
|  | 124 | 137 | |
|  | 125 | 138 | public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { | 
| @@ -157,14 +170,18 namespace Implab.Automaton.RegularExpres | |||
|  | 157 | 170 | } | 
|  | 158 | 171 | } | 
|  | 159 | 172 | if (next.Count > 0) { | 
|  | 160 | int s2 | |
|  | 161 | if ( | |
|  | 173 | int s2; | |
|  | 174 | if (states.Contains(next)) { | |
|  | 175 | s2 = states.Translate(next); | |
|  | 176 | } else { | |
|  | 162 | 177 | s2 = states.DefineSymbol(next); | 
|  | 163 | 178 | |
|  | 164 |  | |
|  | 165 |  | |
|  | 179 | if (IsFinal(next)) { | |
|  | 180 | ||
|  | 166 | 181 | dfa.MarkFinalState(s2); | 
|  | 167 |  | |
|  | 182 | tags = GetStateTags(next); | |
|  | 183 | if (tags != null && tags.Length > 0) | |
|  | 184 | dfa.SetStateTag(s2, tags); | |
|  | 168 | 185 | } | 
|  | 169 | 186 | |
|  | 170 | 187 | queue.Enqueue(next); | 
| @@ -175,9 +192,14 namespace Implab.Automaton.RegularExpres | |||
|  | 175 | 192 | } | 
|  | 176 | 193 | } | 
|  | 177 | 194 | |
// Returns true when at least one position of the state is a recorded end
// position.
bool IsFinal(IEnumerable<int> state) {
    Debug.Assert(state != null);
    foreach (var position in state) {
        if (m_ends.Contains(position))
            return true;
    }
    return false;
}
|  | 199 | ||
|  | 178 | 200 | TTag[] GetStateTags(IEnumerable<int> state) { | 
|  | 179 | 201 | Debug.Assert(state != null); | 
|  | 180 | return state.Where(m_ | |
|  | 202 | return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); | |
|  | 181 | 203 | } | 
|  | 182 | 204 | |
|  | 183 | 205 | } | 
| @@ -1,28 +1,25 | |||
|  | 1 | 1 | using Implab; | 
|  | 2 | 2 | using System; | 
|  | 3 | using System.Collections.Generic; | |
|  | 4 | using System.Linq; | |
|  | 5 | using System.Text; | |
|  | 6 | using System.Threading.Tasks; | |
|  | 3 | ||
|  | 7 | 4 | |
|  | 8 | 5 | namespace Implab.Automaton.RegularExpressions { | 
|  | 9 | 6 | /// <summary> | 
|  | 10 | 7 | /// Замыкание выражения с 0 и более повторов. | 
|  | 11 | 8 | /// </summary> | 
|  | 12 | public class StarToken | |
|  | 9 | public class StarToken: Token { | |
|  | 13 | 10 | |
|  | 14 | Token | |
|  | 11 | Token m_token; | |
|  | 15 | 12 | |
|  | 16 | public Token | |
|  | 13 | public Token Token { | |
|  | 17 | 14 | get { return m_token; } | 
|  | 18 | 15 | } | 
|  | 19 | 16 | |
|  | 20 | public StarToken(Token | |
|  | 17 | public StarToken(Token token) { | |
|  | 21 | 18 | Safe.ArgumentNotNull(token, "token"); | 
|  | 22 | 19 | m_token = token; | 
|  | 23 | 20 | } | 
|  | 24 | 21 | |
|  | 25 | public override void Accept(IVisitor | |
|  | 22 | public override void Accept(IVisitor visitor) { | |
|  | 26 | 23 | Safe.ArgumentNotNull(visitor, "visitor"); | 
|  | 27 | 24 | visitor.Visit(this); | 
|  | 28 | 25 | } | 
| @@ -4,7 +4,7 namespace Implab.Automaton.RegularExpres | |||
|  | 4 | 4 | /// <summary> | 
|  | 5 | 5 | /// Выражение, соответсвующее одному символу. | 
|  | 6 | 6 | /// </summary> | 
|  | 7 | public class SymbolToken | |
|  | 7 | public class SymbolToken: Token { | |
|  | 8 | 8 | int m_value; | 
|  | 9 | 9 | |
|  | 10 | 10 | public int Value { | 
| @@ -14,7 +14,7 namespace Implab.Automaton.RegularExpres | |||
|  | 14 | 14 | public SymbolToken(int value) { | 
|  | 15 | 15 | m_value = value; | 
|  | 16 | 16 | } | 
|  | 17 | public override void Accept(IVisitor | |
|  | 17 | public override void Accept(IVisitor visitor) { | |
|  | 18 | 18 | Safe.ArgumentNotNull(visitor, "visitor"); | 
|  | 19 | 19 | |
|  | 20 | 20 | visitor.Visit(this); | 
| @@ -3,46 +3,46 using System; | |||
|  | 3 | 3 | using System.Linq; | 
|  | 4 | 4 | |
|  | 5 | 5 | namespace Implab.Automaton.RegularExpressions { | 
|  | 6 | public abstract class Token | |
|  | 7 | public abstract void Accept(IVisitor | |
|  | 6 | public abstract class Token { | |
|  | 7 | public abstract void Accept(IVisitor visitor); | |
|  | 8 | 8 | |
|  | 9 | public Token | |
|  | 10 | return Cat(new EndToken | |
|  | 9 | public Token Extend() { | |
|  | 10 | return Cat(new EndToken()); | |
|  | 11 | 11 | } | 
|  | 12 | 12 | |
|  | 13 | public Token<TTag> | |
|  | 13 | public Token Tag<TTag>(TTag tag) { | |
|  | 14 | 14 | return Cat(new EndToken<TTag>(tag)); | 
|  | 15 | 15 | } | 
|  | 16 | 16 | |
|  | 17 | public Token | |
|  | 18 | return new CatToken | |
|  | 17 | public Token Cat(Token right) { | |
|  | 18 | return new CatToken(this, right); | |
|  | 19 | 19 | } | 
|  | 20 | 20 | |
|  | 21 | public Token | |
|  | 22 | return new AltToken | |
|  | 21 | public Token Or(Token right) { | |
|  | 22 | return new AltToken(this, right); | |
|  | 23 | 23 | } | 
|  | 24 | 24 | |
|  | 25 | public Token | |
|  | 26 | return Or(new EmptyToken | |
|  | 25 | public Token Optional() { | |
|  | 26 | return Or(new EmptyToken()); | |
|  | 27 | 27 | } | 
|  | 28 | 28 | |
|  | 29 | public Token | |
|  | 30 | return new StarToken | |
|  | 29 | public Token EClosure() { | |
|  | 30 | return new StarToken(this); | |
|  | 31 | 31 | } | 
|  | 32 | 32 | |
|  | 33 | public Token | |
|  | 34 | return Cat(new StarToken | |
|  | 33 | public Token Closure() { | |
|  | 34 | return Cat(new StarToken(this)); | |
|  | 35 | 35 | } | 
|  | 36 | 36 | |
|  | 37 | public Token | |
|  | 38 | Token | |
|  | 37 | public Token Repeat(int count) { | |
|  | 38 | Token token = null; | |
|  | 39 | 39 | |
|  | 40 | 40 | for (int i = 0; i < count; i++) | 
|  | 41 | 41 | token = token != null ? token.Cat(this) : this; | 
|  | 42 | return token ?? new EmptyToken | |
|  | 42 | return token ?? new EmptyToken(); | |
|  | 43 | 43 | } | 
|  | 44 | 44 | |
|  | 45 | public Token | |
|  | 45 | public Token Repeat(int min, int max) { | |
|  | 46 | 46 | if (min > max || min < 1) | 
|  | 47 | 47 | throw new ArgumentOutOfRangeException(); | 
|  | 48 | 48 | var token = Repeat(min); | 
| @@ -52,11 +52,11 namespace Implab.Automaton.RegularExpres | |||
|  | 52 | 52 | return token; | 
|  | 53 | 53 | } | 
|  | 54 | 54 | |
|  | 55 | public static Token | |
|  | 55 | public static Token New(params int[] set) { | |
|  | 56 | 56 | Safe.ArgumentNotNull(set, "set"); | 
|  | 57 | Token | |
|  | 57 | Token token = null; | |
|  | 58 | 58 | foreach(var c in set.Distinct()) | 
|  | 59 | token = token == null ? new SymbolToken | |
|  | 59 | token = token == null ? new SymbolToken(c) : token.Or(new SymbolToken(c)); | |
|  | 60 | 60 | return token; | 
|  | 61 | 61 | } | 
|  | 62 | 62 | } | 
| @@ -4,8 +4,6 using Implab.Automaton; | |||
|  | 4 | 4 | |
|  | 5 | 5 | namespace Implab.Formats { | 
|  | 6 | 6 | public class ByteAlphabet : IndexedAlphabetBase<byte> { | 
|  | 7 | public ByteAlphabet() { | |
|  | 8 | } | |
|  | 9 | 7 | |
|  | 10 | 8 | #region implemented abstract members of IndexedAlphabetBase | 
|  | 11 | 9 | |
| @@ -5,9 +5,6 using Implab.Automaton; | |||
|  | 5 | 5 | namespace Implab.Formats { | 
|  | 6 | 6 | public class CharAlphabet: IndexedAlphabetBase<char> { | 
|  | 7 | 7 | |
|  | 8 | public CharAlphabet() { | |
|  | 9 | } | |
|  | 10 | ||
|  | 11 | 8 | public override int GetSymbolIndex(char symbol) { | 
|  | 12 | 9 | return symbol; | 
|  | 13 | 10 | } | 
| @@ -4,7 +4,6 using Implab.Automaton; | |||
|  | 4 | 4 | using System.Text; | 
|  | 5 | 5 | using Implab.Components; | 
|  | 6 | 6 | using System.IO; | 
|  | 7 | using Implab.Automaton.RegularExpressions; | |
|  | 8 | 7 | |
|  | 9 | 8 | namespace Implab.Formats.JSON { | 
|  | 10 | 9 | /// <summary> | 
| @@ -13,8 +12,8 namespace Implab.Formats.JSON { | |||
|  | 13 | 12 | public class JSONScanner : Disposable { | 
|  | 14 | 13 | readonly StringBuilder m_builder = new StringBuilder(); | 
|  | 15 | 14 | |
|  | 16 | readonly ScannerContext<JSONGrammar.TokenType> m_json | |
|  | 17 | readonly ScannerContext<JSONGrammar.TokenType> m_string | |
|  | 15 | readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonDFA; | |
|  | 16 | readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringDFA; | |
|  | 18 | 17 | |
|  | 19 | 18 | |
|  | 20 | 19 | readonly TextScanner m_scanner; | 
| @@ -31,7 +30,7 namespace Implab.Formats.JSON { | |||
|  | 31 | 30 | public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { | 
|  | 32 | 31 | Safe.ArgumentNotNull(reader, "reader"); | 
|  | 33 | 32 | |
|  | 34 | m_scanner = new ReaderScanner(reader); | |
|  | 33 | m_scanner = new ReaderScanner(reader, bufferMax, chunkSize); | |
|  | 35 | 34 | } | 
|  | 36 | 35 | |
|  | 37 | 36 | /// <summary> | 
| @@ -44,7 +43,7 namespace Implab.Formats.JSON { | |||
|  | 44 | 43 | /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> | 
|  | 45 | 44 | public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { | 
|  | 46 | 45 | JSONGrammar.TokenType[] tag; | 
|  | 47 | if (m_json | |
|  | 46 | if (m_jsonContext.Execute(m_scanner, out tag)) { | |
|  | 48 | 47 | switch (tag[0]) { | 
|  | 49 | 48 | case JSONGrammar.TokenType.StringBound: | 
|  | 50 | 49 | tokenValue = ReadString(); | 
| @@ -68,12 +67,12 namespace Implab.Formats.JSON { | |||
|  | 68 | 67 | |
|  | 69 | 68 | string ReadString() { | 
|  | 70 | 69 | int pos = 0; | 
|  | 71 |  | |
|  | 70 | var buf = new char[6]; // the buffer for unescaping chars | |
|  | 72 | 71 | |
|  | 73 | 72 | JSONGrammar.TokenType[] tag; | 
|  | 74 | 73 | m_builder.Clear(); | 
|  | 75 | 74 | |
|  | 76 | while (m_string | |
|  | 75 | while (m_stringContext.Execute(m_scanner, out tag)) { | |
|  | 77 | 76 | switch (tag[0]) { | 
|  | 78 | 77 | case JSONGrammar.TokenType.StringBound: | 
|  | 79 | 78 | return m_builder.ToString(); | 
| @@ -89,13 +88,17 namespace Implab.Formats.JSON { | |||
|  | 89 | 88 | m_scanner.CopyTokenTo(buf, 0); | 
|  | 90 | 89 | m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); | 
|  | 91 | 90 | break; | 
|  | 92 | default: | |
|  | 93 | break; | |
|  | 94 | 91 | } | 
|  | 95 | 92 | |
|  | 96 | 93 | } | 
|  | 97 | 94 | |
|  | 98 | 95 | throw new ParserException("Unexpected end of data"); | 
|  | 99 | 96 | } | 
|  | 97 | ||
/// <summary>
/// Disposes the underlying scanner when called with
/// <paramref name="disposing"/> set to true, then lets the base class
/// finish the disposal.
/// </summary>
protected override void Dispose(bool disposing) {
    if (disposing) {
        Safe.Dispose(m_scanner);
    }

    base.Dispose(disposing);
}
|  | 100 | 103 | } | 
|  | 101 | 104 | } | 
| @@ -1,11 +1,17 | |||
|  | 1 | using System; | |
|  | 2 | ||
|  | 3 | namespace Implab.Formats { | |
|  | 1 | namespace Implab.Formats { | |
|  | 2 | /// <summary> | |
|  | 3 | /// Represents a scanner configuration, based on the DFA, that is used to recognize tokens. | |
|  | 4 | /// </summary> | |
|  | 4 | 5 | public class ScannerContext<TTag> { | 
|  | 6 | ||
|  | 5 | 7 | public int[,] Dfa { get; private set; } | 
|  | 8 | ||
|  | 6 | 9 | public bool[] Final { get; private set; } | 
|  | 10 | ||
|  | 7 | 11 | public TTag[][] Tags { get; private set; } | 
|  | 12 | ||
|  | 8 | 13 | public int State { get; private set; } | 
|  | 14 | ||
|  | 9 | 15 | public int[] Alphabet { get; private set; } | 
|  | 10 | 16 | |
|  | 11 | 17 | public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) { | 
| @@ -1,9 +1,7 | |||
|  | 1 | 1 | using System; | 
|  | 2 | 2 | using Implab.Components; | 
|  | 3 | using Implab.Automaton.RegularExpressions; | |
|  | 4 | 3 | using System.Diagnostics; | 
|  | 5 | 4 | using Implab.Automaton; | 
|  | 6 | using System.IO; | |
|  | 7 | 5 | using System.Text; | 
|  | 8 | 6 | |
|  | 9 | 7 | namespace Implab.Formats { | 
| @@ -18,7 +16,7 namespace Implab.Formats { | |||
|  | 18 | 16 | int m_tokenLength; | 
|  | 19 | 17 | |
|  | 20 | 18 | /// <summary> | 
|  | 21 | /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner | |
|  | 19 | /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class. | |
|  | 22 | 20 | /// </summary> | 
|  | 23 | 21 | /// <param name="bufferMax">Buffer max.</param> | 
|  | 24 | 22 | /// <param name="chunkSize">Chunk size.</param> | 
| @@ -30,7 +28,7 namespace Implab.Formats { | |||
|  | 30 | 28 | } | 
|  | 31 | 29 | |
|  | 32 | 30 | /// <summary> | 
|  | 33 | /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner | |
|  | 31 | /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class. | |
|  | 34 | 32 | /// </summary> | 
|  | 35 | 33 | /// <param name="buffer">Buffer.</param> | 
|  | 36 | 34 | protected TextScanner(char[] buffer) { | 
| @@ -48,7 +46,9 namespace Implab.Formats { | |||
|  | 48 | 46 | /// <param name="final">Final states of the automaton.</param> | 
|  | 49 | 47 | /// <param name="tags">Tags.</param> | 
|  | 50 | 48 | /// <param name="state">The initial state for the automaton.</param> | 
|  | 51 | internal bool ReadToken<TTag>(int[,] dfa, int[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { | |
|  | 49 | /// <param name="alphabet"></param> | |
|  | 50 | /// <param name = "tag"></param> | |
|  | 51 | internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { | |
|  | 52 | 52 | Safe.ArgumentNotNull(); | 
|  | 53 | 53 | m_tokenLength = 0; | 
|  | 54 | 54 | |
| @@ -58,10 +58,10 namespace Implab.Formats { | |||
|  | 58 | 58 | // after the next chunk is read the offset in the buffer may change | 
|  | 59 | 59 | int pos = m_bufferOffset + m_tokenLength; | 
|  | 60 | 60 | |
|  | 61 | while(pos < m_bufferSize) { | |
|  | 61 | while (pos < m_bufferSize) { | |
|  | 62 | 62 | var ch = m_buffer[pos]; | 
|  | 63 | 63 | |
|  | 64 | state = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
|  | 64 | state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | |
|  | 65 | 65 | if (state == DFAConst.UNREACHABLE_STATE) | 
|  | 66 | 66 | break; | 
|  | 67 | 67 | |
| @@ -77,16 +77,17 namespace Implab.Formats { | |||
|  | 77 | 77 | if (final[state]) { | 
|  | 78 | 78 | tag = tags[state]; | 
|  | 79 | 79 | return true; | 
|  | 80 | } | |
|  | 81 | if (m_bufferOffset == m_bufferSize) { | |
|  | 82 | if (m_tokenLength == 0) //EOF | |
|  | 80 | } | |
|  | 81 | ||
|  | 82 | if (m_bufferOffset == m_bufferSize) { | |
|  | 83 | if (m_tokenLength == 0) //EOF | |
|  | 83 | 84 | return false; | 
|  | 84 | 85 | |
|  | 85 |  | |
|  | 86 |  | |
|  | 87 | throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); | |
|  | 86 | throw new ParserException(); | |
|  | 87 | } | |
|  | 88 | ||
|  | 89 | throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); | |
|  | 88 | 90 | |
|  | 89 | } | |
|  | 90 | 91 | } | 
|  | 91 | 92 | |
|  | 92 | 93 | protected void Feed(char[] buffer, int offset, int length) { | 
| @@ -108,7 +109,7 namespace Implab.Formats { | |||
|  | 108 | 109 | var size = used + free; | 
|  | 109 | 110 | |
|  | 110 | 111 | if (size > m_bufferMax) | 
|  | 111 | throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached" | |
|  | 112 | throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024)); | |
|  | 112 | 113 | |
|  | 113 | 114 | var temp = new char[size]; | 
|  | 114 | 115 | |
| @@ -160,11 +160,9 | |||
|  | 160 | 160 | <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> | 
|  | 161 | 161 | <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> | 
|  | 162 | 162 | <Compile Include="Automaton\DFAConst.cs" /> | 
|  | 163 | <Compile Include="Automaton\RegularExpressions\Grammar.cs" /> | |
|  | 164 | 163 | <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> | 
|  | 165 | 164 | <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> | 
|  | 166 | 165 | <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> | 
|  | 167 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> | |
|  | 168 | 166 | <Compile Include="Automaton\RegularExpressions\Token.cs" /> | 
|  | 169 | 167 | <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> | 
|  | 170 | 168 | <Compile Include="Automaton\AutomatonTransition.cs" /> | 
| @@ -192,6 +190,10 | |||
|  | 192 | 190 | <Compile Include="Formats\StringScanner.cs" /> | 
|  | 193 | 191 | <Compile Include="Formats\ReaderScanner.cs" /> | 
|  | 194 | 192 | <Compile Include="Formats\ScannerContext.cs" /> | 
|  | 193 | <Compile Include="Formats\Grammar.cs" /> | |
|  | 194 | <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> | |
|  | 195 | <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> | |
|  | 196 | <Compile Include="Automaton\RegularExpressions\IVisitorT.cs" /> | |
|  | 195 | 197 | </ItemGroup> | 
|  | 196 | 198 | <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | 
|  | 197 | 199 | <ItemGroup /> | 
| @@ -41,6 +41,11 namespace Implab | |||
|  | 41 | 41 | throw new ArgumentOutOfRangeException(paramName); | 
|  | 42 | 42 | } | 
|  | 43 | 43 | |
/// <summary>
/// Verifies that the argument value is an instance of the specified type.
/// </summary>
/// <param name="value">
/// The argument value. A null value is not an instance of any type and is
/// therefore rejected.
/// </param>
/// <param name="type">The required type; must not be null.</param>
/// <param name="paramName">The argument name reported in the exception.</param>
/// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="value"/> is not an instance of <paramref name="type"/>.
/// </exception>
public static void ArgumentOfType(object value, Type type, string paramName) {
    // Report a missing type explicitly instead of failing on the call below.
    if (type == null)
        throw new ArgumentNullException("type");
    if (!type.IsInstanceOfType(value))
        throw new ArgumentException(String.Format("The parameter must be of type {0}", type), paramName);
}
|  | 48 | ||
|  | 44 | 49 | public static void Dispose(params IDisposable[] objects) { | 
|  | 45 | 50 | foreach (var d in objects) | 
|  | 46 | 51 | if (d != null) | 
|  | 1 | NO CONTENT: file was removed | 
        
        General Comments 0
    
    
  
  
                      You need to be logged in to leave comments.
                      Login now
                    
                