Grammar.cs
89 lines
| 3.0 KiB
| text/x-csharp
|
CSharpLexer
cin
|
r162 | using Implab; | ||
using System; | ||||
using System.Collections.Generic; | ||||
using System.Linq; | ||||
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// Abstract base class for grammars. Provides helpers for composing
    /// regular-expression tokens over an alphabet of <typeparamref name="TSymbol"/>
    /// symbols and for turning such an expression into a DFA.
    /// </summary>
    /// <typeparam name="TSymbol">Type of the input symbols the alphabet is made of.</typeparam>
    /// <typeparam name="TTag">
    /// Type argument forwarded to <c>Token</c>, <c>SymbolToken</c> and <c>RegularDFA</c>.
    /// NOTE(review): presumably the marker attached to recognized tokens; confirm against those types.
    /// </typeparam>
    public abstract class Grammar<TSymbol, TTag> {

        /// <summary>
        /// The alphabet builder used to translate symbols into their integer codes
        /// and to register symbols which are not known yet.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { get; }

        /// <summary>
        /// Creates a token for the <c>DFAConst.UNCLASSIFIED_INPUT</c> code.
        /// </summary>
        /// <returns>A new symbol token wrapping <c>DFAConst.UNCLASSIFIED_INPUT</c>.</returns>
        protected SymbolToken<TTag> UnclassifiedToken() {
            return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Registers every symbol of the specified sequence in <see cref="AlphabetBuilder"/>.
        /// </summary>
        /// <param name="alphabet">The symbols to define; must not be <c>null</c>.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var symbol in alphabet) {
                AlphabetBuilder.DefineSymbol(symbol);
            }
        }

        /// <summary>
        /// Creates a token for a single symbol. The symbol is added to the alphabet
        /// when the alphabet builder reports it as unclassified.
        /// </summary>
        /// <param name="symbol">The symbol to create the token for.</param>
        /// <returns>The token built from the code of the symbol.</returns>
        protected Token<TTag> SymbolToken(TSymbol symbol) {
            return Token<TTag>.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token for a set of symbols. Duplicates are ignored and symbols
        /// which are not in the alphabet yet are added to it.
        /// </summary>
        /// <param name="symbols">The symbols to create the token for; must not be <c>null</c>.</param>
        /// <returns>The token built from the codes of the distinct symbols.</returns>
        protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            var codes = TranslateOrAdd(symbols).ToArray();
            return Token<TTag>.New(codes);
        }

        /// <summary>
        /// Same as <see cref="SymbolToken(IEnumerable{TSymbol})"/>, but accepts the
        /// symbols as a parameter list.
        /// </summary>
        /// <param name="set">The symbols to create the token for.</param>
        /// <returns>The token built from the codes of the distinct symbols.</returns>
        protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
            // An array argument binds to the IEnumerable<TSymbol> overload.
            return SymbolToken(set);
        }

        /// <summary>
        /// Translates the symbol to its code, defining the symbol in the alphabet
        /// when the translation yields <c>DFAConst.UNCLASSIFIED_INPUT</c>.
        /// </summary>
        int TranslateOrAdd(TSymbol ch) {
            var code = AlphabetBuilder.Translate(ch);
            if (code == DFAConst.UNCLASSIFIED_INPUT) {
                code = AlphabetBuilder.DefineSymbol(ch);
            }
            return code;
        }

        /// <summary>
        /// Translates the distinct symbols of the sequence, defining the unknown ones.
        /// The result is evaluated lazily, so the alphabet is modified only when the
        /// returned sequence is enumerated.
        /// </summary>
        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        /// <summary>
        /// Translates the symbol to its code and fails when the symbol is unclassified.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// The translation yields <c>DFAConst.UNCLASSIFIED_INPUT</c>.
        /// </exception>
        int TranslateOrDie(TSymbol ch) {
            var code = AlphabetBuilder.Translate(ch);
            if (code == DFAConst.UNCLASSIFIED_INPUT) {
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            }
            return code;
        }

        /// <summary>
        /// Translates the distinct symbols of the sequence. The result is evaluated
        /// lazily, so the exception for an unclassified symbol is raised only when the
        /// returned sequence is enumerated.
        /// </summary>
        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token which consists of every code in the range
        /// <c>[0, AlphabetBuilder.Count)</c> except the codes of the specified symbols.
        /// </summary>
        /// <param name="symbols">The symbols to exclude; must not be <c>null</c>.</param>
        /// <returns>The token built from the remaining codes.</returns>
        /// <exception cref="ApplicationException">
        /// One of the specified symbols is unclassified in the alphabet.
        /// </exception>
        protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            var excluded = TranslateOrDie(symbols);
            var remaining = Enumerable.Range(0, AlphabetBuilder.Count).Except(excluded);
            return Token<TTag>.New(remaining.ToArray());
        }

        /// <summary>
        /// Creates the alphabet builder which <see cref="BuildDFA"/> passes to the
        /// optimization step of the DFA.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> CreateAlphabet();

        /// <summary>
        /// Builds a DFA for the specified regular expression and returns the result of
        /// optimizing it with an alphabet obtained from <see cref="CreateAlphabet"/>.
        /// </summary>
        /// <param name="regexp">The root token of the regular expression; must not be <c>null</c>.</param>
        /// <returns>The optimized DFA.</returns>
        /// <exception cref="ApplicationException">
        /// The initial state of the constructed DFA is a final state, i.e. the
        /// expression accepts an empty token.
        /// </exception>
        protected RegularDFA<TSymbol, TTag> BuildDFA(Token<TTag> regexp) {
            // Validate the argument the same way the other entry points do instead of
            // failing later with a NullReferenceException.
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>();
            regexp.Accept(visitor);

            visitor.BuildDFA(dfa);

            if (dfa.IsFinalState(dfa.InitialState)) {
                throw new ApplicationException("The specified language contains empty token");
            }

            return dfa.Optimize(CreateAlphabet());
        }
    }
}