|
|
using Implab;
|
|
|
using System;
|
|
|
using System.Collections.Generic;
|
|
|
using System.Linq;
|
|
|
|
|
|
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// Abstract base class for grammars. Lets derived classes formulate
    /// regular expressions (as <see cref="Token{TTag}"/> trees) over an alphabet
    /// whose symbols are of type <typeparamref name="TSymbol"/>.
    /// </summary>
    /// <typeparam name="TSymbol">The type of the input symbols of the alphabet.</typeparam>
    /// <typeparam name="TTag">The type of the tags carried by tokens and by the resulting scanner context.</typeparam>
    public abstract class Grammar<TSymbol, TTag> {

        /// <summary>
        /// The alphabet builder used to translate input symbols to symbol classes
        /// and to define symbols which are not classified yet.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        /// <summary>
        /// Creates a token for the <see cref="DFAConst.UNCLASSIFIED_INPUT"/> symbol class.
        /// </summary>
        /// <returns>A new symbol token for the unclassified input class.</returns>
        protected SymbolToken<TTag> UnclassifiedToken() {
            return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Defines every symbol of the specified sequence in the <see cref="AlphabetBuilder"/>.
        /// </summary>
        /// <param name="alphabet">The symbols to define; checked with <c>Safe.ArgumentNotNull</c>.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet) {
                AlphabetBuilder.DefineSymbol(ch);
            }
        }

        /// <summary>
        /// Creates a token for a single symbol. If the symbol is currently
        /// unclassified it is defined in the <see cref="AlphabetBuilder"/> first.
        /// </summary>
        /// <param name="symbol">The input symbol.</param>
        /// <returns>A token built from the symbol class of <paramref name="symbol"/>.</returns>
        protected Token<TTag> SymbolToken(TSymbol symbol) {
            return Token<TTag>.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token for a set of symbols. Duplicates in the input are ignored and
        /// unclassified symbols are defined in the <see cref="AlphabetBuilder"/>.
        /// </summary>
        /// <param name="symbols">The input symbols; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>A token built from the symbol classes of the distinct input symbols.</returns>
        protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Convenience overload of <see cref="SymbolToken(IEnumerable{TSymbol})"/>
        /// which accepts the symbols as a parameter list.
        /// </summary>
        /// <param name="set">The input symbols.</param>
        /// <returns>A token built from the symbol classes of the distinct input symbols.</returns>
        protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
            return SymbolToken(set);
        }

        /// <summary>
        /// Translates the symbol to its class; when the translation yields
        /// <see cref="DFAConst.UNCLASSIFIED_INPUT"/> the symbol is defined and the
        /// result of the definition is returned instead.
        /// </summary>
        int TranslateOrAdd(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT) {
                t = AlphabetBuilder.DefineSymbol(ch);
            }
            return t;
        }

        /// <summary>
        /// Translates the distinct symbols of the sequence, defining the unclassified ones.
        /// The returned sequence is lazy: symbols are defined while it is enumerated.
        /// </summary>
        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        /// <summary>
        /// Translates the symbol to its class and fails if the symbol is unclassified.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// The translation yields <see cref="DFAConst.UNCLASSIFIED_INPUT"/>.
        /// </exception>
        int TranslateOrDie(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT) {
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            }
            return t;
        }

        /// <summary>
        /// Translates the distinct symbols of the sequence. The returned sequence is lazy,
        /// so an unclassified symbol is reported only when the sequence is enumerated.
        /// </summary>
        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token from every integer in the range <c>[0, AlphabetBuilder.Count)</c>
        /// except the symbol classes of the specified symbols.
        /// </summary>
        /// <param name="symbols">The symbols to exclude; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>A token built from the remaining values of the range.</returns>
        /// <exception cref="ApplicationException">One of the symbols is unclassified.</exception>
        protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
        }

        /// <summary>
        /// Creates the alphabet which is passed to the DFA optimization in
        /// <see cref="BuildScannerContext"/> and which supplies the translation map
        /// of the resulting scanner context.
        /// </summary>
        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        /// <summary>
        /// Builds a DFA from the specified regular expression, optimizes it and
        /// packs its tables into a scanner context.
        /// </summary>
        /// <param name="regexp">The root token of the regular expression; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>
        /// A scanner context made of the transition, final state and tag tables of the
        /// optimized DFA, its initial state and the translation map of the new alphabet.
        /// </returns>
        /// <exception cref="ApplicationException">
        /// The initial state of the constructed DFA is a final state, i.e. the language
        /// contains the empty token.
        /// </exception>
        protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
            // Validate up front, like the other methods of this class, instead of
            // failing with a NullReferenceException after the DFA has been allocated.
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>();
            regexp.Accept( visitor );

            visitor.BuildDFA(dfa);

            if (dfa.IsFinalState(dfa.InitialState)) {
                throw new ApplicationException("The specified language contains empty token");
            }

            // The optimized DFA is expressed over the freshly created alphabet,
            // therefore the translation map is taken from it as well.
            var ab = CreateAlphabet();
            var optimal = dfa.Optimize(ab);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                ab.GetTranslationMap()
            );
        }

    }
}
|
|
|
|