Grammar.cs
100 lines
| 3.4 KiB
| text/x-csharp
|
CSharpLexer
cin
|
r177 | using Implab; | ||
using System; | ||||
using System.Collections.Generic; | ||||
using System.Linq; | ||||
using Implab.Automaton; | ||||
using Implab.Automaton.RegularExpressions; | ||||
namespace Implab.Formats { | ||||
/// <summary>
/// Abstract base class for grammars. Provides helpers for building regular-expression
/// tokens over an alphabet of <typeparamref name="TSymbol"/> symbols and for compiling
/// such an expression into a <see cref="ScannerContext{TTag}"/>.
/// </summary>
/// <typeparam name="TSymbol">Type of the input symbols the alphabet is defined over.</typeparam>
/// <typeparam name="TTag">Tag type passed through to the tokens and the scanner context.</typeparam>
public abstract class Grammar<TSymbol, TTag> {
    /// <summary>
    /// The alphabet builder used to translate symbols to their class indices and to
    /// register symbols that are not classified yet.
    /// </summary>
    protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
        get;
    }

    /// <summary>
    /// Creates a token for <c>DFAConst.UNCLASSIFIED_INPUT</c>.
    /// </summary>
    /// <returns>A new symbol token constructed from the unclassified-input constant.</returns>
    protected SymbolToken<TTag> UnclassifiedToken() {
        return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
    }

    /// <summary>
    /// Registers every symbol of <paramref name="alphabet"/> with the <see cref="AlphabetBuilder"/>.
    /// </summary>
    /// <param name="alphabet">Symbols to define; validated with <c>Safe.ArgumentNotNull</c>.</param>
    protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
        Safe.ArgumentNotNull(alphabet, "alphabet");

        foreach (var ch in alphabet) {
            AlphabetBuilder.DefineSymbol(ch);
        }
    }

    /// <summary>
    /// Creates a token for a single symbol, defining the symbol in the alphabet
    /// when it is not classified yet.
    /// </summary>
    /// <param name="symbol">The symbol to create the token for.</param>
    /// <returns>A token built from the translated symbol.</returns>
    protected Token<TTag> SymbolToken(TSymbol symbol) {
        return Token<TTag>.New(TranslateOrAdd(symbol));
    }

    /// <summary>
    /// Creates a token for a set of symbols. Duplicates are ignored and symbols
    /// which are not classified yet are defined in the alphabet.
    /// </summary>
    /// <param name="symbols">The symbols of the set; validated with <c>Safe.ArgumentNotNull</c>.</param>
    /// <returns>A token built from the translated symbols.</returns>
    protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
    }

    /// <summary>
    /// Convenience overload of <c>SymbolToken(IEnumerable&lt;TSymbol&gt;)</c> which
    /// accepts the symbols as a parameter list.
    /// </summary>
    /// <param name="set">The symbols of the set.</param>
    /// <returns>A token built from the translated symbols.</returns>
    protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
        return SymbolToken(set);
    }

    /// <summary>
    /// Translates the symbol using the <see cref="AlphabetBuilder"/>; when the
    /// translation yields <c>DFAConst.UNCLASSIFIED_INPUT</c> the symbol is defined
    /// and the result of <c>DefineSymbol</c> is returned instead.
    /// </summary>
    int TranslateOrAdd(TSymbol ch) {
        var t = AlphabetBuilder.Translate(ch);
        if (t == DFAConst.UNCLASSIFIED_INPUT)
            t = AlphabetBuilder.DefineSymbol(ch);
        return t;
    }

    /// <summary>
    /// Lazily translates the distinct symbols of the sequence, defining the
    /// unclassified ones.
    /// </summary>
    IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
        return symbols.Distinct().Select(TranslateOrAdd);
    }

    /// <summary>
    /// Translates the symbol using the <see cref="AlphabetBuilder"/> and fails
    /// when the translation yields <c>DFAConst.UNCLASSIFIED_INPUT</c>.
    /// </summary>
    /// <exception cref="ApplicationException">The symbol is unclassified.</exception>
    int TranslateOrDie(TSymbol ch) {
        var t = AlphabetBuilder.Translate(ch);
        if (t == DFAConst.UNCLASSIFIED_INPUT)
            throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
        return t;
    }

    /// <summary>
    /// Lazily translates the distinct symbols of the sequence; enumerating the
    /// result throws when an unclassified symbol is met.
    /// </summary>
    IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
        return symbols.Distinct().Select(TranslateOrDie);
    }

    /// <summary>
    /// Creates a token which matches every index in the range
    /// <c>[0, AlphabetBuilder.Count)</c> except the ones the specified symbols
    /// are translated to.
    /// </summary>
    /// <param name="symbols">The symbols to exclude; validated with <c>Safe.ArgumentNotNull</c>.</param>
    /// <returns>A token built from the remaining indices.</returns>
    /// <exception cref="ApplicationException">One of the symbols is unclassified.</exception>
    protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        // NOTE(review): the range starts at 0, so if DFAConst.UNCLASSIFIED_INPUT lies
        // inside it the resulting token also covers unclassified input - confirm intended.
        var excluded = TranslateOrDie(symbols);
        var remaining = Enumerable.Range(0, AlphabetBuilder.Count).Except(excluded);

        return Token<TTag>.New(remaining.ToArray());
    }

    /// <summary>
    /// Creates the alphabet which is passed to <c>Optimize</c> when the scanner
    /// context is built and which supplies the translation map of the scanner.
    /// </summary>
    protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

    /// <summary>
    /// Builds a DFA for the specified regular expression, optimizes it and packs
    /// the resulting tables into a <see cref="ScannerContext{TTag}"/>.
    /// </summary>
    /// <param name="regexp">The regular expression describing the language; validated with <c>Safe.ArgumentNotNull</c>.</param>
    /// <returns>The scanner context built from the optimized DFA.</returns>
    /// <exception cref="ApplicationException">
    /// The initial state of the DFA is final, i.e. the language contains the empty token.
    /// </exception>
    protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
        // Validate up front, like the other methods of this class, instead of
        // failing with a NullReferenceException on regexp.Accept below.
        Safe.ArgumentNotNull(regexp, "regexp");

        var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

        var visitor = new RegularExpressionVisitor<TTag>();
        regexp.Accept(visitor);
        visitor.BuildDFA(dfa);

        if (dfa.IsFinalState(dfa.InitialState))
            throw new ApplicationException("The specified language contains empty token");

        // The alphabet instance is handed to Optimize and afterwards queried for
        // the translation map, so it must be created before the optimization.
        var ab = CreateAlphabet();
        var optimal = dfa.Optimize(ab);

        return new ScannerContext<TTag>(
            optimal.CreateTransitionTable(),
            optimal.CreateFinalStateTable(),
            optimal.CreateTagTable(),
            optimal.InitialState,
            ab.GetTranslationMap()
        );
    }
}
} | ||||