Grammar.cs
        
        
            
                    100 lines
            
             | 3.4 KiB
            
                | text/x-csharp
            
             |
                CSharpLexer
            
          
        |  | r177 | using Implab; | ||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Linq; | ||||
| using Implab.Automaton; | ||||
| using Implab.Automaton.RegularExpressions; | ||||
| namespace Implab.Formats { | ||||
/// <summary>
/// Abstract base class for grammars. Lets derived classes formulate
/// regular expressions (tokens) over an alphabet of <typeparamref name="TSymbol"/>
/// symbols and turn them into a <see cref="ScannerContext{TTag}"/>.
/// </summary>
/// <typeparam name="TSymbol">Type of the input symbols that make up the alphabet.</typeparam>
/// <typeparam name="TTag">Type parameter of the tokens and of the resulting scanner context.</typeparam>
public abstract class Grammar<TSymbol, TTag> {

    /// <summary>
    /// Alphabet builder used to translate input symbols into symbol classes
    /// and to register symbols that are not known yet.
    /// </summary>
    protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
        get;
    }

    /// <summary>
    /// Creates a token for the <c>DFAConst.UNCLASSIFIED_INPUT</c> symbol class.
    /// </summary>
    protected SymbolToken<TTag> UnclassifiedToken() {
        return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
    }

    /// <summary>
    /// Registers every symbol of <paramref name="alphabet"/> with the <see cref="AlphabetBuilder"/>.
    /// </summary>
    /// <param name="alphabet">Symbols to define; checked with <c>Safe.ArgumentNotNull</c>.</param>
    protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
        Safe.ArgumentNotNull(alphabet, "alphabet");

        foreach (var ch in alphabet) {
            AlphabetBuilder.DefineSymbol(ch);
        }
    }

    /// <summary>
    /// Creates a token for a single symbol, defining the symbol in the alphabet
    /// if it currently translates to <c>DFAConst.UNCLASSIFIED_INPUT</c>.
    /// </summary>
    /// <param name="symbol">The symbol to create a token for.</param>
    protected Token<TTag> SymbolToken(TSymbol symbol) {
        return Token<TTag>.New(TranslateOrAdd(symbol));
    }

    /// <summary>
    /// Creates a token for a set of symbols. Duplicates are ignored and symbols
    /// that are still unclassified are defined in the alphabet.
    /// </summary>
    /// <param name="symbols">The symbols to create a token for; checked with <c>Safe.ArgumentNotNull</c>.</param>
    protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
    }

    /// <summary>
    /// Convenience overload of <see cref="SymbolToken(IEnumerable{TSymbol})"/>
    /// that accepts the symbols as a parameter list.
    /// </summary>
    /// <param name="set">The symbols to create a token for.</param>
    protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
        return SymbolToken(set);
    }

    /// <summary>
    /// Translates a symbol to its class; when the translation yields
    /// <c>DFAConst.UNCLASSIFIED_INPUT</c> the symbol is defined and the
    /// value returned by <c>DefineSymbol</c> is used instead.
    /// </summary>
    int TranslateOrAdd(TSymbol ch) {
        var t = AlphabetBuilder.Translate(ch);
        if (t == DFAConst.UNCLASSIFIED_INPUT) {
            t = AlphabetBuilder.DefineSymbol(ch);
        }
        return t;
    }

    /// <summary>
    /// Applies <see cref="TranslateOrAdd(TSymbol)"/> to every distinct symbol.
    /// The sequence is lazy: symbols are defined while it is being enumerated.
    /// </summary>
    IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
        return symbols.Distinct().Select(TranslateOrAdd);
    }

    /// <summary>
    /// Translates a symbol to its class and fails if the symbol is unclassified.
    /// </summary>
    /// <exception cref="ApplicationException">
    /// The symbol translates to <c>DFAConst.UNCLASSIFIED_INPUT</c>.
    /// </exception>
    int TranslateOrDie(TSymbol ch) {
        var t = AlphabetBuilder.Translate(ch);
        if (t == DFAConst.UNCLASSIFIED_INPUT) {
            throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
        }
        return t;
    }

    /// <summary>
    /// Applies <see cref="TranslateOrDie(TSymbol)"/> to every distinct symbol.
    /// The sequence is lazy: the exception surfaces while it is being enumerated.
    /// </summary>
    IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
        return symbols.Distinct().Select(TranslateOrDie);
    }

    /// <summary>
    /// Creates a token matching every symbol class in the range
    /// <c>[0, AlphabetBuilder.Count)</c> except the classes of the given symbols.
    /// </summary>
    /// <param name="symbols">Symbols to exclude; each must already be defined in the alphabet.</param>
    /// <exception cref="ApplicationException">One of the symbols is unclassified.</exception>
    protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        // NOTE(review): the range starts at 0, so class 0 is part of the result unless
        // excluded - presumably this is the unclassified-input class; confirm against DFAConst.
        return Token<TTag>.New(
            Enumerable.Range(0, AlphabetBuilder.Count)
                .Except(TranslateOrDie(symbols))
                .ToArray()
        );
    }

    /// <summary>
    /// Creates the alphabet passed to <c>Optimize</c> in
    /// <see cref="BuildScannerContext"/>; its translation map becomes part of the scanner context.
    /// </summary>
    protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

    /// <summary>
    /// Builds a DFA from the specified regular expression, optimizes it and
    /// packs the resulting tables into a scanner context.
    /// </summary>
    /// <param name="regexp">Root token of the regular expression; must not be <c>null</c>.</param>
    /// <returns>
    /// A context made of the optimized DFA's transition, final-state and tag tables,
    /// its initial state and the translation map of the alphabet created by
    /// <see cref="CreateAlphabet"/>.
    /// </returns>
    /// <exception cref="ApplicationException">
    /// The initial state of the DFA is a final state, i.e. the language contains an empty token.
    /// </exception>
    protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
        // Validate up front, like the other helpers of this class, instead of
        // failing later at regexp.Accept after the DFA objects have been created.
        Safe.ArgumentNotNull(regexp, "regexp");

        var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

        var visitor = new RegularExpressionVisitor<TTag>();
        regexp.Accept(visitor);
        visitor.BuildDFA(dfa);

        if (dfa.IsFinalState(dfa.InitialState)) {
            throw new ApplicationException("The specified language contains empty token");
        }

        var ab = CreateAlphabet();
        var optimal = dfa.Optimize(ab);

        return new ScannerContext<TTag>(
            optimal.CreateTransitionTable(),
            optimal.CreateFinalStateTable(),
            optimal.CreateTagTable(),
            optimal.InitialState,
            ab.GetTranslationMap()
        );
    }
}
| } | ||||
