// Grammar.cs | 103 lines | 3.3 KiB | text/x-csharp | CSharpLexer | cin
using Implab;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Implab.Parsing {
    /// <summary>
    /// Abstract base class for grammars. Lets a derived grammar formulate
    /// expressions (tokens) over an alphabet of <c>char</c> symbols.
    /// </summary>
    /// <typeparam name="TGrammar">
    /// The concrete grammar type. It must derive from this class and have a
    /// public parameterless constructor, which <see cref="Instance"/> uses to create it.
    /// </typeparam>
    public abstract class Grammar<TGrammar> where TGrammar: Grammar<TGrammar>, new() {
        // Alphabet used to translate characters into symbol classes for every
        // token built through this grammar; it is also handed to the DFA definition.
        readonly Alphabet m_alphabet = new Alphabet();

        static TGrammar _instance;

        /// <summary>
        /// Gets the shared grammar instance, creating it with the parameterless
        /// constructor on first access.
        /// </summary>
        /// <remarks>
        /// The check-and-create sequence is not synchronized, so concurrent first
        /// accesses may each construct an instance.
        /// </remarks>
        public static TGrammar Instance {
            get {
                if (_instance == null)
                    _instance = new TGrammar();
                return _instance;
            }
        }

        /// <summary>
        /// Creates a symbol token for <c>Alphabet.UNCLASSIFIED</c>.
        /// </summary>
        /// <returns>A new <c>SymbolToken</c> constructed from <c>Alphabet.UNCLASSIFIED</c>.</returns>
        public SymbolToken UnclassifiedToken() {
            return new SymbolToken(Alphabet.UNCLASSIFIED);
        }

        /// <summary>
        /// Defines every character of <paramref name="alphabet"/> as a symbol of this grammar's alphabet.
        /// </summary>
        /// <param name="alphabet">Characters to define. Must not be null; validated through <c>Safe.ArgumentNotNull</c>.</param>
        public void DefineAlphabet(IEnumerable<char> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet)
                m_alphabet.DefineSymbol(ch);
        }

        /// <summary>
        /// Creates a token for the inclusive character range <paramref name="start"/>..<paramref name="end"/>.
        /// </summary>
        /// <param name="start">First character of the range.</param>
        /// <param name="end">Last character of the range (inclusive); must not be less than <paramref name="start"/>.</param>
        /// <returns>The token built from all characters of the range.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="end"/> precedes <paramref name="start"/>.</exception>
        public Token SymbolRangeToken(char start, char end) {
            // Fail fast on a reversed range: otherwise Enumerable.Range reports an
            // unrelated 'count' argument, or (when end == start - 1) an empty
            // symbol set is silently passed to Token.New.
            if (start > end)
                throw new ArgumentOutOfRangeException("end", "The end of the range must not precede its start");

            return SymbolToken(Enumerable.Range(start, end - start + 1).Select(x => (char)x));
        }

        /// <summary>
        /// Creates a token for a single character, defining the character in the
        /// alphabet if it is still unclassified.
        /// </summary>
        /// <param name="symbol">The character to create the token for.</param>
        /// <returns>The token built from the symbol's class.</returns>
        public Token SymbolToken(char symbol) {
            return Token.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token for a set of characters; duplicates are ignored and
        /// characters that are still unclassified are defined in the alphabet.
        /// </summary>
        /// <param name="symbols">The characters to include. Must not be null; validated through <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The token built from the classes of the distinct symbols.</returns>
        public Token SymbolToken(IEnumerable<char> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Convenience overload of the set-based <c>SymbolToken</c> that accepts
        /// the characters as a parameter list.
        /// </summary>
        /// <param name="set">The characters to include.</param>
        /// <returns>The token built from the classes of the distinct symbols.</returns>
        public Token SymbolSetToken(params char[] set) {
            return SymbolToken(set);
        }

        // Translates a character to its class, defining the character in the
        // alphabet when it is currently unclassified.
        int TranslateOrAdd(char ch) {
            var t = m_alphabet.Translate(ch);
            if (t == Alphabet.UNCLASSIFIED)
                t = m_alphabet.DefineSymbol(ch);
            return t;
        }

        // Lazily translates the distinct characters, defining the missing ones.
        IEnumerable<int> TranslateOrAdd(IEnumerable<char> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        // Translates a character to its class and throws ApplicationException
        // when the character is unclassified.
        int TranslateOrDie(char ch) {
            var t = m_alphabet.Translate(ch);
            if (t == Alphabet.UNCLASSIFIED)
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            return t;
        }

        // Lazily translates the distinct characters; enumeration throws on the
        // first unclassified one.
        IEnumerable<int> TranslateOrDie(IEnumerable<char> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token from every class index of the alphabet
        /// (<c>0</c> .. <c>Count - 1</c>) except the classes of the specified characters.
        /// </summary>
        /// <param name="symbols">The characters to exclude. Must not be null; validated through <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The token built from the remaining class indices.</returns>
        /// <exception cref="ApplicationException">One of the characters is unclassified in the alphabet.</exception>
        public Token SymbolTokenExcept(IEnumerable<char> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New(Enumerable.Range(0, m_alphabet.Count).Except(TranslateOrDie(symbols)).ToArray());
        }

        /// <summary>
        /// Builds a DFA for the specified language over this grammar's alphabet.
        /// </summary>
        /// <param name="lang">The token describing the language. Must not be null; validated through <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The result of <c>Optimize()</c> on the constructed DFA definition.</returns>
        /// <exception cref="ApplicationException">The initial state of the DFA is final, i.e. the language contains the empty token.</exception>
        protected CDFADefinition BuildDFA(Token lang) {
            Safe.ArgumentNotNull(lang, "lang");

            var dfa = new CDFADefinition(m_alphabet);

            // The token is visited by the builder, which then fills the DFA definition.
            var builder = new DFABuilder();
            lang.Accept(builder);
            builder.BuildDFA(dfa);

            if (dfa.InitialStateIsFinal)
                throw new ApplicationException("The specified language contains empty token");

            return dfa.Optimize();
        }
    }
}