using System.Linq;
using Implab.Automaton.RegularExpressions;
using System;
using Implab.Automaton;
using Implab.Components;

namespace Implab.Formats.JSON {
    /// <summary>
    /// Lexical grammar for JSON text. Builds two scanner contexts: one for
    /// tokens outside of string values and one for the contents of a string.
    /// </summary>
    class JSONGrammar : Grammar {
        /// <summary>
        /// Tags attached to the token expressions recognised by the scanners.
        /// </summary>
        public enum TokenType {
            None,
            BeginObject,
            EndObject,
            BeginArray,
            EndArray,
            String,
            Number,
            Literal,
            NameSeparator,
            ValueSeparator,
            StringBound,
            EscapedChar,
            UnescapedChar,
            EscapedUnicode
        }

        // NOTE(review): presumably a lazily created, weakly referenced holder
        // for the shared grammar - confirm against LazyAndWeak.
        static LazyAndWeak _instance = new LazyAndWeak(() => new JSONGrammar());

        /// <summary>
        /// Gets the grammar instance exposed by the static holder.
        /// </summary>
        public static JSONGrammar Instance {
            get { return _instance.Value; }
        }

        readonly ScannerContext m_jsonExpression;
        readonly ScannerContext m_stringExpression;
        readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();

        /// <summary>
        /// Builds the token expressions and the two scanner contexts.
        /// </summary>
        public JSONGrammar() {
            // Register the characters 0x00..0x1F with the alphabet before any
            // token is defined. NOTE(review): presumably this groups the control
            // characters into a single class - confirm against Grammar.DefineAlphabet.
            DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));

            // Character-level building blocks.
            var hexDigit = SymbolRangeToken('a', 'f')
                .Or(SymbolRangeToken('A', 'F'))
                .Or(SymbolRangeToken('0', '9'));
            var digit9 = SymbolRangeToken('1', '9');
            var zero = SymbolToken('0');
            var digit = zero.Or(digit9);
            var dot = SymbolToken('.');
            var minus = SymbolToken('-');
            var sign = SymbolSetToken('-', '+');
            var expSign = SymbolSetToken('e', 'E');
            var letters = SymbolRangeToken('a', 'z');

            // Parts of a number.
            // NOTE(review): EClosure() is presumably "zero or more" and Closure()
            // "one or more" - confirm against Token.
            var integer = zero.Or(digit9.Cat(digit.EClosure()));
            var frac = dot.Cat(digit.Closure());
            var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());

            // Parts of a string.
            var quote = SymbolToken('"');
            var backSlash = SymbolToken('\\');
            var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
            var unicodeEscape = SymbolToken('u').Cat(hexDigit.Repeat(4));

            // Structural characters, each wrapped in the whitespace expression
            // on both sides.
            var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
            var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
            var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
            var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
            var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
            var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
            var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);

            var number = minus.Optional()
                .Cat(integer)
                .Cat(frac.Optional())
                .Cat(exp.Optional());
            var literal = letters.Closure();

            // Everything except the characters 0x00..0x1F, backslash and quote.
            var unescaped = SymbolTokenExcept(
                Enumerable.Range(0, 0x20)
                    .Union(new int[] { '\\', '"' })
                    .Select(x => (char)x)
            );

            // Tokens recognised outside of a string value.
            var jsonExpression = number.Tag(TokenType.Number)
                .Or(literal.Tag(TokenType.Literal))
                .Or(quote.Tag(TokenType.StringBound))
                .Or(beginObject.Tag(TokenType.BeginObject))
                .Or(endObject.Tag(TokenType.EndObject))
                .Or(beginArray.Tag(TokenType.BeginArray))
                .Or(endArray.Tag(TokenType.EndArray))
                .Or(nameSep.Tag(TokenType.NameSeparator))
                .Or(valueSep.Tag(TokenType.ValueSeparator));

            // Tokens recognised inside a string value.
            var jsonStringExpression = quote.Tag(TokenType.StringBound)
                .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
                .Or(backSlash.Cat(unicodeEscape).Tag(TokenType.EscapedUnicode))
                .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));

            m_jsonExpression = BuildScannerContext(jsonExpression);
            m_stringExpression = BuildScannerContext(jsonStringExpression);
        }

        /// <summary>
        /// Gets the alphabet instance owned by this grammar.
        /// </summary>
        protected override IAlphabetBuilder AlphabetBuilder {
            get { return m_defaultAlphabet; }
        }

        /// <summary>
        /// Gets the scanner context built from the expression for tokens
        /// outside of string values.
        /// </summary>
        public ScannerContext JsonExpression {
            get { return m_jsonExpression; }
        }

        /// <summary>
        /// Gets the scanner context built from the expression for the contents
        /// of a string value.
        /// </summary>
        public ScannerContext JsonStringExpression {
            get { return m_stringExpression; }
        }

        /// <summary>
        /// Creates a token matching any single character in the inclusive
        /// range from <paramref name="start"/> to <paramref name="stop"/>.
        /// </summary>
        /// <param name="start">First character of the range.</param>
        /// <param name="stop">Last character of the range (included).</param>
        /// <returns>The token produced by <c>SymbolToken</c> for the range.</returns>
        Token SymbolRangeToken(char start, char stop) {
            // Enumerable.Range takes (start, count), not (start, end): the +1
            // is required so that 'stop' itself belongs to the range.
            return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
        }

        /// <summary>
        /// Creates a new, empty character alphabet.
        /// </summary>
        protected override IndexedAlphabetBase CreateAlphabet() {
            return new CharAlphabet();
        }
    }
}