using System;
using System.Linq;
using Implab.Automaton.RegularExpressions;

namespace Implab.Formats.JSON {
    /// <summary>
    /// Describes the lexical structure of JSON text as token expressions and
    /// builds two DFAs from them: one for the tokens that appear outside of
    /// string literals and one for the content of string literals.
    /// </summary>
    class JSONGrammar : Grammar {
        /// <summary>
        /// Tags attached to the token expressions defined by this grammar.
        /// Members rely on their implicit numeric values, so the order must
        /// not be changed.
        /// </summary>
        public enum TokenType {
            None,
            BeginObject,
            EndObject,
            BeginArray,
            EndArray,
            String,
            Number,
            Literal,
            NameSeparator,
            ValueSeparator,
            StringBound,
            EscapedChar,
            UnescapedChar,
            EscapedUnicode,
            Minus,
            Plus,
            Sign,
            Integer,
            Dot,
            Exp
        }

        // Shared instance, created through the public parameterless constructor
        // the first time Instance is read.
        static readonly Lazy<JSONGrammar> _instance = new Lazy<JSONGrammar>();

        /// <summary>
        /// Gets the shared, lazily constructed grammar instance.
        /// </summary>
        public static JSONGrammar Instance {
            get { return _instance.Value; }
        }

        readonly RegularCharDFADefinition m_jsonDFA;
        readonly RegularCharDFADefinition m_stringDFA;

        /// <summary>
        /// Defines the JSON token expressions and builds both DFAs.
        /// </summary>
        public JSONGrammar() {
            // Characters 0x00..0x1F are handed to DefineAlphabet up front.
            // NOTE(review): presumably this groups the control characters into
            // a single input class - confirm against Grammar.DefineAlphabet.
            DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));

            // NOTE(review): the expressions below assume EClosure() means
            // "zero or more" and Closure() means "one or more" - confirm
            // against the Token API.

            // Character classes.
            var hexDigit = SymbolRangeToken('a', 'f')
                .Or(SymbolRangeToken('A', 'F'))
                .Or(SymbolRangeToken('0', '9'));
            var digit9 = SymbolRangeToken('1', '9');
            var zero = SymbolToken('0');
            var digit = zero.Or(digit9);
            var dot = SymbolToken('.');
            var minus = SymbolToken('-');
            var sign = SymbolSetToken('-', '+');
            var expSign = SymbolSetToken('e', 'E');
            var letters = SymbolRangeToken('a', 'z');

            // Pieces of a number: integer part, fraction and exponent.
            var integer = zero.Or(digit9.Cat(digit.EClosure()));
            var frac = dot.Cat(digit.Closure());
            var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());

            // Pieces of a string literal.
            var quote = SymbolToken('"');
            var backSlash = SymbolToken('\\');
            var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
            var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));

            // Structural tokens absorb the whitespace on both sides of them.
            var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
            var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
            var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
            var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
            var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
            var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
            var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);

            var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
            var literal = letters.Closure();

            // Everything except the control characters, the backslash and the
            // double quote may appear unescaped inside a string.
            var unescaped = SymbolTokenExcept(
                Enumerable.Range(0, 0x20)
                    .Union(new int[] { '\\', '"' })
                    .Select(x => (char)x)
            );

            // Tokens recognised outside of string literals.
            var jsonExpression =
                number.Tag(TokenType.Number)
                .Or(literal.Tag(TokenType.Literal))
                .Or(quote.Tag(TokenType.StringBound))
                .Or(beginObject.Tag(TokenType.BeginObject))
                .Or(endObject.Tag(TokenType.EndObject))
                .Or(beginArray.Tag(TokenType.BeginArray))
                .Or(endArray.Tag(TokenType.EndArray))
                .Or(nameSep.Tag(TokenType.NameSeparator))
                .Or(valueSep.Tag(TokenType.ValueSeparator));

            // Tokens recognised inside a string literal.
            var jsonStringExpression =
                quote.Tag(TokenType.StringBound)
                .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
                .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
                .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));

            m_jsonDFA = new RegularCharDFADefinition(new CharAlphabet());
            BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet);

            // The string expression must be compiled into its own DFA and
            // alphabet; building it into m_jsonDFA would leave m_stringDFA
            // empty and mix the string rules into the main DFA.
            m_stringDFA = new RegularCharDFADefinition(new CharAlphabet());
            BuildDFA(jsonStringExpression, m_stringDFA, m_stringDFA.InputAlphabet);
        }

        /// <summary>
        /// Gets the DFA built from the expression for tokens outside of
        /// string literals.
        /// </summary>
        public RegularCharDFADefinition JsonDFA {
            get { return m_jsonDFA; }
        }

        /// <summary>
        /// Gets the DFA built from the expression for the content of string
        /// literals.
        /// </summary>
        public RegularDFADefinition JsonStringDFA {
            get { return m_stringDFA; }
        }

        /// <summary>
        /// Builds a symbol token from every character in the inclusive range
        /// <paramref name="start"/>..<paramref name="stop"/>.
        /// </summary>
        /// <param name="start">First character of the range.</param>
        /// <param name="stop">Last character of the range (included).</param>
        /// <returns>The token returned by SymbolToken for those characters.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="stop"/> precedes <paramref name="start"/>.
        /// </exception>
        Token SymbolRangeToken(char start, char stop) {
            if (stop < start)
                throw new ArgumentOutOfRangeException("stop");

            // Enumerable.Range takes a count, so +1 is needed to include stop.
            // The ints are converted explicitly; Enumerable.Cast cannot turn
            // an int into a char.
            return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
        }
    }
}