diff --git a/Implab/Automaton/DummyAlphabet.cs b/Implab/Automaton/DummyAlphabet.cs
new file mode 100644
--- /dev/null
+++ b/Implab/Automaton/DummyAlphabet.cs
@@ -0,0 +1,66 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Implab.Automaton {
+    /// <summary>
+    /// Identity alphabet over the integers 0 .. size-1: every symbol is its own class,
+    /// so Translate returns its argument unchanged.
+    /// </summary>
+    public class DummyAlphabet : IAlphabet<int> {
+        readonly int m_size;
+
+        /// <summary>Creates an alphabet of <paramref name="size"/> symbols.</summary>
+        /// <param name="size">Number of symbols; asserted to be greater than zero.</param>
+        public DummyAlphabet(int size) {
+            Safe.ArgumentAssert(size > 0);
+            // Store the requested size; the previous "m_size = 0" discarded the argument.
+            m_size = size;
+        }
+
+        #region IAlphabet implementation
+
+        /// <summary>
+        /// Returns, for each class id, the list of symbols in that class.
+        /// Since class id == symbol here, entry i is the single-element list { i }.
+        /// </summary>
+        public List<int>[] CreateReverseMap() {
+            return Enumerable.Range(0, m_size).Select(i => new List<int> { i }).ToArray();
+        }
+
+        /// <summary>
+        /// Registers each group of classes as one class in <paramref name="newAlphabet"/>
+        /// and returns the old-id to new-id map; ids that appear in no group stay 0.
+        /// </summary>
+        public int[] Reclassify(IAlphabetBuilder<int> newAlphabet, IEnumerable<IEnumerable<int>> classes) {
+            Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
+            Safe.ArgumentNotNull(classes, "classes");
+            var map = new int[m_size];
+            foreach (var cls in classes) {
+                var newid = newAlphabet.DefineClass(cls);
+                foreach (var id in cls)
+                    map[id] = newid;
+            }
+
+            return map;
+        }
+
+        /// <summary>Returns <paramref name="symobl"/> itself after a range check.</summary>
+        public int Translate(int symobl) {
+            // NOTE(review): valid symbols are 0 .. m_size-1; confirm Safe.ArgumentInRange treats
+            // its upper bound as exclusive, otherwise this should pass m_size - 1.
+            Safe.ArgumentInRange(symobl, 0, m_size, "symbol");
+            return symobl;
+        }
+
+        /// <summary>Number of symbols (and therefore classes) in the alphabet.</summary>
+        public int Count {
+            get {
+                return m_size;
+            }
+        }
+
+        #endregion
+    }
+}
+
diff --git a/Implab/Automaton/MapAlphabet.cs b/Implab/Automaton/MapAlphabet.cs
new file mode 100644
--- /dev/null
+++ b/Implab/Automaton/MapAlphabet.cs
@@ -0,0 +1,129 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Implab.Automaton {
+    /// <summary>
+    /// Alphabet builder backed by a dictionary from symbol to class id.
+    /// Class ids are handed out sequentially, starting at 1.
+    /// </summary>
+    public class MapAlphabet<T> : IAlphabetBuilder<T> {
+        readonly Dictionary<T, int> m_map;
+        int m_nextCls;
+
+        /// <summary>Creates an empty alphabet whose symbols are compared with <paramref name="comparer"/>.</summary>
+        public MapAlphabet(IEqualityComparer<T> comparer) {
+            m_map = new Dictionary<T, int>(comparer);
+            // Ids start at 1, so id 0 is never assigned to a symbol.
+            m_nextCls = 1;
+        }
+
+        #region IAlphabetBuilder implementation
+
+        /// <summary>
+        /// Returns the class of <paramref name="symbol"/>; an unknown symbol is first placed
+        /// in a new class of its own.
+        /// </summary>
+        public int DefineSymbol(T symbol) {
+            int cls;
+            if (m_map.TryGetValue(symbol, out cls))
+                return cls;
+
+            cls = m_nextCls++;
+
+            m_map.Add(symbol, cls);
+
+            return cls;
+        }
+
+        /// <summary>
+        /// Puts all <paramref name="symbols"/> (duplicates ignored) into one new class and returns its id.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">A symbol already belongs to a class.</exception>
+        public int DefineClass(IEnumerable<T> symbols) {
+            Safe.ArgumentNotNull(symbols, "symbols");
+            // De-duplicate with the dictionary's comparer so "same symbol" means the same thing here as in m_map.
+            symbols = symbols.Distinct(m_map.Comparer);
+
+            foreach (var symbol in symbols) {
+                // Dictionary<TKey,TValue> has no Contains(key) overload; ContainsKey is the key lookup.
+                if (!m_map.ContainsKey(symbol))
+                    m_map.Add(symbol, m_nextCls);
+                else
+                    throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
+            }
+            return m_nextCls++;
+        }
+
+        #endregion
+
+        #region IAlphabet implementation
+
+        /// <summary>
+        /// Returns, for each class id below m_nextCls, the symbols assigned to it.
+        /// Every entry is a separate list; a class without symbols gets an empty one.
+        /// </summary>
+        public List<T>[] CreateReverseMap() {
+            var rmap = new List<T>[m_nextCls];
+
+            // One list per class: the earlier code put a single shared "empty" list in every
+            // slot, so each class ended up reporting the symbols of all classes.
+            for (int i = 0; i < rmap.Length; i++)
+                rmap[i] = new List<T>();
+
+            foreach (var pair in m_map)
+                rmap[pair.Value].Add(pair.Key);
+
+            return rmap;
+        }
+
+        /// <summary>
+        /// Merges each group of class ids in <paramref name="classes"/> into one class of
+        /// <paramref name="newAlphabet"/> and returns the old-id to new-id map.
+        /// Ids that appear in no group map to 0.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException">A class id is outside this alphabet.</exception>
+        public int[] Reclassify(IAlphabetBuilder<T> newAlphabet, IEnumerable<IEnumerable<int>> classes) {
+            Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
+            Safe.ArgumentNotNull(classes, "classes");
+
+            var rmap = CreateReverseMap();
+            var map = new int[rmap.Length];
+
+            foreach (var cls in classes) {
+                var symbols = new List<T>();
+                foreach (var id in cls) {
+                    if (id < 0 || id >= rmap.Length)
+                        throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", id));
+                    if (rmap[id] != null)
+                        symbols.AddRange(rmap[id]);
+                }
+
+                var newId = newAlphabet.DefineClass(symbols);
+
+                foreach (var id in cls)
+                    map[id] = newId;
+            }
+
+            // The method used to fall off the end without returning the map it had built.
+            return map;
+        }
+
+        /// <summary>
+        /// Returns the class of <paramref name="symobl"/>, or DFAConst.UNCLASSIFIED_INPUT
+        /// when the symbol has not been defined.
+        /// </summary>
+        public int Translate(T symobl) {
+            int cls;
+            return m_map.TryGetValue(symobl, out cls) ?
cls : DFAConst.UNCLASSIFIED_INPUT; + } + + public int Count { + get { + return m_nextCls; + } + } + + #endregion + } +} + diff --git a/Implab/Automaton/RegularExpressions/DFABuilder.cs b/Implab/Automaton/RegularExpressions/DFABuilder.cs deleted file mode 100644 --- a/Implab/Automaton/RegularExpressions/DFABuilder.cs +++ /dev/null @@ -1,181 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; - -namespace Implab.Automaton.RegularExpressions { - /// - /// Используется для построения ДКА по регулярному выражению, сначала обходит - /// регулярное выражение и вычисляет followpos, затем используется метод - /// для построения автомата. - /// - public class DFABuilder : IVisitor { - int m_idx = 0; - Token m_root; - HashSet m_firstpos; - HashSet m_lastpos; - - readonly Dictionary> m_followpos = new Dictionary>(); - readonly Dictionary m_indexes = new Dictionary(); - readonly Dictionary m_ends = new Dictionary(); - - public Dictionary> FollowposMap { - get { return m_followpos; } - } - - public HashSet Followpos(int pos) { - HashSet set; - if (m_followpos.TryGetValue(pos, out set)) - return set; - return m_followpos[pos] = new HashSet(); - } - - bool Nullable(object n) { - if (n is EmptyToken || n is StarToken) - return true; - if (n is AltToken) - return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right); - if (n is CatToken) - return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right); - return false; - } - - - public void Visit(AltToken token) { - if (m_root == null) - m_root = token; - var firtspos = new HashSet(); - var lastpos = new HashSet(); - - token.Left.Accept(this); - firtspos.UnionWith(m_firstpos); - lastpos.UnionWith(m_lastpos); - - token.Right.Accept(this); - firtspos.UnionWith(m_firstpos); - lastpos.UnionWith(m_lastpos); - - m_firstpos = firtspos; - m_lastpos = lastpos; - } - - public void Visit(StarToken token) { - if (m_root == null) - m_root = token; - 
token.Token.Accept(this); - - foreach (var i in m_lastpos) - Followpos(i).UnionWith(m_firstpos); - } - - public void Visit(CatToken token) { - if (m_root == null) - m_root = token; - - var firtspos = new HashSet(); - var lastpos = new HashSet(); - token.Left.Accept(this); - firtspos.UnionWith(m_firstpos); - var leftLastpos = m_lastpos; - - token.Right.Accept(this); - lastpos.UnionWith(m_lastpos); - var rightFirstpos = m_firstpos; - - if (Nullable(token.Left)) - firtspos.UnionWith(rightFirstpos); - - if (Nullable(token.Right)) - lastpos.UnionWith(leftLastpos); - - m_firstpos = firtspos; - m_lastpos = lastpos; - - foreach (var i in leftLastpos) - Followpos(i).UnionWith(rightFirstpos); - - } - - public void Visit(EmptyToken token) { - if (m_root == null) - m_root = token; - } - - public void Visit(SymbolToken token) { - if (m_root == null) - m_root = token; - m_idx++; - m_indexes[m_idx] = token.Value; - m_firstpos = new HashSet(new[] { m_idx }); - m_lastpos = new HashSet(new[] { m_idx }); - } - - public void Visit(EndToken token) { - if (m_root == null) - m_root = token; - m_idx++; - m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; - m_firstpos = new HashSet(new[] { m_idx }); - m_lastpos = new HashSet(new[] { m_idx }); - Followpos(m_idx); - m_ends.Add(m_idx, token.Tag); - } - - public void BuildDFA(IDFADefinitionBuilder dfa, IAlphabetBuilder states) { - Safe.ArgumentNotNull(dfa,"dfa"); - - var stateMap = new Dictionary, int>(new CustomEqualityComparer>( - (x, y) => x.SetEquals(y), - x => x.Sum(n => n.GetHashCode()) - )); - - int nextState = 0; - - int initialState = states.DefineSymbol(nextState++); - stateMap[m_firstpos] = initialState; - - var tags = GetStateTags(m_firstpos); - if (tags != null && tags.Length > 0) - dfa.MarkFinalState(initialState, tags); - - var inputMax = m_indexes.Values.Max(); - var queue = new Queue>(); - - queue.Enqueue(m_firstpos); - - while (queue.Count > 0) { - var state = queue.Dequeue(); - var s1 = stateMap[state]; - - for (int a = 0; a <= 
inputMax; a++) { - var next = new HashSet(); - foreach (var p in state) { - if (m_indexes[p] == a) { - next.UnionWith(Followpos(p)); - } - } - if (next.Count > 0) { - int s2; - if (!stateMap.TryGetValue(next, out s2)) { - s2 = states.DefineSymbol(nextState++); - stateMap[next] = s2; - tags = GetStateTags(next); - if (tags != null && tags.Length > 0) - dfa.MarkFinalState(s2, tags); - - queue.Enqueue(next); - } - dfa.DefineTransition(s1, s2, a); - } - } - } - } - - TTag[] GetStateTags(IEnumerable state) { - Debug.Assert(state != null); - return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); - } - - } -} diff --git a/Implab/Automaton/RegularExpressions/Grammar.cs b/Implab/Automaton/RegularExpressions/Grammar.cs --- a/Implab/Automaton/RegularExpressions/Grammar.cs +++ b/Implab/Automaton/RegularExpressions/Grammar.cs @@ -9,86 +9,71 @@ namespace Implab.Automaton.RegularExpres /// /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа char. 
/// - /// - public abstract class Grammar where TGrammar: Grammar, new() { - static TGrammar _instance; + public abstract class Grammar { - public static TGrammar Instance{ - get { - if (_instance == null) - _instance = new TGrammar(); - return _instance; - } + public abstract IAlphabetBuilder Alphabet { + get; } - readonly CharAlphabet m_alphabet = new CharAlphabet(); - - public CharAlphabet Alphabet { - get { return m_alphabet; } + public SymbolToken UnclassifiedToken() { + return new SymbolToken(DFAConst.UNCLASSIFIED_INPUT); } - public SymbolToken UnclassifiedToken() { - return new SymbolToken(CharAlphabet.UNCLASSIFIED); - } - - public void DefineAlphabet(IEnumerable alphabet) { + public void DefineAlphabet(IEnumerable alphabet) { Safe.ArgumentNotNull(alphabet, "alphabet"); foreach (var ch in alphabet) - m_alphabet.DefineSymbol(ch); + Alphabet.DefineSymbol(ch); } - public Token SymbolRangeToken(char start, char end) { - return SymbolToken(Enumerable.Range(start, end - start + 1).Select(x => (char)x)); + + public Token SymbolToken(TSymbol symbol) { + return Token.New(TranslateOrAdd(symbol)); } - public Token SymbolToken(char symbol) { - return Token.New(TranslateOrAdd(symbol)); + public Token SymbolToken(IEnumerable symbols) { + Safe.ArgumentNotNull(symbols, "symbols"); + + return Token.New(TranslateOrAdd(symbols).ToArray()); } - public Token SymbolToken(IEnumerable symbols) { - Safe.ArgumentNotNull(symbols, "symbols"); - - return Token.New(TranslateOrAdd(symbols).ToArray()); - } - - public Token SymbolSetToken(params char[] set) { + public Token SymbolSetToken(params TSymbol[] set) { return SymbolToken(set); } - int TranslateOrAdd(char ch) { - var t = m_alphabet.Translate(ch); - if (t == CharAlphabet.UNCLASSIFIED) - t = m_alphabet.DefineSymbol(ch); + int TranslateOrAdd(TSymbol ch) { + var t = Alphabet.Translate(ch); + if (t == DFAConst.UNCLASSIFIED_INPUT) + t = Alphabet.DefineSymbol(ch); return t; } - IEnumerable TranslateOrAdd(IEnumerable symbols) { + 
IEnumerable TranslateOrAdd(IEnumerable symbols) { return symbols.Distinct().Select(TranslateOrAdd); } - int TranslateOrDie(char ch) { - var t = m_alphabet.Translate(ch); - if (t == CharAlphabet.UNCLASSIFIED) + int TranslateOrDie(TSymbol ch) { + var t = Alphabet.Translate(ch); + if (t == DFAConst.UNCLASSIFIED_INPUT) throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); return t; } - IEnumerable TranslateOrDie(IEnumerable symbols) { + IEnumerable TranslateOrDie(IEnumerable symbols) { return symbols.Distinct().Select(TranslateOrDie); } - public Token SymbolTokenExcept(IEnumerable symbols) { + public Token SymbolTokenExcept(IEnumerable symbols) { Safe.ArgumentNotNull(symbols, "symbols"); - return Token.New( Enumerable.Range(0, m_alphabet.Count).Except(TranslateOrDie(symbols)).ToArray()); + return Token.New( Enumerable.Range(0, Alphabet.Count).Except(TranslateOrDie(symbols)).ToArray() ); } - protected CDFADefinition BuildDFA(Token lang) { + protected CDFADefinition BuildDFA(Token lang) { Safe.ArgumentNotNull(lang, "lang"); - var dfa = new CDFADefinition(m_alphabet); + var dfa = new CDFADefinition(Alphabet); - var builder = new DFABuilder(); + var builder = new RegularDFABuilder(); lang.Accept( builder ); diff --git a/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs b/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs new file mode 100644 --- /dev/null +++ b/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs @@ -0,0 +1,179 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + +namespace Implab.Automaton.RegularExpressions { + /// + /// Используется для построения ДКА по регулярному выражению, сначала обходит + /// регулярное выражение и вычисляет followpos, затем используется метод + /// для построения автомата. 
+ /// + public class RegularDFABuilder : IVisitor { + int m_idx = 0; + Token m_root; + HashSet m_firstpos; + HashSet m_lastpos; + + readonly Dictionary> m_followpos = new Dictionary>(); + readonly Dictionary m_indexes = new Dictionary(); + readonly Dictionary m_ends = new Dictionary(); + + public Dictionary> FollowposMap { + get { return m_followpos; } + } + + public HashSet Followpos(int pos) { + HashSet set; + if (m_followpos.TryGetValue(pos, out set)) + return set; + return m_followpos[pos] = new HashSet(); + } + + bool Nullable(object n) { + if (n is EmptyToken || n is StarToken) + return true; + if (n is AltToken) + return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right); + if (n is CatToken) + return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right); + return false; + } + + + public void Visit(AltToken token) { + if (m_root == null) + m_root = token; + var firtspos = new HashSet(); + var lastpos = new HashSet(); + + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + token.Right.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + } + + public void Visit(StarToken token) { + if (m_root == null) + m_root = token; + token.Token.Accept(this); + + foreach (var i in m_lastpos) + Followpos(i).UnionWith(m_firstpos); + } + + public void Visit(CatToken token) { + if (m_root == null) + m_root = token; + + var firtspos = new HashSet(); + var lastpos = new HashSet(); + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + var leftLastpos = m_lastpos; + + token.Right.Accept(this); + lastpos.UnionWith(m_lastpos); + var rightFirstpos = m_firstpos; + + if (Nullable(token.Left)) + firtspos.UnionWith(rightFirstpos); + + if (Nullable(token.Right)) + lastpos.UnionWith(leftLastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + + foreach (var i in leftLastpos) + Followpos(i).UnionWith(rightFirstpos); + + } + + public void 
Visit(EmptyToken token) { + if (m_root == null) + m_root = token; + } + + public void Visit(SymbolToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = token.Value; + m_firstpos = new HashSet(new[] { m_idx }); + m_lastpos = new HashSet(new[] { m_idx }); + } + + public void Visit(EndToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; + m_firstpos = new HashSet(new[] { m_idx }); + m_lastpos = new HashSet(new[] { m_idx }); + Followpos(m_idx); + m_ends.Add(m_idx, token.Tag); + } + + public void BuildDFA(IDFADefinitionBuilder dfa) { + Safe.ArgumentNotNull(dfa,"dfa"); + + var states = new MapAlphabet>(new CustomEqualityComparer>( + (x, y) => x.SetEquals(y), + x => x.Sum(n => n.GetHashCode()) + )); + + var initialState = states.DefineSymbol(m_firstpos); + + var tags = GetStateTags(m_firstpos); + if (tags != null && tags.Length > 0) + dfa.MarkFinalState(initialState, tags); + + var inputMax = m_indexes.Values.Max(); + var queue = new Queue>(); + + queue.Enqueue(m_firstpos); + + while (queue.Count > 0) { + var state = queue.Dequeue(); + var s1 = states.Translate(state); + Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); + + for (int a = 0; a <= inputMax; a++) { + var next = new HashSet(); + foreach (var p in state) { + if (m_indexes[p] == a) { + next.UnionWith(Followpos(p)); + } + } + if (next.Count > 0) { + int s2 = states.Translate(next); + if (s2 == DFAConst.UNCLASSIFIED_INPUT) { + s2 = states.DefineSymbol(next); + + tags = GetStateTags(next); + if (tags != null && tags.Length > 0) + dfa.MarkFinalState(s2, tags); + + queue.Enqueue(next); + } + dfa.DefineTransition(s1, s2, a); + } + } + } + } + + TTag[] GetStateTags(IEnumerable state) { + Debug.Assert(state != null); + return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); + } + + } +} diff --git a/Implab/Formats/JSON/JSONElementContext.cs b/Implab/Formats/JSON/JSONElementContext.cs new file mode 100644 
--- /dev/null +++ b/Implab/Formats/JSON/JSONElementContext.cs @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + /// + /// internal + /// + public enum JSONElementContext { + None, + Object, + Array, + Closed + } +} diff --git a/Implab/Formats/JSON/JSONElementType.cs b/Implab/Formats/JSON/JSONElementType.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/JSON/JSONElementType.cs @@ -0,0 +1,34 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + /// + /// Тип элемента на котором находится парсер + /// + public enum JSONElementType { + None, + /// + /// Начало объекта + /// + BeginObject, + /// + /// Конец объекта + /// + EndObject, + /// + /// Начало массива + /// + BeginArray, + /// + /// Конец массива + /// + EndArray, + /// + /// Простое значение + /// + Value + } +} diff --git a/Implab/Formats/JSON/JSONGrammar.cs b/Implab/Formats/JSON/JSONGrammar.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/JSON/JSONGrammar.cs @@ -0,0 +1,99 @@ +using System.Linq; +using Implab.Automaton.RegularExpressions; + +namespace Implab.Formats.JSON { + class JSONGrammar : Grammar { + public enum TokenType { + None, + BeginObject, + EndObject, + BeginArray, + EndArray, + String, + Number, + Literal, + NameSeparator, + ValueSeparator, + + StringBound, + EscapedChar, + UnescapedChar, + EscapedUnicode, + + Minus, + Plus, + Sign, + Integer, + Dot, + Exp + } + + readonly CDFADefinition m_jsonDFA; + readonly CDFADefinition m_stringDFA; + + public JSONGrammar() { + DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); + var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9')); + var digit9 = SymbolRangeToken('1', '9'); + var zero = SymbolToken('0'); + var digit = zero.Or(digit9); + var dot = SymbolToken('.'); + 
var minus = SymbolToken('-'); + var sign = SymbolSetToken('-', '+'); + var expSign = SymbolSetToken('e', 'E'); + var letters = SymbolRangeToken('a', 'z'); + var integer = zero.Or(digit9.Cat(digit.EClosure())); + var frac = dot.Cat(digit.Closure()); + var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure()); + var quote = SymbolToken('"'); + var backSlash = SymbolToken('\\'); + var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r'); + var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4)); + var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure(); + var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace); + var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace); + var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace); + var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace); + var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace); + var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace); + + var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional()); + var literal = letters.Closure(); + var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x)); + + var jsonExpression = + number.Tag(TokenType.Number) + .Or(literal.Tag(TokenType.Literal)) + .Or(quote.Tag(TokenType.StringBound)) + .Or(beginObject.Tag(TokenType.BeginObject)) + .Or(endObject.Tag(TokenType.EndObject)) + .Or(beginArray.Tag(TokenType.BeginArray)) + .Or(endArray.Tag(TokenType.EndArray)) + .Or(nameSep.Tag(TokenType.NameSeparator)) + .Or(valueSep.Tag(TokenType.ValueSeparator)); + + + var jsonStringExpression = + quote.Tag(TokenType.StringBound) + .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar)) + .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode)) + .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); + + + m_jsonDFA = BuildDFA(jsonExpression); + m_stringDFA = BuildDFA(jsonStringExpression); + } + 
+ public CDFADefinition JsonDFA { + get { + return m_jsonDFA; + } + } + + public CDFADefinition JsonStringDFA { + get { + return m_stringDFA; + } + } + } +} diff --git a/Implab/Formats/JSON/JSONParser.cs b/Implab/Formats/JSON/JSONParser.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/JSON/JSONParser.cs @@ -0,0 +1,277 @@ +using Implab.Parsing; +using System; +using System.Diagnostics; +using System.IO; + +namespace Implab.JSON { + /// + /// internal + /// + public struct JSONParserContext { + public string memberName; + public JSONElementContext elementContext; + } + + /// + /// Pull парсер JSON данных. + /// + /// + /// Следует отметить отдельную интерпретацию свойства , + /// оно означает текущий уровень вложенности объектов, однако закрывающий + /// элемент объекта и массива имеет уровень меньше, чем сам объект. + /// + /// { // Level = 1 + /// "name" : "Peter", // Level = 1 + /// "address" : { // Level = 2 + /// city : "Stern" // Level = 2 + /// } // Level = 1 + /// } // Level = 0 + /// + /// + public class JSONParser : DFAutomaton, IDisposable { + + enum MemberContext { + MemberName, + MemberValue + } + + static readonly EnumAlphabet _alphabet = EnumAlphabet.FullAlphabet; + static readonly DFAStateDescriptior[] _jsonDFA; + static readonly DFAStateDescriptior[] _objectDFA; + static readonly DFAStateDescriptior[] _arrayDFA; + + static JSONParser() { + + + var valueExpression = Token.New(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String); + var memberExpression = Token.New(JsonTokenType.String).Cat(Token.New(JsonTokenType.NameSeparator)).Cat(valueExpression); + + var objectExpression = memberExpression + .Cat( + Token.New(JsonTokenType.ValueSeparator) + .Cat(memberExpression) + .EClosure() + ) + .Optional() + .Cat(Token.New(JsonTokenType.EndObject)) + .Tag(0); + var arrayExpression = valueExpression + .Cat( + Token.New(JsonTokenType.ValueSeparator) + .Cat(valueExpression) + .EClosure() 
+ ) + .Optional() + .Cat(Token.New(JsonTokenType.EndArray)) + .Tag(0); + + var jsonExpression = valueExpression.Tag(0); + + _jsonDFA = BuildDFA(jsonExpression).States; + _objectDFA = BuildDFA(objectExpression).States; + _arrayDFA = BuildDFA(arrayExpression).States; + } + + static EDFADefinition BuildDFA(Token expr) { + var builder = new DFABuilder(); + var dfa = new EDFADefinition(_alphabet); + expr.Accept(builder); + + builder.BuildDFA(dfa); + return dfa; + } + + JSONScanner m_scanner; + MemberContext m_memberContext; + + JSONElementType m_elementType; + object m_elementValue; + + /// + /// Создает новый парсер на основе строки, содержащей JSON + /// + /// + public JSONParser(string text) + : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { + Safe.ArgumentNotEmpty(text, "text"); + m_scanner = new JSONScanner(); + m_scanner.Feed(text.ToCharArray()); + } + + /// + /// Создает новый экземпляр парсера, на основе текстового потока. + /// + /// Текстовый поток. + /// Признак того, что парсер должен конролировать время жизни входного потока. + public JSONParser(TextReader reader, bool dispose) + : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { + Safe.ArgumentNotNull(reader, "reader"); + m_scanner = new JSONScanner(); + m_scanner.Feed(reader, dispose); + } + + /// + /// Тип текущего элемента на котором стоит парсер. + /// + public JSONElementType ElementType { + get { return m_elementType; } + } + + /// + /// Имя элемента - имя свойства родительского контейнера. Для элементов массивов и корневого всегда + /// пустая строка. + /// + public string ElementName { + get { return m_context.info.memberName; } + } + + /// + /// Значение элемента. 
Только для элементов типа , для остальных null + /// + public object ElementValue { + get { return m_elementValue; } + } + + /// + /// Читает слеюудущий объект из потока + /// + /// true - операция чтения прошла успешно, false - конец данных + public bool Read() { + if (m_context.current == UNREACHEBLE_STATE) + throw new InvalidOperationException("The parser is in invalid state"); + object tokenValue; + JsonTokenType tokenType; + m_context.info.memberName = String.Empty; + while (m_scanner.ReadToken(out tokenValue, out tokenType)) { + Move((int)tokenType); + if (m_context.current == UNREACHEBLE_STATE) + UnexpectedToken(tokenValue, tokenType); + switch (tokenType) { + case JsonTokenType.BeginObject: + Switch( + _objectDFA, + INITIAL_STATE, + new JSONParserContext { + memberName = m_context.info.memberName, + elementContext = JSONElementContext.Object + } + ); + m_elementValue = null; + m_memberContext = MemberContext.MemberName; + m_elementType = JSONElementType.BeginObject; + return true; + case JsonTokenType.EndObject: + Restore(); + m_elementValue = null; + m_elementType = JSONElementType.EndObject; + return true; + case JsonTokenType.BeginArray: + Switch( + _arrayDFA, + INITIAL_STATE, + new JSONParserContext { + memberName = m_context.info.memberName, + elementContext = JSONElementContext.Array + } + ); + m_elementValue = null; + m_memberContext = MemberContext.MemberValue; + m_elementType = JSONElementType.BeginArray; + return true; + case JsonTokenType.EndArray: + Restore(); + m_elementValue = null; + m_elementType = JSONElementType.EndArray; + return true; + case JsonTokenType.String: + if (m_memberContext == MemberContext.MemberName) { + m_context.info.memberName = (string)tokenValue; + break; + } + m_elementType = JSONElementType.Value; + m_elementValue = tokenValue; + return true; + case JsonTokenType.Number: + m_elementType = JSONElementType.Value; + m_elementValue = tokenValue; + return true; + case JsonTokenType.Literal: + m_elementType = 
JSONElementType.Value; + m_elementValue = ParseLiteral((string)tokenValue); + return true; + case JsonTokenType.NameSeparator: + m_memberContext = MemberContext.MemberValue; + break; + case JsonTokenType.ValueSeparator: + m_memberContext = m_context.info.elementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue; + break; + default: + UnexpectedToken(tokenValue, tokenType); + break; + } + } + if (m_context.info.elementContext != JSONElementContext.None) + throw new ParserException("Unexpedted end of data"); + return false; + } + + object ParseLiteral(string literal) { + switch (literal) { + case "null": + return null; + case "false": + return false; + case "true": + return true; + default: + UnexpectedToken(literal, JsonTokenType.Literal); + return null; // avoid compliler error + } + } + + void UnexpectedToken(object value, JsonTokenType tokenType) { + throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value)); + } + + + /// + /// Признак конца потока + /// + public bool EOF { + get { + return m_scanner.EOF; + } + } + + protected virtual void Dispose(bool disposing) { + if (disposing) { + m_scanner.Dispose(); + } + } + + /// + /// Освобождает парсер и связанный с ним сканнер. + /// + public void Dispose() { + Dispose(true); + GC.SuppressFinalize(this); + } + + ~JSONParser() { + Dispose(false); + } + + /// + /// Переходит в конец текущего объекта. 
+        ///
+        public void SeekElementEnd() {
+            var level = Level - 1;
+
+            Debug.Assert(level >= 0);
+
+            while (Level != level)
+                Read();
+        }
+    }
+
+}
diff --git a/Implab/Formats/JSON/JSONScanner.cs b/Implab/Formats/JSON/JSONScanner.cs
new file mode 100644
--- /dev/null
+++ b/Implab/Formats/JSON/JSONScanner.cs
@@ -0,0 +1,108 @@
+using Implab.Parsing;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.JSON {
+    /// <summary>
+    /// Scanner (lexer) that splits a character stream into JSON tokens.
+    /// </summary>
+    public class JSONScanner : Scanner {
+        char[] m_stringBuffer;
+        DFAStateDescriptior[] m_stringDFA;
+        int[] m_stringAlphabet;
+
+        /// <summary>
+        /// Creates a new scanner instance.
+        /// </summary>
+        // NOTE(review): this changeset removes Grammar.Instance; confirm how the JSONGrammar
+        // instance used below is meant to be obtained.
+        public JSONScanner()
+            : base(JSONGrammar.Instance.JsonDFA.States, JSONGrammar.Instance.JsonDFA.Alphabet.GetTranslationMap()) {
+            m_stringBuffer = new char[1024];
+            var dfa = JSONGrammar.Instance.JsonStringDFA;
+            m_stringAlphabet = dfa.Alphabet.GetTranslationMap();
+            m_stringDFA = dfa.States;
+        }
+
+        /// <summary>
+        /// Reads the next lexical element from the input.
+        /// </summary>
+        /// <param name="tokenValue">Receives the value of the token that was read.</param>
+        /// <param name="tokenType">Receives the type of the token that was read.</param>
+        /// <returns><c>true</c> if a token was read; <c>false</c> if the end of the input was reached.</returns>
+        /// <remarks>An exception is thrown when a token cannot be recognized. Token values are processed, i.e.
+        /// escape sequences in strings are decoded and numbers are converted to double.</remarks>
+        public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
+            if (ReadTokenInternal()) {
+                switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
+                    case JSONGrammar.TokenType.StringBound:
+                        tokenValue = ReadString();
+                        tokenType = JsonTokenType.String;
+                        break;
+                    case JSONGrammar.TokenType.Number:
+                        tokenValue = Double.Parse(new String(m_buffer, m_tokenOffset, m_tokenLen), CultureInfo.InvariantCulture);
+                        tokenType = JsonTokenType.Number;
+                        break;
+                    default:
+                        // The grammar tag is reinterpreted as a JsonTokenType by numeric value.
+                        tokenType = (JsonTokenType)m_currentState.tag[0];
+                        tokenValue = new String(m_buffer, m_tokenOffset, m_tokenLen);
+                        break;
+                }
+                return true;
+            }
+            tokenValue = null;
+            tokenType = JsonTokenType.None;
+            return false;
+        }
+
+        // Reads the rest of a string literal with the string DFA and returns the decoded text;
+        // ReadToken calls this when it sees a StringBound token.
+        string ReadString() {
+            int pos = 0;
+            Switch(m_stringDFA, m_stringAlphabet);
+            while (ReadTokenInternal()) {
+                switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
+                    case JSONGrammar.TokenType.StringBound:
+                        Restore();
+                        return new String(m_stringBuffer, 0, pos);
+                    case JSONGrammar.TokenType.UnescapedChar:
+                        EnsureStringBufferSize(pos + m_tokenLen);
+                        Array.Copy(m_buffer, m_tokenOffset, m_stringBuffer, pos, m_tokenLen);
+                        pos += m_tokenLen;
+                        break;
+                    case JSONGrammar.TokenType.EscapedUnicode:
+                        EnsureStringBufferSize(pos + 1);
+                        m_stringBuffer[pos] = StringTranslator.TranslateHexUnicode(m_buffer, m_tokenOffset + 2);
+                        pos++;
+                        break;
+                    case JSONGrammar.TokenType.EscapedChar:
+                        EnsureStringBufferSize(pos + 1);
+                        m_stringBuffer[pos] = StringTranslator.TranslateEscapedChar(m_buffer[m_tokenOffset + 1]);
+                        pos++;
+                        break;
+                    default:
+                        break;
+                }
+
+            }
+
+            throw new ParserException("Unexpected end of data");
+        }
+
+        // Makes sure m_stringBuffer can hold at least `size` characters, keeping its contents.
+        void EnsureStringBufferSize(int size) {
+            if (size > m_stringBuffer.Length) {
+                // Grow geometrically: resizing to the exact size reallocated and copied the
+                // whole buffer on every token once a string outgrew the initial 1024 chars.
+                var newBuffer = new char[Math.Max(size, m_stringBuffer.Length * 2)];
+                m_stringBuffer.CopyTo(newBuffer, 0);
+                m_stringBuffer = newBuffer;
+            }
+        }
+    }
+}
diff --git a/Implab/Formats/JSON/JSONWriter.cs b/Implab/Formats/JSON/JSONWriter.cs
new file mode 100644
--- /dev/null
+++ b/Implab/Formats/JSON/JSONWriter.cs
@@ -0,0
+1,319 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Globalization; +using System.Diagnostics; + +namespace Implab.JSON { + public class JSONWriter { + struct Context { + public bool needComma; + public JSONElementContext element; + } + Stack m_contextStack = new Stack(); + Context m_context; + + const int BUFFER_SIZE = 64; + + TextWriter m_writer; + readonly bool m_indent = true; + readonly int m_indentSize = 4; + readonly char[] m_buffer = new char[BUFFER_SIZE]; + int m_bufferPos; + + static readonly char [] _hex = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; + static readonly char [] _escapeBKS, + _escapeFWD, + _escapeCR, + _escapeNL, + _escapeTAB, + _escapeBSLASH, + _escapeQ; + + static JSONWriter() { + _escapeBKS = "\\b".ToCharArray(); + _escapeFWD = "\\f".ToCharArray(); + _escapeCR = "\\r".ToCharArray(); + _escapeNL = "\\n".ToCharArray(); + _escapeTAB = "\\t".ToCharArray(); + _escapeBSLASH = "\\\\".ToCharArray(); + _escapeQ = "\\\"".ToCharArray(); + } + + public JSONWriter(TextWriter writer) { + Safe.ArgumentNotNull(writer, "writer"); + m_writer = writer; + } + + public JSONWriter(TextWriter writer, bool indent) { + Safe.ArgumentNotNull(writer, "writer"); + + m_writer = writer; + m_indent = indent; + } + + void WriteIndent() { + if (m_indent) { + var indent = new char[m_contextStack.Count * m_indentSize + 1]; + indent[0] = '\n'; + for (int i = 1; i < indent.Length; i++) + indent[i] = ' '; + m_writer.Write(new String(indent)); + } else { + m_writer.Write(' '); + } + } + + void WriteMemberName(string name) { + Safe.ArgumentNotEmpty(name, "name"); + if (m_context.element != JSONElementContext.Object) + OperationNotApplicable("WriteMember"); + if (m_context.needComma) + m_writer.Write(","); + + WriteIndent(); + m_context.needComma = true; + Write(name); + m_writer.Write(" : "); + } + + public void WriteValue(string name, string value) { + WriteMemberName(name); + Write(value); + } + + 
public void WriteValue(string name, bool value) { + WriteMemberName(name); + Write(value); + } + + public void WriteValue(string name, double value) { + WriteMemberName(name); + Write(value); + } + + public void WriteValue(string value) { + if (m_context.element == JSONElementContext.Array) { + + if (m_context.needComma) + m_writer.Write(","); + WriteIndent(); + m_context.needComma = true; + + Write(value); + } else if (m_context.element == JSONElementContext.None) { + Write(value); + m_context.element = JSONElementContext.Closed; + } else { + OperationNotApplicable("WriteValue"); + } + } + + public void WriteValue(bool value) { + if (m_context.element == JSONElementContext.Array) { + + if (m_context.needComma) + m_writer.Write(","); + WriteIndent(); + m_context.needComma = true; + + Write(value); + } else if (m_context.element == JSONElementContext.None) { + Write(value); + m_context.element = JSONElementContext.Closed; + } else { + OperationNotApplicable("WriteValue"); + } + } + + public void WriteValue(double value) { + if (m_context.element == JSONElementContext.Array) { + + if (m_context.needComma) + m_writer.Write(","); + WriteIndent(); + m_context.needComma = true; + + Write(value); + } else if (m_context.element == JSONElementContext.None) { + Write(value); + m_context.element = JSONElementContext.Closed; + } else { + OperationNotApplicable("WriteValue"); + } + } + + public void BeginObject() { + if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array) + OperationNotApplicable("BeginObject"); + if (m_context.needComma) + m_writer.Write(","); + + WriteIndent(); + + m_context.needComma = true; + + m_contextStack.Push(m_context); + + m_context = new Context { element = JSONElementContext.Object, needComma = false }; + m_writer.Write("{"); + } + + public void BeginObject(string name) { + WriteMemberName(name); + + m_contextStack.Push(m_context); + + m_context = new Context { element = JSONElementContext.Object, 
needComma = false };
+            m_writer.Write("{");
+        }
+
+        /// <summary>Closes the current object, fails with InvalidOperationException outside of an object.</summary>
+        public void EndObject() {
+            if (m_context.element != JSONElementContext.Object)
+                OperationNotApplicable("EndObject");
+            m_context = m_contextStack.Pop();
+            // the outermost element has just been closed, mark the whole document as closed
+            if (m_contextStack.Count == 0)
+                m_context.element = JSONElementContext.Closed;
+            WriteIndent();
+            m_writer.Write("}");
+        }
+
+        /// <summary>Starts an array as the root value or as an item of the current array.</summary>
+        public void BeginArray() {
+            if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array)
+                OperationNotApplicable("BeginArray");
+            if (m_context.needComma)
+                m_writer.Write(",");
+            m_context.needComma = true;
+
+            WriteIndent();
+            m_contextStack.Push(m_context);
+            m_context = new Context { element = JSONElementContext.Array, needComma = false };
+            m_writer.Write("[");
+        }
+
+        /// <summary>Starts an array as the member <paramref name="name"/> of the current object.</summary>
+        public void BeginArray(string name) {
+            WriteMemberName(name);
+            m_contextStack.Push(m_context);
+            m_context = new Context { element = JSONElementContext.Array, needComma = false };
+            m_writer.Write("[");
+        }
+
+        /// <summary>Closes the current array, fails with InvalidOperationException outside of an array.</summary>
+        public void EndArray() {
+            if (m_context.element != JSONElementContext.Array)
+                OperationNotApplicable("EndArray");
+            m_context = m_contextStack.Pop();
+            // the outermost element has just been closed, mark the whole document as closed
+            if (m_contextStack.Count == 0)
+                m_context.element = JSONElementContext.Closed;
+            WriteIndent();
+            m_writer.Write("]");
+        }
+
+        void Write(bool value) {
+            m_writer.Write(value ? 
"true" : "false"); + } + + void FlushBuffer() { + if (m_bufferPos > 0) { + m_writer.Write(m_buffer, 0, m_bufferPos); + m_bufferPos = 0; + } + } + + void Write(string value) { + if (value == null) { + m_writer.Write("null"); + return; + } + + Debug.Assert(m_bufferPos == 0); + + var chars = value.ToCharArray(); + m_buffer[m_bufferPos++] = '"'; + + // Analysis disable once ForCanBeConvertedToForeach + for (int i = 0; i < chars.Length; i++) { + var ch = chars[i]; + + char[] escapeSeq; + + switch (ch) { + case '\b': + escapeSeq = _escapeBKS; + break; + case '\f': + escapeSeq = _escapeFWD; + break; + case '\r': + escapeSeq = _escapeCR; + break; + case '\n': + escapeSeq = _escapeNL; + break; + case '\t': + escapeSeq = _escapeTAB; + break; + case '\\': + escapeSeq = _escapeBSLASH; + break; + case '"': + escapeSeq = _escapeQ; + break; + default: + if (ch < 0x20) { + if (m_bufferPos + 6 > BUFFER_SIZE) + FlushBuffer(); + + m_buffer[m_bufferPos++] = '\\'; + m_buffer[m_bufferPos++] = 'u'; + m_buffer[m_bufferPos++] = '0'; + m_buffer[m_bufferPos++] = '0'; + m_buffer[m_bufferPos++] = _hex[ch >> 4 & 0xf]; + m_buffer[m_bufferPos++] = _hex[ch & 0xf]; + + } else { + if (m_bufferPos >= BUFFER_SIZE) + FlushBuffer(); + m_buffer[m_bufferPos++] = ch; + } + continue; + } + + if (m_bufferPos + escapeSeq.Length > BUFFER_SIZE) + FlushBuffer(); + + Array.Copy(escapeSeq, 0, m_buffer, m_bufferPos, escapeSeq.Length); + m_bufferPos += escapeSeq.Length; + + } + + if (m_bufferPos >= BUFFER_SIZE) + FlushBuffer(); + + m_buffer[m_bufferPos++] = '"'; + + FlushBuffer(); + } + + void Write(double value) { + if (double.IsNaN(value)) + Write("NaN"); + else if (double.IsNegativeInfinity(value)) + Write("-Infinity"); + else if (double.IsPositiveInfinity(value)) + Write("Infinity"); + else + m_writer.Write(value.ToString(CultureInfo.InvariantCulture)); + } + + void OperationNotApplicable(string opName) { + throw new InvalidOperationException(String.Format("The operation '{0}' isn't applicable in the context of 
'{1}'", opName, m_context.element )); + } + + } +} diff --git a/Implab/Formats/JSON/JSONXmlReader.cs b/Implab/Formats/JSON/JSONXmlReader.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/JSON/JSONXmlReader.cs @@ -0,0 +1,343 @@ +using Implab; +using Implab.Parsing; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using System.Xml; + +namespace Implab.JSON { + public class JSONXmlReader : XmlReader { + + enum ValueContext { + Undefined, + ElementStart, + ElementValue, + ElementEnd, + ElementEmpty + } + + struct LocalNameContext { + public string localName; + public bool isArray; + } + + JSONParser m_parser; + ValueContext m_valueContext; + ReadState m_state = ReadState.Initial; + Stack m_localNameStack = new Stack(); + LocalNameContext m_localName; + int m_depthCorrection = 0; + + readonly string m_rootName; + readonly string m_prefix; + readonly string m_namespaceUri; + readonly bool m_flattenArrays; + readonly string m_arrayItemName; + readonly XmlNameTable m_nameTable; + + JSONXmlReader(JSONParser parser, JSONXmlReaderOptions options) { + m_parser = parser; + + if (options != null) { + m_prefix = options.NodesPrefix ?? String.Empty; + m_namespaceUri = options.NamespaceURI ?? String.Empty; + m_rootName = options.RootName ?? "json"; + m_flattenArrays = options.FlattenArrays; + m_arrayItemName = options.ArrayItemName ?? "item"; + m_nameTable = options.NameTable ?? 
new NameTable(); + } else { + m_prefix = String.Empty; + m_namespaceUri = String.Empty; + m_rootName = "json"; + m_flattenArrays = false; + m_arrayItemName = "item"; + m_nameTable = new NameTable(); + } + } + + /// + /// Always 0, JSON doesn't support attributes + /// + public override int AttributeCount { + get { return 0; } + } + + public override string BaseURI { + get { return String.Empty; } + } + + public override int Depth { + get { + return m_localNameStack.Count + m_depthCorrection; + } + } + + public override bool EOF { + get { return m_parser.EOF; } + } + + /// + /// Always throws an exception + /// + /// + /// + public override string GetAttribute(int i) { + throw new ArgumentOutOfRangeException(); + } + + /// + /// Always returns empty string + /// + /// + /// + /// + public override string GetAttribute(string name, string namespaceURI) { + return String.Empty; + } + + /// + /// Always returns empty string + /// + /// + /// + public override string GetAttribute(string name) { + return String.Empty; + } + + public override bool IsEmptyElement { + get { return m_parser.ElementType == JSONElementType.Value && m_valueContext == ValueContext.ElementEmpty; } + } + + public override string LocalName { + get { return m_localName.localName; } + } + + public override string LookupNamespace(string prefix) { + if (String.IsNullOrEmpty(prefix) || prefix == m_prefix) + return m_namespaceUri; + else + return String.Empty; + } + + public override bool MoveToAttribute(string name, string ns) { + return false; + } + + public override bool MoveToAttribute(string name) { + return false; + } + + public override bool MoveToElement() { + return false; + } + + public override bool MoveToFirstAttribute() { + return false; + } + + public override bool MoveToNextAttribute() { + return false; + } + + public override XmlNameTable NameTable { + get { return m_nameTable; } + } + + public override string NamespaceURI { + get { return m_namespaceUri; } + } + + public override 
XmlNodeType NodeType { + get { + switch (m_parser.ElementType) { + case JSONElementType.BeginObject: + case JSONElementType.BeginArray: + return XmlNodeType.Element; + case JSONElementType.EndObject: + case JSONElementType.EndArray: + return XmlNodeType.EndElement; + case JSONElementType.Value: + switch (m_valueContext) { + case ValueContext.ElementStart: + case ValueContext.ElementEmpty: + return XmlNodeType.Element; + case ValueContext.ElementValue: + return XmlNodeType.Text; + case ValueContext.ElementEnd: + return XmlNodeType.EndElement; + default: + throw new InvalidOperationException(); + } + default: + throw new InvalidOperationException(); + } + } + } + + public override string Prefix { + get { return m_prefix; } + } + + public override bool Read() { + if (m_state != System.Xml.ReadState.Interactive && m_state != System.Xml.ReadState.Initial) + return false; + + if (m_state == ReadState.Initial) + m_state = System.Xml.ReadState.Interactive; + + try { + switch (m_parser.ElementType) { + case JSONElementType.Value: + switch (m_valueContext) { + case ValueContext.ElementStart: + SetLocalName(String.Empty); + m_valueContext = ValueContext.ElementValue; + return true; + case ValueContext.ElementValue: + RestoreLocalName(); + m_valueContext = ValueContext.ElementEnd; + return true; + case ValueContext.ElementEmpty: + case ValueContext.ElementEnd: + RestoreLocalName(); + break; + } + break; + case JSONElementType.EndArray: + case JSONElementType.EndObject: + RestoreLocalName(); + break; + } + string itemName = m_parser.ElementType == JSONElementType.None ? m_rootName : m_flattenArrays ? 
m_localName.localName : m_arrayItemName; + while (m_parser.Read()) { + if (!String.IsNullOrEmpty(m_parser.ElementName)) + itemName = m_parser.ElementName; + + switch (m_parser.ElementType) { + case JSONElementType.BeginArray: + if (m_flattenArrays && !m_localName.isArray) { + m_depthCorrection--; + SetLocalName(itemName, true); + continue; + } else { + SetLocalName(itemName, true); + } + break; + case JSONElementType.BeginObject: + SetLocalName(itemName); + break; + case JSONElementType.EndArray: + if (m_flattenArrays && !m_localNameStack.Peek().isArray) { + RestoreLocalName(); + m_depthCorrection++; + continue; + } + break; + case JSONElementType.EndObject: + break; + case JSONElementType.Value: + SetLocalName(itemName); + m_valueContext = m_parser.ElementValue == null ? ValueContext.ElementEmpty : ValueContext.ElementStart; + break; + default: + break; + } + return true; + } + + m_state = System.Xml.ReadState.EndOfFile; + return false; + } catch { + m_state = System.Xml.ReadState.Error; + throw; + } + } + + public override bool ReadAttributeValue() { + return false; + } + + public override ReadState ReadState { + get { return m_state; } + } + + public override void ResolveEntity() { + // do nothing + } + + public override string Value { + get { + if (m_parser.ElementValue == null) + return String.Empty; + if (Convert.GetTypeCode(m_parser.ElementValue) == TypeCode.Double) + return ((double)m_parser.ElementValue).ToString(CultureInfo.InvariantCulture); + else + return m_parser.ElementValue.ToString(); + } + } + + void SetLocalName(string name) { + m_localNameStack.Push(m_localName); + m_localName.localName = name; + m_localName.isArray = false; + } + + void SetLocalName(string name, bool isArray) { + m_localNameStack.Push(m_localName); + m_localName.localName = name; + m_localName.isArray = isArray; + } + + void RestoreLocalName() { + m_localName = m_localNameStack.Pop(); + } + + public override void Close() { + + } + + protected override void Dispose(bool 
disposing) { + #if MONO + disposing = true; + #endif + if (disposing) { + m_parser.Dispose(); + } + base.Dispose(disposing); + } + + public static JSONXmlReader Create(string file, JSONXmlReaderOptions options) { + return Create(File.OpenText(file), options); + } + + /// + /// Creates the XmlReader for the specified text stream with JSON data. + /// + /// Text reader. + /// Options. + /// + /// The reader will be disposed when the XmlReader is disposed. + /// + public static JSONXmlReader Create(TextReader reader, JSONXmlReaderOptions options) { + return new JSONXmlReader(new JSONParser(reader, true), options); + } + + /// + /// Creates the XmlReader for the specified stream with JSON data. + /// + /// Stream. + /// Options. + /// + /// The stream will be disposed when the XmlReader is disposed. + /// + public static JSONXmlReader Create(Stream stream, JSONXmlReaderOptions options) { + Safe.ArgumentNotNull(stream, "stream"); + // HACK don't dispose StreaReader to keep stream opened + return Create(new StreamReader(stream), options); + } + } +} diff --git a/Implab/Formats/JSON/JSONXmlReaderOptions.cs b/Implab/Formats/JSON/JSONXmlReaderOptions.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/JSON/JSONXmlReaderOptions.cs @@ -0,0 +1,65 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Xml; + +namespace Implab.JSON { + /// + /// Набор необязательных параметров для , позволяющий управлять процессом + /// интерпретации JSON документа. + /// + public class JSONXmlReaderOptions { + /// + /// Пространство имен в котором будут располагаться читаемые элементы документа + /// + public string NamespaceURI { + get; + set; + } + + /// + /// Интерпретировать массивы как множественные элементы (убирает один уровень вложенности), иначе массив + /// представляется в виде узла, дочерними элементами которого являются элементы массива, имена дочерних элементов + /// определяются свойством . По умолчанию false. 
+ /// + public bool FlattenArrays { + get; + set; + } + + /// + /// Префикс, для узлов документа + /// + public string NodesPrefix { + get; + set; + } + + /// + /// Имя корневого элемента в xml документе + /// + public string RootName { + get; + set; + } + + /// + /// Имя элемента для массивов, если не включена опция . + /// По умолчанию item. + /// + public string ArrayItemName { + get; + set; + } + + /// + /// Таблица атомизированных строк для построения документа. + /// + public XmlNameTable NameTable { + get; + set; + } + + } +} diff --git a/Implab/Formats/JSON/JsonTokenType.cs b/Implab/Formats/JSON/JsonTokenType.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/JSON/JsonTokenType.cs @@ -0,0 +1,50 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + /// + /// Тип токенов, возвращаемых . + /// + public enum JsonTokenType : int { + None = 0, + /// + /// Начало объекта + /// + BeginObject, + /// + /// Конец объекта + /// + EndObject, + /// + /// Начало массива + /// + BeginArray, + /// + /// Конец массива + /// + EndArray, + /// + /// Строка + /// + String, + /// + /// Число + /// + Number, + /// + /// Литерал + /// + Literal, + /// + /// Разделитель имени : + /// + NameSeparator, + /// + /// Разделитель имени , + /// + ValueSeparator + } +} diff --git a/Implab/Formats/JSON/StringTranslator.cs b/Implab/Formats/JSON/StringTranslator.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/JSON/StringTranslator.cs @@ -0,0 +1,96 @@ +using Implab; +using Implab.Parsing; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + /// + /// Класс для преобразования экранированной строки JSON + /// + public class StringTranslator : Scanner { + static readonly char[] _escMap; + static readonly int[] _hexMap; + + static StringTranslator() { + var 
chars = new char[] { 'b', 'f', 't', 'r', 'n', '\\', '/', '"' };
+            var vals = new char[] { '\b', '\f', '\t', '\r', '\n', '\\', '/', '"' };
+            // the double quote is listed since \" is a JSON escape too, without an entry it maps to '\0'
+            _escMap = new char[chars.Max() + 1];
+
+            for (int i = 0; i < chars.Length; i++)
+                _escMap[chars[i]] = vals[i];
+
+            var hexs = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F' };
+            var ints = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15 };
+
+            _hexMap = new int[hexs.Max() + 1];
+
+            for (int i = 0; i < hexs.Length; i++)
+                _hexMap[hexs[i]] = ints[i];
+        }
+
+        public StringTranslator()
+            : base(JSONGrammar.Instance.JsonStringDFA.States, JSONGrammar.Instance.JsonStringDFA.Alphabet.GetTranslationMap()) {
+        }
+
+        /// <summary>Replaces the escape sequences in <paramref name="data"/> with the chars they denote.</summary>
+        public string Translate(string data) {
+            Safe.ArgumentNotNull(data, "data");
+            return Translate(data.ToCharArray());
+        }
+
+        /// <summary>Same as <see cref="Translate(string)"/>, the whole array is translated.</summary>
+        public string Translate(char[] data) {
+            Safe.ArgumentNotNull(data, "data");
+            return Translate(data, data.Length);
+        }
+
+        /// <summary>Translates the first <paramref name="length"/> chars of <paramref name="data"/>.</summary>
+        public string Translate(char[] data, int length) {
+            Safe.ArgumentNotNull(data, "data");
+            Safe.ArgumentInRange(length, 0, data.Length, "length");
+
+            var translated = new char[length];
+            Feed(data,length);
+            int pos = 0;
+
+            while (ReadTokenInternal()) {
+                switch ((JSONGrammar.TokenType)TokenTags[0]) {
+                    case JSONGrammar.TokenType.UnescapedChar:
+                        Array.Copy(m_buffer,m_tokenOffset,translated,pos,m_tokenLen);
+                        pos += m_tokenLen;
+                        break;
+                    case JSONGrammar.TokenType.EscapedChar:
+                        translated[pos++] = _escMap[m_buffer[m_tokenOffset + 1]];
+                        break;
+                    case JSONGrammar.TokenType.EscapedUnicode:
+                        translated[pos++] = TranslateHexUnicode(m_buffer,m_tokenOffset + 2);
+                        break;
+                }
+            }
+
+            return new String(translated, 0, pos);
+        }
+
+        /// <summary>Returns the char denoted by the escape made of a backslash and <paramref name="symbol"/>.</summary>
+        internal static char TranslateEscapedChar(char symbol) {
+            return _escMap[symbol];
+        }
+
+        /// <summary>Converts four hex digits starting at <paramref name="offset"/> to the char they encode.</summary>
+        internal static char TranslateHexUnicode(char[] symbols, int offset) {
+            Debug.Assert(symbols != null);
+            Debug.Assert(symbols.Length - 
offset >= 4); + + int value = (_hexMap[symbols[offset]] << 12) + | (_hexMap[symbols[offset + 1]] << 8) + | (_hexMap[symbols[offset + 2]] << 4) + | (_hexMap[symbols[offset + 3]]); + return (char)value; + } + } +} diff --git a/Implab/Implab.csproj b/Implab/Implab.csproj --- a/Implab/Implab.csproj +++ b/Implab/Implab.csproj @@ -88,16 +88,6 @@ - - - - - - - - - - @@ -183,9 +173,21 @@ - + + + + + + + + + + + + + @@ -261,5 +263,7 @@ + + \ No newline at end of file diff --git a/Implab/JSON/JSONElementContext.cs b/Implab/JSON/JSONElementContext.cs deleted file mode 100644 --- a/Implab/JSON/JSONElementContext.cs +++ /dev/null @@ -1,17 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Implab.JSON { - /// - /// internal - /// - public enum JSONElementContext { - None, - Object, - Array, - Closed - } -} diff --git a/Implab/JSON/JSONElementType.cs b/Implab/JSON/JSONElementType.cs deleted file mode 100644 --- a/Implab/JSON/JSONElementType.cs +++ /dev/null @@ -1,34 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Implab.JSON { - /// - /// Тип элемента на котором находится парсер - /// - public enum JSONElementType { - None, - /// - /// Начало объекта - /// - BeginObject, - /// - /// Конец объекта - /// - EndObject, - /// - /// Начало массива - /// - BeginArray, - /// - /// Конец массива - /// - EndArray, - /// - /// Простое значение - /// - Value - } -} diff --git a/Implab/JSON/JSONGrammar.cs b/Implab/JSON/JSONGrammar.cs deleted file mode 100644 --- a/Implab/JSON/JSONGrammar.cs +++ /dev/null @@ -1,99 +0,0 @@ -using Implab.Parsing; -using System.Linq; - -namespace Implab.JSON { - class JSONGrammar : Grammar { - public enum TokenType { - None, - BeginObject, - EndObject, - BeginArray, - EndArray, - String, - Number, - Literal, - NameSeparator, - ValueSeparator, - - StringBound, - EscapedChar, - 
UnescapedChar, - EscapedUnicode, - - Minus, - Plus, - Sign, - Integer, - Dot, - Exp - } - - readonly CDFADefinition m_jsonDFA; - readonly CDFADefinition m_stringDFA; - - public JSONGrammar() { - DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); - var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9')); - var digit9 = SymbolRangeToken('1', '9'); - var zero = SymbolToken('0'); - var digit = zero.Or(digit9); - var dot = SymbolToken('.'); - var minus = SymbolToken('-'); - var sign = SymbolSetToken('-', '+'); - var expSign = SymbolSetToken('e', 'E'); - var letters = SymbolRangeToken('a', 'z'); - var integer = zero.Or(digit9.Cat(digit.EClosure())); - var frac = dot.Cat(digit.Closure()); - var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure()); - var quote = SymbolToken('"'); - var backSlash = SymbolToken('\\'); - var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r'); - var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4)); - var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure(); - var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace); - var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace); - var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace); - var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace); - var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace); - var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace); - - var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional()); - var literal = letters.Closure(); - var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x)); - - var jsonExpression = - number.Tag(TokenType.Number) - .Or(literal.Tag(TokenType.Literal)) - .Or(quote.Tag(TokenType.StringBound)) - .Or(beginObject.Tag(TokenType.BeginObject)) - .Or(endObject.Tag(TokenType.EndObject)) - 
.Or(beginArray.Tag(TokenType.BeginArray)) - .Or(endArray.Tag(TokenType.EndArray)) - .Or(nameSep.Tag(TokenType.NameSeparator)) - .Or(valueSep.Tag(TokenType.ValueSeparator)); - - - var jsonStringExpression = - quote.Tag(TokenType.StringBound) - .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar)) - .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode)) - .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); - - - m_jsonDFA = BuildDFA(jsonExpression); - m_stringDFA = BuildDFA(jsonStringExpression); - } - - public CDFADefinition JsonDFA { - get { - return m_jsonDFA; - } - } - - public CDFADefinition JsonStringDFA { - get { - return m_stringDFA; - } - } - } -} diff --git a/Implab/JSON/JSONParser.cs b/Implab/JSON/JSONParser.cs deleted file mode 100644 --- a/Implab/JSON/JSONParser.cs +++ /dev/null @@ -1,277 +0,0 @@ -using Implab.Parsing; -using System; -using System.Diagnostics; -using System.IO; - -namespace Implab.JSON { - /// - /// internal - /// - public struct JSONParserContext { - public string memberName; - public JSONElementContext elementContext; - } - - /// - /// Pull парсер JSON данных. - /// - /// - /// Следует отметить отдельную интерпретацию свойства , - /// оно означает текущий уровень вложенности объектов, однако закрывающий - /// элемент объекта и массива имеет уровень меньше, чем сам объект. 
- /// - /// { // Level = 1 - /// "name" : "Peter", // Level = 1 - /// "address" : { // Level = 2 - /// city : "Stern" // Level = 2 - /// } // Level = 1 - /// } // Level = 0 - /// - /// - public class JSONParser : DFAutomaton, IDisposable { - - enum MemberContext { - MemberName, - MemberValue - } - - static readonly EnumAlphabet _alphabet = EnumAlphabet.FullAlphabet; - static readonly DFAStateDescriptior[] _jsonDFA; - static readonly DFAStateDescriptior[] _objectDFA; - static readonly DFAStateDescriptior[] _arrayDFA; - - static JSONParser() { - - - var valueExpression = Token.New(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String); - var memberExpression = Token.New(JsonTokenType.String).Cat(Token.New(JsonTokenType.NameSeparator)).Cat(valueExpression); - - var objectExpression = memberExpression - .Cat( - Token.New(JsonTokenType.ValueSeparator) - .Cat(memberExpression) - .EClosure() - ) - .Optional() - .Cat(Token.New(JsonTokenType.EndObject)) - .Tag(0); - var arrayExpression = valueExpression - .Cat( - Token.New(JsonTokenType.ValueSeparator) - .Cat(valueExpression) - .EClosure() - ) - .Optional() - .Cat(Token.New(JsonTokenType.EndArray)) - .Tag(0); - - var jsonExpression = valueExpression.Tag(0); - - _jsonDFA = BuildDFA(jsonExpression).States; - _objectDFA = BuildDFA(objectExpression).States; - _arrayDFA = BuildDFA(arrayExpression).States; - } - - static EDFADefinition BuildDFA(Token expr) { - var builder = new DFABuilder(); - var dfa = new EDFADefinition(_alphabet); - expr.Accept(builder); - - builder.BuildDFA(dfa); - return dfa; - } - - JSONScanner m_scanner; - MemberContext m_memberContext; - - JSONElementType m_elementType; - object m_elementValue; - - /// - /// Создает новый парсер на основе строки, содержащей JSON - /// - /// - public JSONParser(string text) - : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { - 
Safe.ArgumentNotEmpty(text, "text"); - m_scanner = new JSONScanner(); - m_scanner.Feed(text.ToCharArray()); - } - - /// - /// Создает новый экземпляр парсера, на основе текстового потока. - /// - /// Текстовый поток. - /// Признак того, что парсер должен конролировать время жизни входного потока. - public JSONParser(TextReader reader, bool dispose) - : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { - Safe.ArgumentNotNull(reader, "reader"); - m_scanner = new JSONScanner(); - m_scanner.Feed(reader, dispose); - } - - /// - /// Тип текущего элемента на котором стоит парсер. - /// - public JSONElementType ElementType { - get { return m_elementType; } - } - - /// - /// Имя элемента - имя свойства родительского контейнера. Для элементов массивов и корневого всегда - /// пустая строка. - /// - public string ElementName { - get { return m_context.info.memberName; } - } - - /// - /// Значение элемента. Только для элементов типа , для остальных null - /// - public object ElementValue { - get { return m_elementValue; } - } - - /// - /// Читает слеюудущий объект из потока - /// - /// true - операция чтения прошла успешно, false - конец данных - public bool Read() { - if (m_context.current == UNREACHEBLE_STATE) - throw new InvalidOperationException("The parser is in invalid state"); - object tokenValue; - JsonTokenType tokenType; - m_context.info.memberName = String.Empty; - while (m_scanner.ReadToken(out tokenValue, out tokenType)) { - Move((int)tokenType); - if (m_context.current == UNREACHEBLE_STATE) - UnexpectedToken(tokenValue, tokenType); - switch (tokenType) { - case JsonTokenType.BeginObject: - Switch( - _objectDFA, - INITIAL_STATE, - new JSONParserContext { - memberName = m_context.info.memberName, - elementContext = JSONElementContext.Object - } - ); - m_elementValue = null; - m_memberContext = MemberContext.MemberName; - m_elementType = JSONElementType.BeginObject; - return true; - case 
JsonTokenType.EndObject: - Restore(); - m_elementValue = null; - m_elementType = JSONElementType.EndObject; - return true; - case JsonTokenType.BeginArray: - Switch( - _arrayDFA, - INITIAL_STATE, - new JSONParserContext { - memberName = m_context.info.memberName, - elementContext = JSONElementContext.Array - } - ); - m_elementValue = null; - m_memberContext = MemberContext.MemberValue; - m_elementType = JSONElementType.BeginArray; - return true; - case JsonTokenType.EndArray: - Restore(); - m_elementValue = null; - m_elementType = JSONElementType.EndArray; - return true; - case JsonTokenType.String: - if (m_memberContext == MemberContext.MemberName) { - m_context.info.memberName = (string)tokenValue; - break; - } - m_elementType = JSONElementType.Value; - m_elementValue = tokenValue; - return true; - case JsonTokenType.Number: - m_elementType = JSONElementType.Value; - m_elementValue = tokenValue; - return true; - case JsonTokenType.Literal: - m_elementType = JSONElementType.Value; - m_elementValue = ParseLiteral((string)tokenValue); - return true; - case JsonTokenType.NameSeparator: - m_memberContext = MemberContext.MemberValue; - break; - case JsonTokenType.ValueSeparator: - m_memberContext = m_context.info.elementContext == JSONElementContext.Object ? 
MemberContext.MemberName : MemberContext.MemberValue; - break; - default: - UnexpectedToken(tokenValue, tokenType); - break; - } - } - if (m_context.info.elementContext != JSONElementContext.None) - throw new ParserException("Unexpedted end of data"); - return false; - } - - object ParseLiteral(string literal) { - switch (literal) { - case "null": - return null; - case "false": - return false; - case "true": - return true; - default: - UnexpectedToken(literal, JsonTokenType.Literal); - return null; // avoid compliler error - } - } - - void UnexpectedToken(object value, JsonTokenType tokenType) { - throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value)); - } - - - /// - /// Признак конца потока - /// - public bool EOF { - get { - return m_scanner.EOF; - } - } - - protected virtual void Dispose(bool disposing) { - if (disposing) { - m_scanner.Dispose(); - } - } - - /// - /// Освобождает парсер и связанный с ним сканнер. - /// - public void Dispose() { - Dispose(true); - GC.SuppressFinalize(this); - } - - ~JSONParser() { - Dispose(false); - } - - /// - /// Переходит в конец текущего объекта. - /// - public void SeekElementEnd() { - var level = Level - 1; - - Debug.Assert(level >= 0); - - while (Level != level) - Read(); - } - } - -} diff --git a/Implab/JSON/JSONScanner.cs b/Implab/JSON/JSONScanner.cs deleted file mode 100644 --- a/Implab/JSON/JSONScanner.cs +++ /dev/null @@ -1,100 +0,0 @@ -using Implab.Parsing; -using System; -using System.Collections.Generic; -using System.Globalization; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Implab.JSON { - /// - /// Сканнер (лексер), разбивающий поток символов на токены JSON. 
- /// - public class JSONScanner : Scanner { - char[] m_stringBuffer; - DFAStateDescriptior[] m_stringDFA; - int[] m_stringAlphabet; - - /// - /// Создает новый экземпляр сканнера - /// - public JSONScanner() - : base(JSONGrammar.Instance.JsonDFA.States, JSONGrammar.Instance.JsonDFA.Alphabet.GetTranslationMap()) { - m_stringBuffer = new char[1024]; - var dfa = JSONGrammar.Instance.JsonStringDFA; - m_stringAlphabet = dfa.Alphabet.GetTranslationMap(); - m_stringDFA = dfa.States; - } - - /// - /// Читает следующий лексический элемент из входных данных. - /// - /// Возвращает значение прочитанного токена. - /// Возвращает тип прочитанного токена. - /// true - чтение произведено успешно. false - достигнут конец входных данных - /// В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е. - /// в строках обрабатываются экранированные символы, числа становтся типа double. - public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { - if (ReadTokenInternal()) { - switch ((JSONGrammar.TokenType)m_currentState.tag[0]) { - case JSONGrammar.TokenType.StringBound: - tokenValue = ReadString(); - tokenType = JsonTokenType.String; - break; - case JSONGrammar.TokenType.Number: - tokenValue = Double.Parse(new String(m_buffer, m_tokenOffset, m_tokenLen), CultureInfo.InvariantCulture); - tokenType = JsonTokenType.Number; - break; - default: - tokenType = (JsonTokenType)m_currentState.tag[0]; - tokenValue = new String(m_buffer, m_tokenOffset, m_tokenLen); - break; - } - return true; - } - tokenValue = null; - tokenType = JsonTokenType.None; - return false; - } - - string ReadString() { - int pos = 0; - Switch(m_stringDFA, m_stringAlphabet); - while (ReadTokenInternal()) { - switch ((JSONGrammar.TokenType)m_currentState.tag[0]) { - case JSONGrammar.TokenType.StringBound: - Restore(); - return new String(m_stringBuffer, 0, pos); - case JSONGrammar.TokenType.UnescapedChar: - EnsureStringBufferSize(pos + m_tokenLen); - 
Array.Copy(m_buffer, m_tokenOffset, m_stringBuffer, pos, m_tokenLen); - pos += m_tokenLen; - break; - case JSONGrammar.TokenType.EscapedUnicode: - EnsureStringBufferSize(pos + 1); - m_stringBuffer[pos] = StringTranslator.TranslateHexUnicode(m_buffer, m_tokenOffset + 2); - pos++; - break; - case JSONGrammar.TokenType.EscapedChar: - EnsureStringBufferSize(pos + 1); - m_stringBuffer[pos] = StringTranslator.TranslateEscapedChar(m_buffer[m_tokenOffset + 1]); - pos++; - break; - default: - break; - } - - } - - throw new ParserException("Unexpected end of data"); - } - - void EnsureStringBufferSize(int size) { - if (size > m_stringBuffer.Length) { - var newBuffer = new char[size]; - m_stringBuffer.CopyTo(newBuffer, 0); - m_stringBuffer = newBuffer; - } - } - } -} diff --git a/Implab/JSON/JSONWriter.cs b/Implab/JSON/JSONWriter.cs deleted file mode 100644 --- a/Implab/JSON/JSONWriter.cs +++ /dev/null @@ -1,319 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Globalization; -using System.Diagnostics; - -namespace Implab.JSON { - public class JSONWriter { - struct Context { - public bool needComma; - public JSONElementContext element; - } - Stack m_contextStack = new Stack(); - Context m_context; - - const int BUFFER_SIZE = 64; - - TextWriter m_writer; - readonly bool m_indent = true; - readonly int m_indentSize = 4; - readonly char[] m_buffer = new char[BUFFER_SIZE]; - int m_bufferPos; - - static readonly char [] _hex = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - static readonly char [] _escapeBKS, - _escapeFWD, - _escapeCR, - _escapeNL, - _escapeTAB, - _escapeBSLASH, - _escapeQ; - - static JSONWriter() { - _escapeBKS = "\\b".ToCharArray(); - _escapeFWD = "\\f".ToCharArray(); - _escapeCR = "\\r".ToCharArray(); - _escapeNL = "\\n".ToCharArray(); - _escapeTAB = "\\t".ToCharArray(); - _escapeBSLASH = "\\\\".ToCharArray(); - _escapeQ = "\\\"".ToCharArray(); - } - - public JSONWriter(TextWriter 
writer) { - Safe.ArgumentNotNull(writer, "writer"); - m_writer = writer; - } - - public JSONWriter(TextWriter writer, bool indent) { - Safe.ArgumentNotNull(writer, "writer"); - - m_writer = writer; - m_indent = indent; - } - - void WriteIndent() { - if (m_indent) { - var indent = new char[m_contextStack.Count * m_indentSize + 1]; - indent[0] = '\n'; - for (int i = 1; i < indent.Length; i++) - indent[i] = ' '; - m_writer.Write(new String(indent)); - } else { - m_writer.Write(' '); - } - } - - void WriteMemberName(string name) { - Safe.ArgumentNotEmpty(name, "name"); - if (m_context.element != JSONElementContext.Object) - OperationNotApplicable("WriteMember"); - if (m_context.needComma) - m_writer.Write(","); - - WriteIndent(); - m_context.needComma = true; - Write(name); - m_writer.Write(" : "); - } - - public void WriteValue(string name, string value) { - WriteMemberName(name); - Write(value); - } - - public void WriteValue(string name, bool value) { - WriteMemberName(name); - Write(value); - } - - public void WriteValue(string name, double value) { - WriteMemberName(name); - Write(value); - } - - public void WriteValue(string value) { - if (m_context.element == JSONElementContext.Array) { - - if (m_context.needComma) - m_writer.Write(","); - WriteIndent(); - m_context.needComma = true; - - Write(value); - } else if (m_context.element == JSONElementContext.None) { - Write(value); - m_context.element = JSONElementContext.Closed; - } else { - OperationNotApplicable("WriteValue"); - } - } - - public void WriteValue(bool value) { - if (m_context.element == JSONElementContext.Array) { - - if (m_context.needComma) - m_writer.Write(","); - WriteIndent(); - m_context.needComma = true; - - Write(value); - } else if (m_context.element == JSONElementContext.None) { - Write(value); - m_context.element = JSONElementContext.Closed; - } else { - OperationNotApplicable("WriteValue"); - } - } - - public void WriteValue(double value) { - if (m_context.element == 
JSONElementContext.Array) { - - if (m_context.needComma) - m_writer.Write(","); - WriteIndent(); - m_context.needComma = true; - - Write(value); - } else if (m_context.element == JSONElementContext.None) { - Write(value); - m_context.element = JSONElementContext.Closed; - } else { - OperationNotApplicable("WriteValue"); - } - } - - public void BeginObject() { - if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array) - OperationNotApplicable("BeginObject"); - if (m_context.needComma) - m_writer.Write(","); - - WriteIndent(); - - m_context.needComma = true; - - m_contextStack.Push(m_context); - - m_context = new Context { element = JSONElementContext.Object, needComma = false }; - m_writer.Write("{"); - } - - public void BeginObject(string name) { - WriteMemberName(name); - - m_contextStack.Push(m_context); - - m_context = new Context { element = JSONElementContext.Object, needComma = false }; - m_writer.Write("{"); - } - - public void EndObject() { - if (m_context.element != JSONElementContext.Object) - OperationNotApplicable("EndObject"); - - m_context = m_contextStack.Pop(); - if (m_contextStack.Count == 0) - m_context.element = JSONElementContext.Closed; - WriteIndent(); - m_writer.Write("}"); - } - - public void BeginArray() { - if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array) - throw new InvalidOperationException(); - if (m_context.needComma) { - m_writer.Write(","); - - } - m_context.needComma = true; - - WriteIndent(); - m_contextStack.Push(m_context); - m_context = new Context { element = JSONElementContext.Array, needComma = false }; - m_writer.Write("["); - } - - public void BeginArray(string name) { - WriteMemberName(name); - - m_contextStack.Push(m_context); - - m_context = new Context { element = JSONElementContext.Array, needComma = false }; - m_writer.Write("["); - } - - public void EndArray() { - if (m_context.element != JSONElementContext.Array) - 
OperationNotApplicable("EndArray"); - - m_context = m_contextStack.Pop(); - if (m_contextStack.Count == 0) - m_context.element = JSONElementContext.Closed; - WriteIndent(); - m_writer.Write("]"); - } - - void Write(bool value) { - m_writer.Write(value ? "true" : "false"); - } - - void FlushBuffer() { - if (m_bufferPos > 0) { - m_writer.Write(m_buffer, 0, m_bufferPos); - m_bufferPos = 0; - } - } - - void Write(string value) { - if (value == null) { - m_writer.Write("null"); - return; - } - - Debug.Assert(m_bufferPos == 0); - - var chars = value.ToCharArray(); - m_buffer[m_bufferPos++] = '"'; - - // Analysis disable once ForCanBeConvertedToForeach - for (int i = 0; i < chars.Length; i++) { - var ch = chars[i]; - - char[] escapeSeq; - - switch (ch) { - case '\b': - escapeSeq = _escapeBKS; - break; - case '\f': - escapeSeq = _escapeFWD; - break; - case '\r': - escapeSeq = _escapeCR; - break; - case '\n': - escapeSeq = _escapeNL; - break; - case '\t': - escapeSeq = _escapeTAB; - break; - case '\\': - escapeSeq = _escapeBSLASH; - break; - case '"': - escapeSeq = _escapeQ; - break; - default: - if (ch < 0x20) { - if (m_bufferPos + 6 > BUFFER_SIZE) - FlushBuffer(); - - m_buffer[m_bufferPos++] = '\\'; - m_buffer[m_bufferPos++] = 'u'; - m_buffer[m_bufferPos++] = '0'; - m_buffer[m_bufferPos++] = '0'; - m_buffer[m_bufferPos++] = _hex[ch >> 4 & 0xf]; - m_buffer[m_bufferPos++] = _hex[ch & 0xf]; - - } else { - if (m_bufferPos >= BUFFER_SIZE) - FlushBuffer(); - m_buffer[m_bufferPos++] = ch; - } - continue; - } - - if (m_bufferPos + escapeSeq.Length > BUFFER_SIZE) - FlushBuffer(); - - Array.Copy(escapeSeq, 0, m_buffer, m_bufferPos, escapeSeq.Length); - m_bufferPos += escapeSeq.Length; - - } - - if (m_bufferPos >= BUFFER_SIZE) - FlushBuffer(); - - m_buffer[m_bufferPos++] = '"'; - - FlushBuffer(); - } - - void Write(double value) { - if (double.IsNaN(value)) - Write("NaN"); - else if (double.IsNegativeInfinity(value)) - Write("-Infinity"); - else if (double.IsPositiveInfinity(value)) 
- Write("Infinity"); - else - m_writer.Write(value.ToString(CultureInfo.InvariantCulture)); - } - - void OperationNotApplicable(string opName) { - throw new InvalidOperationException(String.Format("The operation '{0}' isn't applicable in the context of '{1}'", opName, m_context.element )); - } - - } -} diff --git a/Implab/JSON/JSONXmlReader.cs b/Implab/JSON/JSONXmlReader.cs deleted file mode 100644 --- a/Implab/JSON/JSONXmlReader.cs +++ /dev/null @@ -1,343 +0,0 @@ -using Implab; -using Implab.Parsing; -using System; -using System.Collections.Generic; -using System.Globalization; -using System.IO; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using System.Xml; - -namespace Implab.JSON { - public class JSONXmlReader : XmlReader { - - enum ValueContext { - Undefined, - ElementStart, - ElementValue, - ElementEnd, - ElementEmpty - } - - struct LocalNameContext { - public string localName; - public bool isArray; - } - - JSONParser m_parser; - ValueContext m_valueContext; - ReadState m_state = ReadState.Initial; - Stack m_localNameStack = new Stack(); - LocalNameContext m_localName; - int m_depthCorrection = 0; - - readonly string m_rootName; - readonly string m_prefix; - readonly string m_namespaceUri; - readonly bool m_flattenArrays; - readonly string m_arrayItemName; - readonly XmlNameTable m_nameTable; - - JSONXmlReader(JSONParser parser, JSONXmlReaderOptions options) { - m_parser = parser; - - if (options != null) { - m_prefix = options.NodesPrefix ?? String.Empty; - m_namespaceUri = options.NamespaceURI ?? String.Empty; - m_rootName = options.RootName ?? "json"; - m_flattenArrays = options.FlattenArrays; - m_arrayItemName = options.ArrayItemName ?? "item"; - m_nameTable = options.NameTable ?? 
new NameTable(); - } else { - m_prefix = String.Empty; - m_namespaceUri = String.Empty; - m_rootName = "json"; - m_flattenArrays = false; - m_arrayItemName = "item"; - m_nameTable = new NameTable(); - } - } - - /// - /// Always 0, JSON doesn't support attributes - /// - public override int AttributeCount { - get { return 0; } - } - - public override string BaseURI { - get { return String.Empty; } - } - - public override int Depth { - get { - return m_localNameStack.Count + m_depthCorrection; - } - } - - public override bool EOF { - get { return m_parser.EOF; } - } - - /// - /// Always throws an exception - /// - /// - /// - public override string GetAttribute(int i) { - throw new ArgumentOutOfRangeException(); - } - - /// - /// Always returns empty string - /// - /// - /// - /// - public override string GetAttribute(string name, string namespaceURI) { - return String.Empty; - } - - /// - /// Always returns empty string - /// - /// - /// - public override string GetAttribute(string name) { - return String.Empty; - } - - public override bool IsEmptyElement { - get { return m_parser.ElementType == JSONElementType.Value && m_valueContext == ValueContext.ElementEmpty; } - } - - public override string LocalName { - get { return m_localName.localName; } - } - - public override string LookupNamespace(string prefix) { - if (String.IsNullOrEmpty(prefix) || prefix == m_prefix) - return m_namespaceUri; - else - return String.Empty; - } - - public override bool MoveToAttribute(string name, string ns) { - return false; - } - - public override bool MoveToAttribute(string name) { - return false; - } - - public override bool MoveToElement() { - return false; - } - - public override bool MoveToFirstAttribute() { - return false; - } - - public override bool MoveToNextAttribute() { - return false; - } - - public override XmlNameTable NameTable { - get { return m_nameTable; } - } - - public override string NamespaceURI { - get { return m_namespaceUri; } - } - - public override 
XmlNodeType NodeType { - get { - switch (m_parser.ElementType) { - case JSONElementType.BeginObject: - case JSONElementType.BeginArray: - return XmlNodeType.Element; - case JSONElementType.EndObject: - case JSONElementType.EndArray: - return XmlNodeType.EndElement; - case JSONElementType.Value: - switch (m_valueContext) { - case ValueContext.ElementStart: - case ValueContext.ElementEmpty: - return XmlNodeType.Element; - case ValueContext.ElementValue: - return XmlNodeType.Text; - case ValueContext.ElementEnd: - return XmlNodeType.EndElement; - default: - throw new InvalidOperationException(); - } - default: - throw new InvalidOperationException(); - } - } - } - - public override string Prefix { - get { return m_prefix; } - } - - public override bool Read() { - if (m_state != System.Xml.ReadState.Interactive && m_state != System.Xml.ReadState.Initial) - return false; - - if (m_state == ReadState.Initial) - m_state = System.Xml.ReadState.Interactive; - - try { - switch (m_parser.ElementType) { - case JSONElementType.Value: - switch (m_valueContext) { - case ValueContext.ElementStart: - SetLocalName(String.Empty); - m_valueContext = ValueContext.ElementValue; - return true; - case ValueContext.ElementValue: - RestoreLocalName(); - m_valueContext = ValueContext.ElementEnd; - return true; - case ValueContext.ElementEmpty: - case ValueContext.ElementEnd: - RestoreLocalName(); - break; - } - break; - case JSONElementType.EndArray: - case JSONElementType.EndObject: - RestoreLocalName(); - break; - } - string itemName = m_parser.ElementType == JSONElementType.None ? m_rootName : m_flattenArrays ? 
m_localName.localName : m_arrayItemName; - while (m_parser.Read()) { - if (!String.IsNullOrEmpty(m_parser.ElementName)) - itemName = m_parser.ElementName; - - switch (m_parser.ElementType) { - case JSONElementType.BeginArray: - if (m_flattenArrays && !m_localName.isArray) { - m_depthCorrection--; - SetLocalName(itemName, true); - continue; - } else { - SetLocalName(itemName, true); - } - break; - case JSONElementType.BeginObject: - SetLocalName(itemName); - break; - case JSONElementType.EndArray: - if (m_flattenArrays && !m_localNameStack.Peek().isArray) { - RestoreLocalName(); - m_depthCorrection++; - continue; - } - break; - case JSONElementType.EndObject: - break; - case JSONElementType.Value: - SetLocalName(itemName); - m_valueContext = m_parser.ElementValue == null ? ValueContext.ElementEmpty : ValueContext.ElementStart; - break; - default: - break; - } - return true; - } - - m_state = System.Xml.ReadState.EndOfFile; - return false; - } catch { - m_state = System.Xml.ReadState.Error; - throw; - } - } - - public override bool ReadAttributeValue() { - return false; - } - - public override ReadState ReadState { - get { return m_state; } - } - - public override void ResolveEntity() { - // do nothing - } - - public override string Value { - get { - if (m_parser.ElementValue == null) - return String.Empty; - if (Convert.GetTypeCode(m_parser.ElementValue) == TypeCode.Double) - return ((double)m_parser.ElementValue).ToString(CultureInfo.InvariantCulture); - else - return m_parser.ElementValue.ToString(); - } - } - - void SetLocalName(string name) { - m_localNameStack.Push(m_localName); - m_localName.localName = name; - m_localName.isArray = false; - } - - void SetLocalName(string name, bool isArray) { - m_localNameStack.Push(m_localName); - m_localName.localName = name; - m_localName.isArray = isArray; - } - - void RestoreLocalName() { - m_localName = m_localNameStack.Pop(); - } - - public override void Close() { - - } - - protected override void Dispose(bool 
disposing) { - #if MONO - disposing = true; - #endif - if (disposing) { - m_parser.Dispose(); - } - base.Dispose(disposing); - } - - public static JSONXmlReader Create(string file, JSONXmlReaderOptions options) { - return Create(File.OpenText(file), options); - } - - /// - /// Creates the XmlReader for the specified text stream with JSON data. - /// - /// Text reader. - /// Options. - /// - /// The reader will be disposed when the XmlReader is disposed. - /// - public static JSONXmlReader Create(TextReader reader, JSONXmlReaderOptions options) { - return new JSONXmlReader(new JSONParser(reader, true), options); - } - - /// - /// Creates the XmlReader for the specified stream with JSON data. - /// - /// Stream. - /// Options. - /// - /// The stream will be disposed when the XmlReader is disposed. - /// - public static JSONXmlReader Create(Stream stream, JSONXmlReaderOptions options) { - Safe.ArgumentNotNull(stream, "stream"); - // HACK don't dispose StreaReader to keep stream opened - return Create(new StreamReader(stream), options); - } - } -} diff --git a/Implab/JSON/JSONXmlReaderOptions.cs b/Implab/JSON/JSONXmlReaderOptions.cs deleted file mode 100644 --- a/Implab/JSON/JSONXmlReaderOptions.cs +++ /dev/null @@ -1,65 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Xml; - -namespace Implab.JSON { - /// - /// Набор необязательных параметров для , позволяющий управлять процессом - /// интерпретации JSON документа. - /// - public class JSONXmlReaderOptions { - /// - /// Пространство имен в котором будут располагаться читаемые элементы документа - /// - public string NamespaceURI { - get; - set; - } - - /// - /// Интерпретировать массивы как множественные элементы (убирает один уровень вложенности), иначе массив - /// представляется в виде узла, дочерними элементами которого являются элементы массива, имена дочерних элементов - /// определяются свойством . По умолчанию false. 
- /// - public bool FlattenArrays { - get; - set; - } - - /// - /// Префикс, для узлов документа - /// - public string NodesPrefix { - get; - set; - } - - /// - /// Имя корневого элемента в xml документе - /// - public string RootName { - get; - set; - } - - /// - /// Имя элемента для массивов, если не включена опция . - /// По умолчанию item. - /// - public string ArrayItemName { - get; - set; - } - - /// - /// Таблица атомизированных строк для построения документа. - /// - public XmlNameTable NameTable { - get; - set; - } - - } -} diff --git a/Implab/JSON/JsonTokenType.cs b/Implab/JSON/JsonTokenType.cs deleted file mode 100644 --- a/Implab/JSON/JsonTokenType.cs +++ /dev/null @@ -1,50 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Implab.JSON { - /// - /// Тип токенов, возвращаемых . - /// - public enum JsonTokenType : int { - None = 0, - /// - /// Начало объекта - /// - BeginObject, - /// - /// Конец объекта - /// - EndObject, - /// - /// Начало массива - /// - BeginArray, - /// - /// Конец массива - /// - EndArray, - /// - /// Строка - /// - String, - /// - /// Число - /// - Number, - /// - /// Литерал - /// - Literal, - /// - /// Разделитель имени : - /// - NameSeparator, - /// - /// Разделитель имени , - /// - ValueSeparator - } -} diff --git a/Implab/JSON/StringTranslator.cs b/Implab/JSON/StringTranslator.cs deleted file mode 100644 --- a/Implab/JSON/StringTranslator.cs +++ /dev/null @@ -1,96 +0,0 @@ -using Implab; -using Implab.Parsing; -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Implab.JSON { - /// - /// Класс для преобразования экранированной строки JSON - /// - public class StringTranslator : Scanner { - static readonly char[] _escMap; - static readonly int[] _hexMap; - - static StringTranslator() { - var chars = new char[] { 'b', 'f', 't', 'r', 
'n', '\\', '/' }; - var vals = new char[] { '\b', '\f', '\t', '\r', '\n', '\\', '/' }; - - _escMap = new char[chars.Max() + 1]; - - for (int i = 0; i < chars.Length; i++) - _escMap[chars[i]] = vals[i]; - - var hexs = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F' }; - var ints = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15 }; - - _hexMap = new int[hexs.Max() + 1]; - - for (int i = 0; i < hexs.Length; i++) - _hexMap[hexs[i]] = ints[i]; - - } - - public StringTranslator() - : base(JSONGrammar.Instance.JsonStringDFA.States, JSONGrammar.Instance.JsonStringDFA.Alphabet.GetTranslationMap()) { - } - - public string Translate(string data) { - Safe.ArgumentNotNull(data, "data"); - return Translate(data.ToCharArray()); - } - - public string Translate(char[] data) { - Safe.ArgumentNotNull(data, "data"); - return Translate(data, data.Length); - } - - public string Translate(char[] data, int length) { - Safe.ArgumentNotNull(data, "data"); - Safe.ArgumentInRange(length, 0, data.Length, "length"); - - var translated = new char[length]; - - Feed(data,length); - - int pos = 0; - - while (ReadTokenInternal()) { - switch ((JSONGrammar.TokenType)TokenTags[0]) { - case JSONGrammar.TokenType.UnescapedChar: - Array.Copy(m_buffer,m_tokenOffset,translated,pos,m_tokenLen); - pos += m_tokenLen; - break; - case JSONGrammar.TokenType.EscapedChar: - translated[pos] = _escMap[m_buffer[m_tokenOffset + 1]]; - pos++; - break; - case JSONGrammar.TokenType.EscapedUnicode: - translated[pos] = TranslateHexUnicode(m_buffer,m_tokenOffset + 2); - pos++; - break; - } - } - - return new String(translated, 0, pos); - } - - internal static char TranslateEscapedChar(char symbol) { - return _escMap[symbol]; - } - - internal static char TranslateHexUnicode(char[] symbols, int offset) { - Debug.Assert(symbols != null); - Debug.Assert(symbols.Length - offset >= 4); - - int value = 
(_hexMap[symbols[offset]] << 12) - | (_hexMap[symbols[offset + 1]] << 8) - | (_hexMap[symbols[offset + 2]] << 4) - | (_hexMap[symbols[offset + 3]]); - return (char)value; - } - } -}