# HG changeset patch # User cin # Date 2016-03-23 23:30:46 # Node ID c32688129f14e03d0e25529019541c4ea581754d # Parent 478ef706906aab6bbe5514f0637844974a47fdbb refactoring complete, JSONParser rewritten diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -15,3 +15,5 @@ Implab.Diagnostics.Interactive/bin/ Implab.Diagnostics.Interactive/obj/ MonoPlay/bin/ MonoPlay/obj/ +Implab.Test/Implab.Format.Test/bin/ +Implab.Test/Implab.Format.Test/obj/ diff --git a/Implab/Automaton/DFATable.cs b/Implab/Automaton/DFATable.cs --- a/Implab/Automaton/DFATable.cs +++ b/Implab/Automaton/DFATable.cs @@ -77,7 +77,7 @@ namespace Implab.Automaton { } public bool Remove(AutomatonTransition item) { - m_transitions.Remove(item); + return m_transitions.Remove(item); } public int Count { @@ -168,9 +168,9 @@ namespace Implab.Automaton { var rmap = m_transitions .GroupBy(t => t.s2) - .ToLookup( + .ToDictionary( g => g.Key, // s2 - g => g.ToLookup(t => t.edge, t => t.s1) + g => g.GroupBy(t => t.edge, t => t.s1).ToDictionary(p => p.Key) ); while (queue.Count > 0) { @@ -180,7 +180,7 @@ namespace Implab.Automaton { for (int c = 0; c < m_symbolCount; c++) { var stateX = new HashSet(); foreach(var a in stateA) - stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a' + stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a' foreach (var stateY in optimalStates.ToArray()) { if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { diff --git a/Implab/Automaton/DummyAlphabet.cs b/Implab/Automaton/DummyAlphabet.cs deleted file mode 100644 --- a/Implab/Automaton/DummyAlphabet.cs +++ /dev/null @@ -1,46 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Implab.Automaton { - /// - /// Dummy alphabet consists of integer numbers which are identical to their classes. - /// - public class DummyAlphabet : IAlphabet { - readonly int m_size; - - /// - /// Creates a new dummy alphabet with given size. - /// - /// The size of the alphabet, must be greater then zero. - public DummyAlphabet(int size) { - Safe.ArgumentAssert(size > 0); - m_size = 0; - } - - #region IAlphabet implementation - - public List[] CreateReverseMap() { - Enumerable.Range(0, m_size).ToArray(); - } - - public int Translate(int symbol) { - Safe.ArgumentInRange(symbol, 0, m_size, "symbol"); - return symbol; - } - - public bool Contains(int symbol) { - Safe.ArgumentInRange(symbol, 0, m_size, "symbol"); - return true; - } - - public int Count { - get { - return m_size; - } - } - - #endregion - } -} - diff --git a/Implab/Automaton/EnumAlphabet.cs b/Implab/Automaton/EnumAlphabet.cs --- a/Implab/Automaton/EnumAlphabet.cs +++ b/Implab/Automaton/EnumAlphabet.cs @@ -62,9 +62,5 @@ namespace Implab.Automaton { return symbol.ToInt32(CultureInfo.InvariantCulture); } - public override IEnumerable InputSymbols { - get { return _symbols.Value; } - } - } } diff --git a/Implab/Automaton/IndexedAlphabetBase.cs b/Implab/Automaton/IndexedAlphabetBase.cs --- a/Implab/Automaton/IndexedAlphabetBase.cs +++ b/Implab/Automaton/IndexedAlphabetBase.cs @@ -30,9 +30,9 @@ namespace Implab.Automaton { /// /// The translation map. public int[] GetTranslationMap() { - Dictionary map = new Dictionary(); + var map = new Dictionary(); - int max; + int max = 0; foreach (var p in Mappings) { var index = GetSymbolIndex(p.Key); max = Math.Max(max, index); diff --git a/Implab/Automaton/RegularExpressions/RegularDFA.cs b/Implab/Automaton/RegularExpressions/RegularDFA.cs --- a/Implab/Automaton/RegularExpressions/RegularDFA.cs +++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs @@ -53,7 +53,6 @@ namespace Implab.Automaton.RegularExpres var dfa = new RegularDFA(alphabet); - var states = new DummyAlphabet(StateCount); var alphaMap = new Dictionary(); var stateMap = new Dictionary(); diff --git a/Implab/Components/LazyAndWeak.cs b/Implab/Components/LazyAndWeak.cs --- a/Implab/Components/LazyAndWeak.cs +++ b/Implab/Components/LazyAndWeak.cs @@ -2,6 +2,13 @@ using System.Threading; namespace Implab.Components { + /// + /// Creates an instace on-demand and allows it to be garbage collected. + /// + /// + /// Usefull when dealing with memory-intensive objects which are frequently used. + /// This class is similar to except is a singleton. + /// public class LazyAndWeak where T : class { readonly Func m_factory; @@ -35,6 +42,18 @@ namespace Implab.Components { if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak) return value; } else { + lock (m_lock) { + // double check + if (weak != null) { + value = weak.Target as T; + if (value != null) + return value; + } + // we are safe to write + value = m_factory(); + m_reference = new WeakReference(value); + return value; + } } } } diff --git a/Implab/Components/RunnableComponent.cs b/Implab/Components/RunnableComponent.cs --- a/Implab/Components/RunnableComponent.cs +++ b/Implab/Components/RunnableComponent.cs @@ -1,5 +1,5 @@ using System; -using Implab.Parsing; +using Implab.Formats; namespace Implab.Components { public class RunnableComponent : Disposable, IRunnable, IInitializable { diff --git a/Implab/Formats/JSON/JSONElementContext.cs b/Implab/Formats/JSON/JSONElementContext.cs --- a/Implab/Formats/JSON/JSONElementContext.cs +++ b/Implab/Formats/JSON/JSONElementContext.cs @@ -5,6 +5,7 @@ enum JSONElementContext { None, Object, - Array + Array, + Closed } } diff --git a/Implab/Formats/JSON/JSONGrammar.cs b/Implab/Formats/JSON/JSONGrammar.cs --- a/Implab/Formats/JSON/JSONGrammar.cs +++ b/Implab/Formats/JSON/JSONGrammar.cs @@ -2,6 +2,7 @@ using Implab.Automaton.RegularExpressions; using System; using Implab.Automaton; +using Implab.Components; namespace Implab.Formats.JSON { class JSONGrammar : Grammar { @@ -23,7 +24,7 @@ namespace Implab.Formats.JSON { EscapedUnicode } - static Lazy _instance = new Lazy(); + static LazyAndWeak _instance = new LazyAndWeak(() => new JSONGrammar()); public static JSONGrammar Instance { get { return _instance.Value; } @@ -31,6 +32,7 @@ namespace Implab.Formats.JSON { readonly ScannerContext m_jsonExpression; readonly ScannerContext m_stringExpression; + readonly CharAlphabet m_defaultAlphabet = new CharAlphabet(); public JSONGrammar() { DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); @@ -87,6 +89,12 @@ namespace Implab.Formats.JSON { } + protected override IAlphabetBuilder AlphabetBuilder { + get { + return m_defaultAlphabet; + } + } + public ScannerContext JsonExpression { get { return m_jsonExpression; @@ -103,7 +111,7 @@ namespace Implab.Formats.JSON { return SymbolToken(Enumerable.Range(start,stop - start).Cast()); } - protected override IAlphabetBuilder CreateAlphabet() { + protected override IndexedAlphabetBase CreateAlphabet() { return new CharAlphabet(); } diff --git a/Implab/Formats/JSON/JSONParser.cs b/Implab/Formats/JSON/JSONParser.cs --- a/Implab/Formats/JSON/JSONParser.cs +++ b/Implab/Formats/JSON/JSONParser.cs @@ -5,17 +5,10 @@ using Implab.Automaton; using Implab.Automaton.RegularExpressions; using System.Linq; using Implab.Components; +using System.Collections.Generic; namespace Implab.Formats.JSON { /// - /// internal - /// - public struct JSONParserContext { - public string memberName; - public JSONElementContext elementContext; - } - - /// /// Pull парсер JSON данных. /// /// @@ -52,10 +45,11 @@ namespace Implab.Formats.JSON { } public bool Move(JsonTokenType token) { - var next = m_dfa[m_state, token]; + var next = m_dfa[m_state, (int)token]; if (next == AutomatonConst.UNREACHABLE_STATE) return false; m_state = next; + return true; } public JSONElementContext ElementContext { @@ -63,40 +57,43 @@ namespace Implab.Formats.JSON { } } + static readonly ParserContext _jsonContext; + static readonly ParserContext _objectContext; + static readonly ParserContext _arrayContext; + static JSONParser() { - - var valueExpression = Token(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String); - var memberExpression = Token(JsonTokenType.String).Cat(Token(JsonTokenType.NameSeparator)).Cat(valueExpression); + var valueExpression = MakeToken(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String); + var memberExpression = MakeToken(JsonTokenType.String).Cat(MakeToken(JsonTokenType.NameSeparator)).Cat(valueExpression); var objectExpression = memberExpression .Cat( - Token(JsonTokenType.ValueSeparator) + MakeToken(JsonTokenType.ValueSeparator) .Cat(memberExpression) .EClosure() ) .Optional() - .Cat(Token(JsonTokenType.EndObject)) + .Cat(MakeToken(JsonTokenType.EndObject)) .End(); var arrayExpression = valueExpression .Cat( - Token(JsonTokenType.ValueSeparator) + MakeToken(JsonTokenType.ValueSeparator) .Cat(valueExpression) .EClosure() ) .Optional() - .Cat(Token(JsonTokenType.EndArray)) + .Cat(MakeToken(JsonTokenType.EndArray)) .End(); var jsonExpression = valueExpression.End(); - _jsonDFA = CreateParserContext(jsonExpression, JSONElementContext.None); - _objectDFA = CreateParserContext(objectExpression, JSONElementContext.Object); - _arrayDFA = CreateParserContext(arrayExpression, JSONElementContext.Array); + _jsonContext = CreateParserContext(jsonExpression, JSONElementContext.None); + _objectContext = CreateParserContext(objectExpression, JSONElementContext.Object); + _arrayContext = CreateParserContext(arrayExpression, JSONElementContext.Array); } - static Token Token(params JsonTokenType[] input) { + static Token MakeToken(params JsonTokenType[] input) { return Token.New( input.Select(t => (int)t).ToArray() ); } @@ -112,32 +109,36 @@ namespace Implab.Formats.JSON { #endregion - JSONScanner m_scanner; + readonly JSONScanner m_scanner; MemberContext m_memberContext; JSONElementType m_elementType; object m_elementValue; + string m_memberName = String.Empty; + + Stack m_stack = new Stack(); + ParserContext m_context = _jsonContext; /// /// Создает новый парсер на основе строки, содержащей JSON /// /// - public JSONParser(string text) - : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { + public JSONParser(string text) { Safe.ArgumentNotEmpty(text, "text"); - m_scanner = new JSONScanner(); - m_scanner.Feed(text.ToCharArray()); + m_scanner = new JSONScanner(text); } /// /// Создает новый экземпляр парсера, на основе текстового потока. /// /// Текстовый поток. - public JSONParser(TextReader reader) - : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { + public JSONParser(TextReader reader) { Safe.ArgumentNotNull(reader, "reader"); - m_scanner = new JSONScanner(); - m_scanner.Feed(reader, dispose); + m_scanner = new JSONScanner(reader); + } + + public int Level { + get { return m_stack.Count; } } /// @@ -152,7 +153,7 @@ namespace Implab.Formats.JSON { /// пустая строка. /// public string ElementName { - get { return m_context.info.memberName; } + get { return m_memberName; } } /// @@ -167,55 +168,51 @@ namespace Implab.Formats.JSON { /// /// true - операция чтения прошла успешно, false - конец данных public bool Read() { - if (m_context.current == UNREACHEBLE_STATE) - throw new InvalidOperationException("The parser is in invalid state"); object tokenValue; JsonTokenType tokenType; - m_context.info.memberName = String.Empty; + + m_memberName = String.Empty; + while (m_scanner.ReadToken(out tokenValue, out tokenType)) { - Move((int)tokenType); - if (m_context.current == UNREACHEBLE_STATE) + if(!m_context.Move(tokenType)) UnexpectedToken(tokenValue, tokenType); + switch (tokenType) { case JsonTokenType.BeginObject: - Switch( - _objectDFA, - INITIAL_STATE, - new JSONParserContext { - memberName = m_context.info.memberName, - elementContext = JSONElementContext.Object - } - ); + m_stack.Push(m_context); + m_context = _objectContext; + m_elementValue = null; m_memberContext = MemberContext.MemberName; m_elementType = JSONElementType.BeginObject; return true; case JsonTokenType.EndObject: - Restore(); + if (m_stack.Count == 0) + UnexpectedToken(tokenValue, tokenType); + m_context = m_stack.Pop(); + m_elementValue = null; m_elementType = JSONElementType.EndObject; return true; case JsonTokenType.BeginArray: - Switch( - _arrayDFA, - INITIAL_STATE, - new JSONParserContext { - memberName = m_context.info.memberName, - elementContext = JSONElementContext.Array - } - ); + m_stack.Push(m_context); + m_context = _arrayContext; + m_elementValue = null; m_memberContext = MemberContext.MemberValue; m_elementType = JSONElementType.BeginArray; return true; case JsonTokenType.EndArray: - Restore(); + if (m_stack.Count == 0) + UnexpectedToken(tokenValue, tokenType); + m_context = m_stack.Pop(); + m_elementValue = null; m_elementType = JSONElementType.EndArray; return true; case JsonTokenType.String: if (m_memberContext == MemberContext.MemberName) { - m_context.info.memberName = (string)tokenValue; + m_memberName = (string)tokenValue; break; } m_elementType = JSONElementType.Value; @@ -233,15 +230,18 @@ namespace Implab.Formats.JSON { m_memberContext = MemberContext.MemberValue; break; case JsonTokenType.ValueSeparator: - m_memberContext = m_context.info.elementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue; + m_memberContext = m_context.ElementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue; break; default: UnexpectedToken(tokenValue, tokenType); break; } } - if (m_context.info.elementContext != JSONElementContext.None) + if (m_context.ElementContext != JSONElementContext.None) throw new ParserException("Unexpedted end of data"); + + EOF = true; + return false; } @@ -268,15 +268,13 @@ namespace Implab.Formats.JSON { /// Признак конца потока /// public bool EOF { - get { - return m_scanner.EOF; - } + get; + private set; } protected override void Dispose(bool disposing) { - if (disposing) { - m_scanner.Dispose(); - } + if (disposing) + Safe.Dispose(m_scanner); } /// diff --git a/Implab/Formats/JSON/JSONScanner.cs b/Implab/Formats/JSON/JSONScanner.cs --- a/Implab/Formats/JSON/JSONScanner.cs +++ b/Implab/Formats/JSON/JSONScanner.cs @@ -33,6 +33,9 @@ namespace Implab.Formats.JSON { m_scanner = new ReaderScanner(reader, bufferMax, chunkSize); } + public JSONScanner(TextReader reader) : this(reader, 1024*1024, 1024){ + } + /// /// Читает следующий лексический элемент из входных данных. /// diff --git a/Implab/Formats/JSON/JSONWriter.cs b/Implab/Formats/JSON/JSONWriter.cs --- a/Implab/Formats/JSON/JSONWriter.cs +++ b/Implab/Formats/JSON/JSONWriter.cs @@ -4,7 +4,7 @@ using System.IO; using System.Globalization; using System.Diagnostics; -namespace Implab.JSON { +namespace Implab.Formats.JSON { public class JSONWriter { struct Context { public bool needComma; diff --git a/Implab/Formats/JSON/JSONXmlReader.cs b/Implab/Formats/JSON/JSONXmlReader.cs --- a/Implab/Formats/JSON/JSONXmlReader.cs +++ b/Implab/Formats/JSON/JSONXmlReader.cs @@ -1,15 +1,11 @@ using Implab; -using Implab.Parsing; using System; using System.Collections.Generic; using System.Globalization; using System.IO; -using System.Linq; -using System.Text; -using System.Threading.Tasks; using System.Xml; -namespace Implab.JSON { +namespace Implab.Formats.JSON { public class JSONXmlReader : XmlReader { enum ValueContext { @@ -30,7 +26,7 @@ namespace Implab.JSON { ReadState m_state = ReadState.Initial; Stack m_localNameStack = new Stack(); LocalNameContext m_localName; - int m_depthCorrection = 0; + int m_depthCorrection; readonly string m_rootName; readonly string m_prefix; @@ -119,8 +115,8 @@ namespace Implab.JSON { public override string LookupNamespace(string prefix) { if (String.IsNullOrEmpty(prefix) || prefix == m_prefix) return m_namespaceUri; - else - return String.Empty; + + return String.Empty; } public override bool MoveToAttribute(string name, string ns) { @@ -183,11 +179,11 @@ namespace Implab.JSON { } public override bool Read() { - if (m_state != System.Xml.ReadState.Interactive && m_state != System.Xml.ReadState.Initial) + if (m_state != ReadState.Interactive && m_state != ReadState.Initial) return false; if (m_state == ReadState.Initial) - m_state = System.Xml.ReadState.Interactive; + m_state = ReadState.Interactive; try { switch (m_parser.ElementType) { @@ -223,9 +219,8 @@ namespace Implab.JSON { m_depthCorrection--; SetLocalName(itemName, true); continue; - } else { - SetLocalName(itemName, true); } + SetLocalName(itemName, true); break; case JSONElementType.BeginObject: SetLocalName(itemName); @@ -243,16 +238,14 @@ namespace Implab.JSON { SetLocalName(itemName); m_valueContext = m_parser.ElementValue == null ? ValueContext.ElementEmpty : ValueContext.ElementStart; break; - default: - break; } return true; } - m_state = System.Xml.ReadState.EndOfFile; + m_state = ReadState.EndOfFile; return false; } catch { - m_state = System.Xml.ReadState.Error; + m_state = ReadState.Error; throw; } } @@ -275,8 +268,7 @@ namespace Implab.JSON { return String.Empty; if (Convert.GetTypeCode(m_parser.ElementValue) == TypeCode.Double) return ((double)m_parser.ElementValue).ToString(CultureInfo.InvariantCulture); - else - return m_parser.ElementValue.ToString(); + return m_parser.ElementValue.ToString(); } } @@ -323,7 +315,7 @@ namespace Implab.JSON { /// The reader will be disposed when the XmlReader is disposed. /// public static JSONXmlReader Create(TextReader reader, JSONXmlReaderOptions options) { - return new JSONXmlReader(new JSONParser(reader, true), options); + return new JSONXmlReader(new JSONParser(reader), options); } /// diff --git a/Implab/Formats/JSON/JSONXmlReaderOptions.cs b/Implab/Formats/JSON/JSONXmlReaderOptions.cs --- a/Implab/Formats/JSON/JSONXmlReaderOptions.cs +++ b/Implab/Formats/JSON/JSONXmlReaderOptions.cs @@ -1,10 +1,7 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; + using System.Xml; -namespace Implab.JSON { +namespace Implab.Formats.JSON { /// /// Набор необязательных параметров для , позволяющий управлять процессом /// интерпретации JSON документа. diff --git a/Implab/Formats/JSON/StringTranslator.cs b/Implab/Formats/JSON/StringTranslator.cs --- a/Implab/Formats/JSON/StringTranslator.cs +++ b/Implab/Formats/JSON/StringTranslator.cs @@ -11,7 +11,7 @@ namespace Implab.Formats.JSON { /// /// Класс для преобразования экранированной строки JSON /// - public class StringTranslator : TextScanner { + static class StringTranslator { static readonly char[] _escMap; static readonly int[] _hexMap; @@ -34,49 +34,6 @@ namespace Implab.Formats.JSON { } - public StringTranslator() { - } - - public string Translate(string data) { - Safe.ArgumentNotNull(data, "data"); - return Translate(data.ToCharArray()); - } - - public string Translate(char[] data) { - Safe.ArgumentNotNull(data, "data"); - return Translate(data, data.Length); - } - - public string Translate(char[] data, int length) { - Safe.ArgumentNotNull(data, "data"); - Safe.ArgumentInRange(length, 0, data.Length, "length"); - - var translated = new char[length]; - - Feed(data,length); - - int pos = 0; - - while (ReadTokenInternal()) { - switch ((JSONGrammar.TokenType)Tags[0]) { - case JSONGrammar.TokenType.UnescapedChar: - Array.Copy(m_buffer,m_tokenOffset,translated,pos,m_tokenLen); - pos += m_tokenLen; - break; - case JSONGrammar.TokenType.EscapedChar: - translated[pos] = _escMap[m_buffer[m_tokenOffset + 1]]; - pos++; - break; - case JSONGrammar.TokenType.EscapedUnicode: - translated[pos] = TranslateHexUnicode(m_buffer,m_tokenOffset + 2); - pos++; - break; - } - } - - return new String(translated, 0, pos); - } - internal static char TranslateEscapedChar(char symbol) { return _escMap[symbol]; } diff --git a/Implab/Formats/TextScanner.cs b/Implab/Formats/TextScanner.cs --- a/Implab/Formats/TextScanner.cs +++ b/Implab/Formats/TextScanner.cs @@ -49,8 +49,8 @@ namespace Implab.Formats { /// /// internal bool ReadToken(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { - Safe.ArgumentNotNull(); m_tokenLength = 0; + tag = null; var maxSymbol = alphabet.Length - 1; @@ -109,7 +109,7 @@ namespace Implab.Formats { var size = used + free; if (size > m_bufferMax) - throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024)); + throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024)); var temp = new char[size]; @@ -122,7 +122,13 @@ namespace Implab.Formats { m_bufferOffset = 0; m_bufferSize = used + read; m_buffer = temp; + } else { + var read = Read(m_buffer, m_bufferSize, m_chunkSize); + if (read == 0) + return false; + m_bufferSize += m_chunkSize; } + return true; } else { Debug.Assert(m_bufferOffset == 0); m_buffer = new char[m_chunkSize]; diff --git a/Implab/Implab.csproj b/Implab/Implab.csproj --- a/Implab/Implab.csproj +++ b/Implab/Implab.csproj @@ -151,7 +151,6 @@ - @@ -176,7 +175,6 @@ -