diff --git a/Implab.Format.Test/JsonTests.cs b/Implab.Format.Test/JsonTests.cs --- a/Implab.Format.Test/JsonTests.cs +++ b/Implab.Format.Test/JsonTests.cs @@ -1,41 +1,42 @@ using NUnit.Framework; using System; -using Implab.Formats.JSON; using Implab.Automaton; using Implab.Xml; using System.Xml; -using System.Text; +using Implab.Formats; +using Implab.Formats.Json; namespace Implab.Format.Test { [TestFixture] public class JsonTests { + [Test] public void TestScannerValidTokens() { - using (var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) { + using (var scanner = JsonStringScanner.Create(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) { Tuple[] expexted = { new Tuple(JsonTokenType.Number, 9123d), - new Tuple(JsonTokenType.ValueSeparator, ", "), + new Tuple(JsonTokenType.ValueSeparator, null), new Tuple(JsonTokenType.Number, -123d), - new Tuple(JsonTokenType.ValueSeparator, ", "), + new Tuple(JsonTokenType.ValueSeparator, null), new Tuple(JsonTokenType.Number, 0d), - new Tuple(JsonTokenType.ValueSeparator, ", "), + new Tuple(JsonTokenType.ValueSeparator, null), new Tuple(JsonTokenType.Number, 0.1d), - new Tuple(JsonTokenType.ValueSeparator, ", "), + new Tuple(JsonTokenType.ValueSeparator, null), new Tuple(JsonTokenType.Number, -0.2d), - new Tuple(JsonTokenType.ValueSeparator, ", "), + new Tuple(JsonTokenType.ValueSeparator, null), new Tuple(JsonTokenType.Number, -0.1e3d), - new Tuple(JsonTokenType.ValueSeparator, ", "), + new Tuple(JsonTokenType.ValueSeparator, null), new Tuple(JsonTokenType.Number, 1.3E-3d), - new Tuple(JsonTokenType.ValueSeparator, ", "), + new Tuple(JsonTokenType.ValueSeparator, null), new Tuple(JsonTokenType.String, "some \t\n text"), - new Tuple(JsonTokenType.ValueSeparator, ", "), + new Tuple(JsonTokenType.ValueSeparator, null), new Tuple(JsonTokenType.Literal, "literal"), - new Tuple(JsonTokenType.BeginArray, " ["), - new Tuple(JsonTokenType.EndArray, "]"), - new Tuple(JsonTokenType.BeginObject, "{"), - new Tuple(JsonTokenType.EndObject, "}"), - new Tuple(JsonTokenType.NameSeparator, ":") + new Tuple(JsonTokenType.BeginArray, null), + new Tuple(JsonTokenType.EndArray, null), + new Tuple(JsonTokenType.BeginObject, null), + new Tuple(JsonTokenType.EndObject, null), + new Tuple(JsonTokenType.NameSeparator, null) }; object value; @@ -70,7 +71,7 @@ namespace Implab.Format.Test { }; foreach (var json in bad) { - using (var scanner = new JSONScanner(json)) { + using (var scanner = JsonStringScanner.Create(json)) { try { object value; JsonTokenType token; @@ -122,7 +123,7 @@ namespace Implab.Format.Test { void DumpJsonParse(string json) { Console.WriteLine($"JSON: {json}"); Console.WriteLine("XML"); - using (var xmlReader = new JsonXmlReader(new JSONParser(json), new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "json" })) { + using (var xmlReader = new JsonXmlReader(new JsonParser(json), new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "json" })) { while (xmlReader.Read()) Console.WriteLine($"{new string(' ', xmlReader.Depth * 2)}{xmlReader}"); } @@ -136,7 +137,7 @@ namespace Implab.Format.Test { CloseOutput = false, ConformanceLevel = ConformanceLevel.Document })) - using (var xmlReader = new JsonXmlReader(new JSONParser(json), new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "", FlattenArrays = true })) { + using (var xmlReader = new JsonXmlReader(new JsonParser(json), new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "", FlattenArrays = true })) { xmlWriter.WriteNode(xmlReader, false); } } diff --git a/Implab/Formats/CharAlphabet.cs b/Implab/Formats/CharAlphabet.cs --- a/Implab/Formats/CharAlphabet.cs +++ b/Implab/Formats/CharAlphabet.cs @@ -1,6 +1,7 @@ using System.Collections.Generic; using System.Linq; using Implab.Automaton; +using System; namespace Implab.Formats { public class CharAlphabet: IndexedAlphabetBase { @@ -12,5 +13,24 @@ namespace Implab.Formats { public IEnumerable InputSymbols { get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast(); } } + + public CharMap CreateCharMap() { + var map = new Dictionary(); + + int max = 0, min = char.MaxValue; + foreach (var p in Mappings) { + var index = GetSymbolIndex(p.Key); + max = Math.Max(max, index); + min = Math.Min(min, index); + map[index] = p.Value; + } + + var result = new int[max - min + 1]; + + for (int i = 0; i < result.Length; i++) + map.TryGetValue(min + i, out result[i]); + + return new CharMap((char)min, result); + } } } diff --git a/Implab/Formats/CharMap.cs b/Implab/Formats/CharMap.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/CharMap.cs @@ -0,0 +1,42 @@ +using Implab.Automaton; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Formats { + public class CharMap : IAlphabet { + readonly char m_min; + readonly char m_max; + readonly int[] m_map; + + public CharMap(char min, int[] map) { + Safe.ArgumentNotNull(map, nameof(map)); + Count = map.Max()+1; + m_min = min; + m_map = map; + m_max = (char)(min + map.Length); + } + + public int Count { + get; private set; + } + + public bool Contains(char symbol) { + return symbol >= m_min && symbol <= m_max && m_map[symbol-m_min] != AutomatonConst.UNCLASSIFIED_INPUT; + } + + public IEnumerable GetSymbols(int cls) { + for (var i = 0; i < m_map.Length; i++) + if (m_map[i] == cls) + yield return (char)(i + m_min); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int Translate(char symbol) { + return symbol >= m_min && symbol <= m_max ? m_map[symbol-m_min] : AutomatonConst.UNCLASSIFIED_INPUT; + } + } +} diff --git a/Implab/Formats/Grammar.cs b/Implab/Formats/Grammar.cs --- a/Implab/Formats/Grammar.cs +++ b/Implab/Formats/Grammar.cs @@ -67,32 +67,6 @@ namespace Implab.Formats { return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); } - - protected abstract IndexedAlphabetBase CreateAlphabet(); - - protected ScannerContext BuildScannerContext(Token regexp) { - - var dfa = new RegularDFA(AlphabetBuilder); - - var visitor = new RegularExpressionVisitor(dfa); - regexp.Accept(visitor); - visitor.BuildDFA(); - - if (dfa.IsFinalState(dfa.InitialState)) - throw new ApplicationException("The specified language contains empty token"); - - var ab = CreateAlphabet(); - var optimal = dfa.Optimize(ab); - - return new ScannerContext( - optimal.CreateTransitionTable(), - optimal.CreateFinalStateTable(), - optimal.CreateTagTable(), - optimal.InitialState, - ab.GetTranslationMap() - ); - } - } diff --git a/Implab/Formats/InputScanner.cs b/Implab/Formats/InputScanner.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/InputScanner.cs @@ -0,0 +1,84 @@ +using Implab.Automaton; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Formats { + public class InputScanner { + readonly TTag[] m_tags; + readonly int m_initialState; + readonly int[,] m_dfa; + readonly CharMap m_alphabet; + readonly bool[] m_final; + + int m_position; + int m_state; + + public InputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, CharMap alphabet) { + Safe.ArgumentNotNull(dfaTable, nameof(dfaTable)); + Safe.ArgumentNotNull(finalStates, nameof(finalStates)); + Safe.ArgumentNotNull(tags, nameof(tags)); + Safe.ArgumentNotNull(alphabet, nameof(alphabet)); + + m_dfa = dfaTable; + m_final = finalStates; + m_tags = tags; + m_initialState = initialState; + m_alphabet = alphabet; + } + + public TTag Tag { + get { + return m_tags[m_state]; + } + } + + public int Position { + get { + return m_position; + } + } + + public bool IsFinal { + get { + return m_final[m_state]; + } + } + + public void Reset() { + m_state = m_initialState; + } + + public InputScanner Clone() { + var clone = new InputScanner(m_dfa, m_final, m_tags, m_initialState, m_alphabet); + clone.m_state = m_state; + clone.m_position = m_position; + return clone; + } + + public bool Scan(char[] data, int offset, int length) { + if (length <= 0) { + m_position = offset; + return false; // EOF + } + + var max = offset + length; + var next = m_state; + + while(offset < max) { + next = m_dfa[next, m_alphabet.Translate(data[offset])]; + if (next == AutomatonConst.UNREACHABLE_STATE) { + // scanner stops on the next position after last recognized symbol + m_position = offset; + return false; + } + m_state = next; + offset++; + } + m_position = offset; + return true; + } + } +} diff --git a/Implab/Formats/JSON/JSONXmlReader.cs b/Implab/Formats/JSON/JSONXmlReader.cs deleted file mode 100644 --- a/Implab/Formats/JSON/JSONXmlReader.cs +++ /dev/null @@ -1,343 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Globalization; -using System.IO; -using System.Xml; - -namespace Implab.Formats.JSON { - public class JSONXmlReader : XmlReader { - - enum ValueContext { - Undefined, - ElementStart, - ElementValue, - ElementEnd, - ElementEmpty - } - - struct LocalNameContext { - public string localName; - public bool isArray; - } - - JSONParser m_parser; - ValueContext m_valueContext; - ReadState m_state = ReadState.Initial; - Stack m_localNameStack = new Stack(); - LocalNameContext m_localName; - int m_depthCorrection; - - readonly string m_rootName; - readonly string m_prefix; - readonly string m_namespaceUri; - readonly bool m_flattenArrays; - readonly string m_arrayItemName; - readonly XmlNameTable m_nameTable; - - JSONXmlReader(JSONParser parser, JSONXmlReaderOptions options) { - m_parser = parser; - - if (options != null) { - m_prefix = options.NodesPrefix ?? String.Empty; - m_namespaceUri = options.NamespaceURI ?? String.Empty; - m_rootName = options.RootName ?? "json"; - m_flattenArrays = options.FlattenArrays; - m_arrayItemName = options.ArrayItemName ?? "item"; - m_nameTable = options.NameTable ?? new NameTable(); - } else { - m_prefix = String.Empty; - m_namespaceUri = String.Empty; - m_rootName = "json"; - m_flattenArrays = false; - m_arrayItemName = "item"; - m_nameTable = new NameTable(); - } - } - - /// - /// Always 0, JSON doesn't support attributes - /// - public override int AttributeCount { - get { return 0; } - } - - public override string BaseURI { - get { return String.Empty; } - } - - public override int Depth { - get { - return m_localNameStack.Count + m_depthCorrection; - } - } - - public override bool EOF { - get { return m_parser.EOF; } - } - - /// - /// Always throws an exception - /// - /// - /// - public override string GetAttribute(int i) { - throw new ArgumentOutOfRangeException(); - } - - /// - /// Always returns empty string - /// - /// - /// - /// - public override string GetAttribute(string name, string namespaceURI) { - return String.Empty; - } - - /// - /// Always returns empty string - /// - /// - /// - public override string GetAttribute(string name) { - return String.Empty; - } - - public override bool IsEmptyElement { - get { return m_parser.ElementType == JSONElementType.Value && m_valueContext == ValueContext.ElementEmpty; } - } - - public override string LocalName { - get { return m_localName.localName; } - } - - public override string LookupNamespace(string prefix) { - if (String.IsNullOrEmpty(prefix) || prefix == m_prefix) - return m_namespaceUri; - - return String.Empty; - } - - public override bool MoveToAttribute(string name, string ns) { - return false; - } - - public override bool MoveToAttribute(string name) { - return false; - } - - public override bool MoveToElement() { - return false; - } - - public override bool MoveToFirstAttribute() { - return false; - } - - public override bool MoveToNextAttribute() { - return false; - } - - public override XmlNameTable NameTable { - get { return m_nameTable; } - } - - public override string NamespaceURI { - get { return m_namespaceUri; } - } - - public override XmlNodeType NodeType { - get { - switch (m_parser.ElementType) { - case JSONElementType.BeginObject: - case JSONElementType.BeginArray: - return XmlNodeType.Element; - case JSONElementType.EndObject: - case JSONElementType.EndArray: - return XmlNodeType.EndElement; - case JSONElementType.Value: - switch (m_valueContext) { - case ValueContext.ElementStart: - case ValueContext.ElementEmpty: - return XmlNodeType.Element; - case ValueContext.ElementValue: - return XmlNodeType.Text; - case ValueContext.ElementEnd: - return XmlNodeType.EndElement; - default: - throw new InvalidOperationException(); - } - default: - throw new InvalidOperationException(); - } - } - } - - public override string Prefix { - get { return m_prefix; } - } - - public override bool Read() { - if (m_state != ReadState.Interactive && m_state != ReadState.Initial) - return false; - - if (m_state == ReadState.Initial) - m_state = ReadState.Interactive; - - try { - switch (m_parser.ElementType) { - case JSONElementType.Value: - switch (m_valueContext) { - case ValueContext.ElementStart: - SetLocalName(String.Empty); - m_valueContext = ValueContext.ElementValue; - return true; - case ValueContext.ElementValue: - RestoreLocalName(); - m_valueContext = ValueContext.ElementEnd; - return true; - case ValueContext.ElementEmpty: - case ValueContext.ElementEnd: - RestoreLocalName(); - break; - } - break; - case JSONElementType.EndArray: - case JSONElementType.EndObject: - RestoreLocalName(); - break; - } - string itemName = m_parser.ElementType == JSONElementType.None ? m_rootName : m_flattenArrays ? m_localName.localName : m_arrayItemName; - while (m_parser.Read()) { - if (!String.IsNullOrEmpty(m_parser.ElementName)) - itemName = m_parser.ElementName; - - switch (m_parser.ElementType) { - case JSONElementType.BeginArray: - if (m_flattenArrays && !m_localName.isArray) { - m_depthCorrection--; - SetLocalName(itemName, true); - continue; - } - SetLocalName(itemName, true); - break; - case JSONElementType.BeginObject: - SetLocalName(itemName); - break; - case JSONElementType.EndArray: - if (m_flattenArrays && !m_localNameStack.Peek().isArray) { - RestoreLocalName(); - m_depthCorrection++; - continue; - } - break; - case JSONElementType.EndObject: - break; - case JSONElementType.Value: - SetLocalName(itemName); - m_valueContext = m_parser.ElementValue == null ? ValueContext.ElementEmpty : ValueContext.ElementStart; - break; - } - return true; - } - - m_state = ReadState.EndOfFile; - return false; - } catch { - m_state = ReadState.Error; - throw; - } - } - - public override bool ReadAttributeValue() { - return false; - } - - public override ReadState ReadState { - get { return m_state; } - } - - public override void ResolveEntity() { - // do nothing - } - - public override string Value { - get { - if (m_parser.ElementValue == null) - return String.Empty; - - switch(Convert.GetTypeCode (m_parser.ElementValue)) { - case TypeCode.Double: - return ((double)m_parser.ElementValue).ToString (CultureInfo.InvariantCulture); - case TypeCode.String: - return (string)m_parser.ElementValue; - case TypeCode.Boolean: - return (bool)m_parser.ElementValue ? "true" : "false"; - default: - return m_parser.ElementValue.ToString (); - } - } - } - - void SetLocalName(string name) { - m_localNameStack.Push(m_localName); - m_localName.localName = name; - m_localName.isArray = false; - } - - void SetLocalName(string name, bool isArray) { - m_localNameStack.Push(m_localName); - m_localName.localName = name; - m_localName.isArray = isArray; - } - - void RestoreLocalName() { - m_localName = m_localNameStack.Pop(); - } - - public override void Close() { - - } - - protected override void Dispose(bool disposing) { - #if MONO - disposing = true; - #endif - if (disposing) { - m_parser.Dispose(); - } - base.Dispose(disposing); - } - - public static JSONXmlReader Create(string file, JSONXmlReaderOptions options) { - return Create(File.OpenText(file), options); - } - - /// - /// Creates the XmlReader for the specified text stream with JSON data. - /// - /// Text reader. - /// Options. - /// - /// The reader will be disposed when the XmlReader is disposed. - /// - public static JSONXmlReader Create(TextReader reader, JSONXmlReaderOptions options) { - return new JSONXmlReader(new JSONParser(reader), options); - } - - /// - /// Creates the XmlReader for the specified stream with JSON data. - /// - /// Stream. - /// Options. - /// - /// The stream will be disposed when the XmlReader is disposed. - /// - public static JSONXmlReader Create(Stream stream, JSONXmlReaderOptions options) { - Safe.ArgumentNotNull(stream, "stream"); - // HACK don't dispose StreaReader to keep stream opened - return Create(new StreamReader(stream), options); - } - } -} diff --git a/Implab/Formats/JSON/JSONXmlReaderOptions.cs b/Implab/Formats/JSON/JSONXmlReaderOptions.cs deleted file mode 100644 --- a/Implab/Formats/JSON/JSONXmlReaderOptions.cs +++ /dev/null @@ -1,62 +0,0 @@ - -using System.Xml; - -namespace Implab.Xml { - /// - /// Набор необязательных параметров для , позволяющий управлять процессом - /// интерпретации JSON документа. - /// - public class JsonXmlReaderOptions { - /// - /// Пространство имен в котором будут располагаться читаемые элементы документа - /// - public string NamespaceUri { - get; - set; - } - - /// - /// Интерпретировать массивы как множественные элементы (убирает один уровень вложенности), иначе массив - /// представляется в виде узла, дочерними элементами которого являются элементы массива, имена дочерних элементов - /// определяются свойством . По умолчанию false. - /// - public bool FlattenArrays { - get; - set; - } - - /// - /// Префикс, для узлов документа - /// - public string NodesPrefix { - get; - set; - } - - /// - /// Имя корневого элемента в xml документе - /// - public string RootName { - get; - set; - } - - /// - /// Имя элемента для массивов, если не включена опция . - /// По умолчанию item. - /// - public string ArrayItemName { - get; - set; - } - - /// - /// Таблица атомизированных строк для построения документа. - /// - public XmlNameTable NameTable { - get; - set; - } - - } -} diff --git a/Implab/Formats/JSON/JSONElementContext.cs b/Implab/Formats/JSON/JsonElementContext.cs rename from Implab/Formats/JSON/JSONElementContext.cs rename to Implab/Formats/JSON/JsonElementContext.cs --- a/Implab/Formats/JSON/JSONElementContext.cs +++ b/Implab/Formats/JSON/JsonElementContext.cs @@ -1,8 +1,8 @@ -namespace Implab.Formats.JSON { +namespace Implab.Formats.Json { /// /// internal /// - enum JSONElementContext { + enum JsonElementContext { None, Object, Array, diff --git a/Implab/Formats/JSON/JSONElementType.cs b/Implab/Formats/JSON/JsonElementType.cs rename from Implab/Formats/JSON/JSONElementType.cs rename to Implab/Formats/JSON/JsonElementType.cs --- a/Implab/Formats/JSON/JSONElementType.cs +++ b/Implab/Formats/JSON/JsonElementType.cs @@ -1,8 +1,8 @@ -namespace Implab.Formats.JSON { +namespace Implab.Formats.Json { /// /// Тип элемента на котором находится парсер /// - public enum JSONElementType { + public enum JsonElementType { None, /// /// Начало объекта diff --git a/Implab/Formats/JSON/JSONGrammar.cs b/Implab/Formats/JSON/JsonGrammar.cs rename from Implab/Formats/JSON/JSONGrammar.cs rename to Implab/Formats/JSON/JsonGrammar.cs --- a/Implab/Formats/JSON/JSONGrammar.cs +++ b/Implab/Formats/JSON/JsonGrammar.cs @@ -4,8 +4,8 @@ using System; using Implab.Automaton; using Implab.Components; -namespace Implab.Formats.JSON { - class JSONGrammar : Grammar { +namespace Implab.Formats.Json { + public class JsonGrammar : Grammar { public enum TokenType { None, BeginObject, @@ -25,17 +25,19 @@ namespace Implab.Formats.JSON { EscapedUnicode } - static LazyAndWeak _instance = new LazyAndWeak(() => new JSONGrammar()); + static LazyAndWeak _instance = new LazyAndWeak(() => new JsonGrammar()); - public static JSONGrammar Instance { + public static JsonGrammar Instance { get { return _instance.Value; } } - readonly ScannerContext m_jsonExpression; - readonly ScannerContext m_stringExpression; + readonly InputScanner m_jsonExpression; + readonly InputScanner m_stringExpression; readonly CharAlphabet m_defaultAlphabet = new CharAlphabet(); - public JSONGrammar() { + public CharAlphabet DefaultAlphabet { get { return m_defaultAlphabet; } } + + public JsonGrammar() { DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9')); var digit9 = SymbolRangeToken('1', '9'); @@ -85,10 +87,16 @@ namespace Implab.Formats.JSON { .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); - m_jsonExpression = BuildScannerContext(jsonExpression); - m_stringExpression = BuildScannerContext(jsonStringExpression); + m_jsonExpression = BuildScanner(jsonExpression); + m_stringExpression = BuildScanner(jsonStringExpression); + } + public static InputScanner CreateJsonExpressionScanner() { + return Instance.m_jsonExpression.Clone(); + } + public static InputScanner CreateStringExpressionScanner() { + return Instance.m_stringExpression.Clone(); } protected override IAlphabetBuilder AlphabetBuilder { @@ -97,24 +105,43 @@ namespace Implab.Formats.JSON { } } - public ScannerContext JsonExpression { - get { - return m_jsonExpression; - } - } - - public ScannerContext JsonStringExpression { - get { - return m_stringExpression; - } - } - Token SymbolRangeToken(char start, char stop) { return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x)); } - protected override IndexedAlphabetBase CreateAlphabet() { - return new CharAlphabet(); + public InputScanner BuildScanner(Token regexp) { + var dfa = new RegularDFA(AlphabetBuilder); + + var visitor = new RegularExpressionVisitor(dfa); + regexp.Accept(visitor); + visitor.BuildDFA(); + + if (dfa.IsFinalState(dfa.InitialState)) + throw new ApplicationException("The specified language contains empty token"); + + var ab = new CharAlphabet(); + var optimal = dfa.Optimize(ab); + + return new InputScanner( + optimal.CreateTransitionTable(), + optimal.CreateFinalStateTable(), + NormalizeTags(optimal.CreateTagTable()), + optimal.InitialState, + ab.CreateCharMap() + ); + } + + static TokenType[] NormalizeTags(TokenType[][] tags) { + var result = new TokenType[tags.Length]; + for(var i = 0; i< tags.Length; i++) { + if (tags[i] == null || tags[i].Length == 0) + result[i] = default(TokenType); + else if (tags[i].Length == 1) + result[i] = tags[i][0]; + else + throw new Exception($"Ambigous state tags {string.Join(", ", tags[i])}"); + } + return result; } } diff --git a/Implab/Formats/JSON/JSONParser.cs b/Implab/Formats/JSON/JsonParser.cs rename from Implab/Formats/JSON/JSONParser.cs rename to Implab/Formats/JSON/JsonParser.cs --- a/Implab/Formats/JSON/JSONParser.cs +++ b/Implab/Formats/JSON/JsonParser.cs @@ -7,7 +7,7 @@ using System.Linq; using Implab.Components; using System.Collections.Generic; -namespace Implab.Formats.JSON { +namespace Implab.Formats.Json { /// /// Pull парсер JSON данных. /// @@ -24,7 +24,7 @@ namespace Implab.Formats.JSON { /// } // Level = 0 /// /// - public class JSONParser : Disposable { + public class JsonParser : Disposable { enum MemberContext { MemberName, @@ -36,9 +36,9 @@ namespace Implab.Formats.JSON { readonly int[,] m_dfa; int m_state; - readonly JSONElementContext m_elementContext; + readonly JsonElementContext m_elementContext; - public ParserContext(int[,] dfa, int state, JSONElementContext context) { + public ParserContext(int[,] dfa, int state, JsonElementContext context) { m_dfa = dfa; m_state = state; m_elementContext = context; @@ -52,7 +52,7 @@ namespace Implab.Formats.JSON { return true; } - public JSONElementContext ElementContext { + public JsonElementContext ElementContext { get { return m_elementContext; } } } @@ -61,7 +61,7 @@ namespace Implab.Formats.JSON { static readonly ParserContext _objectContext; static readonly ParserContext _arrayContext; - static JSONParser() { + static JsonParser() { var valueExpression = MakeToken(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String); var memberExpression = MakeToken(JsonTokenType.String).Cat(MakeToken(JsonTokenType.NameSeparator)).Cat(valueExpression); @@ -88,16 +88,16 @@ namespace Implab.Formats.JSON { var jsonExpression = valueExpression.End(); - _jsonContext = CreateParserContext(jsonExpression, JSONElementContext.None); - _objectContext = CreateParserContext(objectExpression, JSONElementContext.Object); - _arrayContext = CreateParserContext(arrayExpression, JSONElementContext.Array); + _jsonContext = CreateParserContext(jsonExpression, JsonElementContext.None); + _objectContext = CreateParserContext(objectExpression, JsonElementContext.Object); + _arrayContext = CreateParserContext(arrayExpression, JsonElementContext.Array); } static Token MakeToken(params JsonTokenType[] input) { return Token.New( input.Select(t => (int)t).ToArray() ); } - static ParserContext CreateParserContext(Token expr, JSONElementContext context) { + static ParserContext CreateParserContext(Token expr, JsonElementContext context) { var dfa = new DFATable(); var builder = new RegularExpressionVisitor(dfa); @@ -109,11 +109,11 @@ namespace Implab.Formats.JSON { #endregion - readonly JSONScanner m_scanner; + readonly JsonScanner m_scanner; // json starts from the value context and may content even a single literal MemberContext m_memberContext = MemberContext.MemberValue; - JSONElementType m_elementType; + JsonElementType m_elementType; object m_elementValue; string m_memberName = String.Empty; @@ -124,18 +124,18 @@ namespace Implab.Formats.JSON { /// Создает новый парсер на основе строки, содержащей JSON /// /// - public JSONParser(string text) { + public JsonParser(string text) { Safe.ArgumentNotEmpty(text, "text"); - m_scanner = new JSONScanner(text); + m_scanner = JsonStringScanner.Create(text); } /// /// Создает новый экземпляр парсера, на основе текстового потока. /// /// Текстовый поток. - public JSONParser(TextReader reader) { + public JsonParser(TextReader reader) { Safe.ArgumentNotNull(reader, "reader"); - m_scanner = new JSONScanner(reader); + m_scanner = JsonTextScanner.Create(reader); } public int Level { @@ -145,7 +145,7 @@ namespace Implab.Formats.JSON { /// /// Тип текущего элемента на котором стоит парсер. /// - public JSONElementType ElementType { + public JsonElementType ElementType { get { return m_elementType; } } @@ -158,7 +158,7 @@ namespace Implab.Formats.JSON { } /// - /// Значение элемента. Только для элементов типа , для остальных null + /// Значение элемента. Только для элементов типа , для остальных null /// public object ElementValue { get { return m_elementValue; } @@ -185,7 +185,7 @@ namespace Implab.Formats.JSON { m_elementValue = null; m_memberContext = MemberContext.MemberName; - m_elementType = JSONElementType.BeginObject; + m_elementType = JsonElementType.BeginObject; return true; case JsonTokenType.EndObject: if (m_stack.Count == 0) @@ -193,7 +193,7 @@ namespace Implab.Formats.JSON { m_context = m_stack.Pop(); m_elementValue = null; - m_elementType = JSONElementType.EndObject; + m_elementType = JsonElementType.EndObject; return true; case JsonTokenType.BeginArray: m_stack.Push(m_context); @@ -201,7 +201,7 @@ namespace Implab.Formats.JSON { m_elementValue = null; m_memberContext = MemberContext.MemberValue; - m_elementType = JSONElementType.BeginArray; + m_elementType = JsonElementType.BeginArray; return true; case JsonTokenType.EndArray: if (m_stack.Count == 0) @@ -209,36 +209,36 @@ namespace Implab.Formats.JSON { m_context = m_stack.Pop(); m_elementValue = null; - m_elementType = JSONElementType.EndArray; + m_elementType = JsonElementType.EndArray; return true; case JsonTokenType.String: if (m_memberContext == MemberContext.MemberName) { m_memberName = (string)tokenValue; break; } - m_elementType = JSONElementType.Value; + m_elementType = JsonElementType.Value; m_elementValue = tokenValue; return true; case JsonTokenType.Number: - m_elementType = JSONElementType.Value; + m_elementType = JsonElementType.Value; m_elementValue = tokenValue; return true; case JsonTokenType.Literal: - m_elementType = JSONElementType.Value; + m_elementType = JsonElementType.Value; m_elementValue = ParseLiteral((string)tokenValue); return true; case JsonTokenType.NameSeparator: m_memberContext = MemberContext.MemberValue; break; case JsonTokenType.ValueSeparator: - m_memberContext = m_context.ElementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue; + m_memberContext = m_context.ElementContext == JsonElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue; break; default: UnexpectedToken(tokenValue, tokenType); break; } } - if (m_context.ElementContext != JSONElementContext.None) + if (m_context.ElementContext != JsonElementContext.None) throw new ParserException("Unexpedted end of data"); EOF = true; diff --git a/Implab/Formats/JSON/JSONScanner.cs b/Implab/Formats/JSON/JsonScanner.cs rename from Implab/Formats/JSON/JSONScanner.cs rename to Implab/Formats/JSON/JsonScanner.cs --- a/Implab/Formats/JSON/JSONScanner.cs +++ b/Implab/Formats/JSON/JsonScanner.cs @@ -5,36 +5,64 @@ using System.Text; using Implab.Components; using System.IO; -namespace Implab.Formats.JSON { +namespace Implab.Formats.Json { /// /// Сканнер (лексер), разбивающий поток символов на токены JSON. /// - public class JSONScanner : Disposable { - readonly StringBuilder m_builder = new StringBuilder(); - - readonly ScannerContext m_jsonContext = JSONGrammar.Instance.JsonExpression; - readonly ScannerContext m_stringContext = JSONGrammar.Instance.JsonStringExpression; - + public abstract class JsonScanner : Disposable { + readonly InputScanner m_jsonContext = JsonGrammar.CreateJsonExpressionScanner(); + readonly InputScanner m_stringContext = JsonGrammar.CreateStringExpressionScanner(); - readonly TextScanner m_scanner; + readonly char[] m_unescapeBuf = new char[4]; + readonly char[] m_buffer; + int m_length; + int m_pos; + readonly StringBuilder m_tokenBuilder = new StringBuilder(); - /// - /// Создает новый экземпляр сканнера - /// - public JSONScanner(string text) { - Safe.ArgumentNotEmpty(text, "text"); - - m_scanner = new StringScanner(text); + protected JsonScanner(char[] buffer, int pos, int length) { + m_buffer = buffer; + m_pos = pos; + m_length = length; } - public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { - Safe.ArgumentNotNull(reader, "reader"); + bool Read(InputScanner scanner, out JsonGrammar.TokenType tokenType) { + scanner.Reset(); - m_scanner = new ReaderScanner(reader, bufferMax, chunkSize); + if (m_pos == m_length) { + m_pos = 0; + m_length = Read(m_buffer, 0, m_buffer.Length); + if (m_length == 0) { + tokenType = JsonGrammar.TokenType.None; + return false; // EOF + } + } + + while(scanner.Scan(m_buffer, m_pos, m_length - m_pos)) { + m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos); + m_pos = 0; + m_length = Read(m_buffer, 0, m_buffer.Length); + } + var scannerPos = scanner.Position; + if (scannerPos != m_pos) { + m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos); + m_pos = scannerPos; + } + + if (!scanner.IsFinal) { + if (m_length == 0) { + // unexpected EOF + throw new ParserException("Unexpected EOF"); + } else { + // unecpected character + throw new ParserException($"Unexpected character '{m_buffer[m_pos + 1]}'"); + } + } + tokenType = scanner.Tag; + return true; } - public JSONScanner(TextReader reader) : this(reader, 1024*1024, 1024){ - } + protected abstract int Read(char[] buffer, int offset, int size); + /// /// Читает следующий лексический элемент из входных данных. @@ -45,22 +73,28 @@ namespace Implab.Formats.JSON { /// В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е. /// в строках обрабатываются экранированные символы, числа становтся типа double. public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { - JSONGrammar.TokenType[] tag; - while (m_jsonContext.Execute(m_scanner, out tag)) { - switch (tag[0]) { - case JSONGrammar.TokenType.StringBound: + JsonGrammar.TokenType tag; + m_tokenBuilder.Clear(); + while (Read(m_jsonContext, out tag)) { + switch (tag) { + case JsonGrammar.TokenType.StringBound: tokenValue = ReadString(); tokenType = JsonTokenType.String; break; - case JSONGrammar.TokenType.Number: - tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture); + case JsonGrammar.TokenType.Number: + tokenValue = Double.Parse(m_tokenBuilder.ToString(), CultureInfo.InvariantCulture); tokenType = JsonTokenType.Number; break; - case JSONGrammar.TokenType.Whitespace: + case JsonGrammar.TokenType.Literal: + tokenType = JsonTokenType.Literal; + tokenValue = m_tokenBuilder.ToString(); + break; + case JsonGrammar.TokenType.Whitespace: + m_tokenBuilder.Clear(); continue; default: - tokenType = (JsonTokenType)tag[0]; - tokenValue = m_scanner.GetTokenValue(); + tokenType = (JsonTokenType)tag; + tokenValue = null; break; } return true; @@ -71,39 +105,30 @@ namespace Implab.Formats.JSON { } string ReadString() { - int pos = 0; - var buf = new char[6]; // the buffer for unescaping chars - - JSONGrammar.TokenType[] tag; - m_builder.Clear(); + JsonGrammar.TokenType tag; + m_tokenBuilder.Clear(); - while (m_stringContext.Execute(m_scanner, out tag)) { - switch (tag[0]) { - case JSONGrammar.TokenType.StringBound: - return m_builder.ToString(); - case JSONGrammar.TokenType.UnescapedChar: - m_scanner.CopyTokenTo(m_builder); + while (Read(m_stringContext, out tag)) { + switch (tag) { + case JsonGrammar.TokenType.StringBound: + m_tokenBuilder.Length--; + return m_tokenBuilder.ToString(); + case JsonGrammar.TokenType.UnescapedChar: break; - case JSONGrammar.TokenType.EscapedUnicode: // \xXXXX - unicode escape sequence - m_scanner.CopyTokenTo(buf, 0); - m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2)); - pos++; + case JsonGrammar.TokenType.EscapedUnicode: // \xXXXX - unicode escape sequence + m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4); + m_tokenBuilder.Length -= 6; + m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0)); break; - case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence - m_scanner.CopyTokenTo(buf, 0); - m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); + case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence + var ch = m_tokenBuilder[m_tokenBuilder.Length-1]; + m_tokenBuilder.Length -= 2; + m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch)); break; } - } throw new ParserException("Unexpected end of data"); } - - protected override void Dispose(bool disposing) { - if (disposing) - m_scanner.Dispose(); - base.Dispose(disposing); - } } } diff --git a/Implab/Formats/JSON/JsonStringScanner.cs b/Implab/Formats/JSON/JsonStringScanner.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/JSON/JsonStringScanner.cs @@ -0,0 +1,76 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Formats.Json { + public class JsonStringScanner : JsonScanner { + const int _defaultBuffer = 64; + + readonly string m_data; + int m_offset; + + JsonStringScanner(string data, char[] buffer, int pos, int length, int offset) : base(buffer, pos, length) { + m_data = data; + m_offset = offset; + } + + protected override int Read(char[] buffer, int offset, int size) { + if (m_data == null) + return 0; + if (m_offset >= m_data.Length) + return 0; + + var count = Math.Min(size, m_data.Length - m_offset); + + m_data.CopyTo(m_offset, buffer, offset, count); + m_offset += count; + + return count; + } + + public static JsonStringScanner Create(string data) { + Safe.ArgumentNotNull(data, nameof(data)); + + if (data.Length <= _defaultBuffer) + return new JsonStringScanner(null, data.ToCharArray(), 0, data.Length, data.Length); + + var buffer = new char[_defaultBuffer]; + data.CopyTo(0, buffer, 0, _defaultBuffer); + return new JsonStringScanner(data, buffer, 0, _defaultBuffer, _defaultBuffer); + } + + public static JsonStringScanner Create(string data, int offset, int length) { + Safe.ArgumentNotNull(data, nameof(data)); + Safe.ArgumentGreaterThan(offset, 0, nameof(offset)); + Safe.ArgumentGreaterThan(length, 0, nameof(length)); + + if (offset + length > data.Length) + throw new ArgumentOutOfRangeException("Specified offset and length are out of the string bounds"); + + if (length <= _defaultBuffer) { + var buffer = new char[length]; + data.CopyTo(offset, buffer, 0, length); + + return new JsonStringScanner(null, buffer, 0, length, length); + } else { + var buffer = new char[_defaultBuffer]; + data.CopyTo(offset, buffer, 0, _defaultBuffer); + return new JsonStringScanner(data, buffer, 0, _defaultBuffer, offset + _defaultBuffer); + } + } + + public static JsonStringScanner Create(char[] data, int offset, int length) { + Safe.ArgumentNotNull(data, nameof(data)); + Safe.ArgumentGreaterThan(offset, 0, nameof(offset)); + Safe.ArgumentGreaterThan(length, 0, nameof(length)); + + if (offset + length > data.Length) + throw new ArgumentOutOfRangeException("Specified offset and length are out of the array bounds"); + + return new JsonStringScanner(null, data, offset, offset + length, offset + length); + + } + } +} diff --git a/Implab/Formats/JSON/JsonTextScanner.cs b/Implab/Formats/JSON/JsonTextScanner.cs new file mode 100644 --- /dev/null +++ b/Implab/Formats/JSON/JsonTextScanner.cs @@ -0,0 +1,49 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Formats.Json { + public class JsonTextScanner : JsonScanner { + const int _bufferSize = 4096; + readonly TextReader m_reader; + + JsonTextScanner(TextReader reader, char[] buffer) : base(buffer, 0, 0) { + m_reader = reader; + } + + protected override int Read(char[] buffer, int offset, int size) { + return m_reader.Read(buffer, offset, size); + } + + public static JsonTextScanner Create(string file, Encoding encoding) { + return new JsonTextScanner(new StreamReader(file, encoding), new char[_bufferSize]); + } + + public static JsonTextScanner Create(string file) { + return new JsonTextScanner(new StreamReader(file), new char[_bufferSize]); + } + + public static JsonTextScanner Create(Stream stream, Encoding encoding) { + return new JsonTextScanner(new StreamReader(stream, encoding), new char[_bufferSize]); + } + + public static JsonTextScanner Create(Stream stream) { + return new JsonTextScanner(new StreamReader(stream), new char[_bufferSize]); + } + + public static JsonTextScanner Create(TextReader reader) { + Safe.ArgumentNotNull(reader, nameof(reader)); + return new JsonTextScanner(reader, new char[_bufferSize]); + } + + protected override void Dispose(bool disposing) { + if (disposing) + Safe.Dispose(m_reader); + + base.Dispose(disposing); + } + } +} diff --git a/Implab/Formats/JSON/JsonTokenType.cs b/Implab/Formats/JSON/JsonTokenType.cs --- a/Implab/Formats/JSON/JsonTokenType.cs +++ b/Implab/Formats/JSON/JsonTokenType.cs @@ -1,6 +1,6 @@ -namespace Implab.Formats.JSON { +namespace Implab.Formats.Json { /// - /// Тип токенов, возвращаемых . + /// Тип токенов, возвращаемых . /// public enum JsonTokenType : int { None = 0, diff --git a/Implab/Formats/JSON/JSONWriter.cs b/Implab/Formats/JSON/JsonWriter.cs rename from Implab/Formats/JSON/JSONWriter.cs rename to Implab/Formats/JSON/JsonWriter.cs --- a/Implab/Formats/JSON/JSONWriter.cs +++ b/Implab/Formats/JSON/JsonWriter.cs @@ -4,11 +4,11 @@ using System.IO; using System.Globalization; using System.Diagnostics; -namespace Implab.Formats.JSON { - public class JSONWriter { +namespace Implab.Formats.Json { + public class JsonWriter { struct Context { public bool needComma; - public JSONElementContext element; + public JsonElementContext element; } Stack m_contextStack = new Stack(); Context m_context; @@ -30,7 +30,7 @@ namespace Implab.Formats.JSON { _escapeBSLASH, _escapeQ; - static JSONWriter() { + static JsonWriter() { _escapeBKS = "\\b".ToCharArray(); _escapeFWD = "\\f".ToCharArray(); _escapeCR = "\\r".ToCharArray(); @@ -40,12 +40,12 @@ namespace Implab.Formats.JSON { _escapeQ = "\\\"".ToCharArray(); } - public JSONWriter(TextWriter writer) { + public JsonWriter(TextWriter writer) { Safe.ArgumentNotNull(writer, "writer"); m_writer = writer; } - public JSONWriter(TextWriter writer, bool indent) { + public JsonWriter(TextWriter writer, bool indent) { Safe.ArgumentNotNull(writer, "writer"); m_writer = writer; @@ -66,7 +66,7 @@ namespace Implab.Formats.JSON { void WriteMemberName(string name) { Safe.ArgumentNotEmpty(name, "name"); - if (m_context.element != JSONElementContext.Object) + if (m_context.element != JsonElementContext.Object) OperationNotApplicable("WriteMember"); if (m_context.needComma) m_writer.Write(","); @@ -93,7 +93,7 @@ namespace Implab.Formats.JSON { } public void WriteValue(string value) { - if (m_context.element == JSONElementContext.Array) { + if (m_context.element == JsonElementContext.Array) { if (m_context.needComma) m_writer.Write(","); @@ -101,16 +101,16 @@ namespace Implab.Formats.JSON { m_context.needComma = true; Write(value); - } else if (m_context.element == JSONElementContext.None) { + } else if (m_context.element == JsonElementContext.None) { Write(value); - m_context.element = JSONElementContext.Closed; + m_context.element = JsonElementContext.Closed; } else { OperationNotApplicable("WriteValue"); } } public void WriteValue(bool value) { - if (m_context.element == JSONElementContext.Array) { + if (m_context.element == JsonElementContext.Array) { if (m_context.needComma) m_writer.Write(","); @@ -118,16 +118,16 @@ namespace Implab.Formats.JSON { m_context.needComma = true; Write(value); - } else if (m_context.element == JSONElementContext.None) { + } else if (m_context.element == JsonElementContext.None) { Write(value); - m_context.element = JSONElementContext.Closed; + m_context.element = JsonElementContext.Closed; } else { OperationNotApplicable("WriteValue"); } } public void WriteValue(double value) { - if (m_context.element == JSONElementContext.Array) { + if (m_context.element == JsonElementContext.Array) { if (m_context.needComma) m_writer.Write(","); @@ -135,16 +135,16 @@ namespace Implab.Formats.JSON { m_context.needComma = true; Write(value); - } else if (m_context.element == JSONElementContext.None) { + } else if (m_context.element == JsonElementContext.None) { Write(value); - m_context.element = JSONElementContext.Closed; + m_context.element = JsonElementContext.Closed; } else { OperationNotApplicable("WriteValue"); } } public void BeginObject() { - if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array) + if (m_context.element != JsonElementContext.None && m_context.element != JsonElementContext.Array) OperationNotApplicable("BeginObject"); if (m_context.needComma) m_writer.Write(","); @@ -155,7 +155,7 @@ namespace Implab.Formats.JSON { m_contextStack.Push(m_context); - m_context = new Context { element = JSONElementContext.Object, needComma = false }; + m_context = new Context { element = JsonElementContext.Object, needComma = false }; m_writer.Write("{"); } @@ -164,23 +164,23 @@ namespace Implab.Formats.JSON { m_contextStack.Push(m_context); - m_context = new Context { element = JSONElementContext.Object, needComma = false }; + m_context = new Context { element = JsonElementContext.Object, needComma = false }; m_writer.Write("{"); } public void EndObject() { - if (m_context.element != JSONElementContext.Object) + if (m_context.element != JsonElementContext.Object) OperationNotApplicable("EndObject"); m_context = m_contextStack.Pop(); if (m_contextStack.Count == 0) - m_context.element = JSONElementContext.Closed; + m_context.element = JsonElementContext.Closed; WriteIndent(); m_writer.Write("}"); } public void BeginArray() { - if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array) + if (m_context.element != JsonElementContext.None && m_context.element != JsonElementContext.Array) throw new InvalidOperationException(); if (m_context.needComma) { m_writer.Write(","); @@ -190,7 +190,7 @@ namespace Implab.Formats.JSON { WriteIndent(); m_contextStack.Push(m_context); - m_context = new Context { element = JSONElementContext.Array, needComma = false }; + m_context = new Context { element = JsonElementContext.Array, needComma = false }; m_writer.Write("["); } @@ -199,17 +199,17 @@ namespace Implab.Formats.JSON { m_contextStack.Push(m_context); - m_context = new Context { element = JSONElementContext.Array, needComma = false }; + m_context = new Context { element = JsonElementContext.Array, needComma = false }; m_writer.Write("["); } public void EndArray() { - if (m_context.element != JSONElementContext.Array) + if (m_context.element != JsonElementContext.Array) OperationNotApplicable("EndArray"); m_context = m_contextStack.Pop(); if (m_contextStack.Count == 0) - m_context.element = JSONElementContext.Closed; + m_context.element = JsonElementContext.Closed; WriteIndent(); m_writer.Write("]"); } diff --git a/Implab/Formats/JSON/StringTranslator.cs b/Implab/Formats/JSON/StringTranslator.cs --- a/Implab/Formats/JSON/StringTranslator.cs +++ b/Implab/Formats/JSON/StringTranslator.cs @@ -7,7 +7,7 @@ using System.Linq; using System.Text; using System.Threading.Tasks; -namespace Implab.Formats.JSON { +namespace Implab.Formats.Json { /// /// Класс для преобразования экранированной строки JSON /// diff --git a/Implab/Formats/ReaderScanner.cs b/Implab/Formats/ReaderScanner.cs deleted file mode 100644 --- a/Implab/Formats/ReaderScanner.cs +++ /dev/null @@ -1,30 +0,0 @@ -using System; -using System.IO; - -namespace Implab.Formats { - public class ReaderScanner: TextScanner { - const int CHUNK_SIZE = 1024*4; - const int BUFFER_MAX = CHUNK_SIZE*1024; - - readonly TextReader m_reader; - - public ReaderScanner(TextReader reader, int limit, int chunk) : base(limit, chunk) { - Safe.ArgumentNotNull(reader, "reader"); - m_reader = reader; - } - - public ReaderScanner(TextReader reader) : this(reader, BUFFER_MAX, CHUNK_SIZE) { - } - - protected override int Read(char[] buffer, int offset, int size) { - return m_reader.Read(buffer, offset, size); - } - - protected override void Dispose(bool disposing) { - if (disposing) - Safe.Dispose(m_reader); - base.Dispose(disposing); - } - } -} - diff --git a/Implab/Formats/ScannerContext.cs b/Implab/Formats/ScannerContext.cs deleted file mode 100644 --- a/Implab/Formats/ScannerContext.cs +++ /dev/null @@ -1,30 +0,0 @@ -namespace Implab.Formats { - /// - /// Represents a scanner configuration usefull to recongnize token, based on the DFA. - /// - public class ScannerContext { - - public int[,] Dfa { get; private set; } - - public bool[] Final { get; private set; } - - public TTag[][] Tags { get; private set; } - - public int State { get; private set; } - - public int[] Alphabet { get; private set; } - - public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) { - Dfa = dfa; - Final = final; - Tags = tags; - State = state; - Alphabet = alphabet; - } - - public bool Execute(TextScanner scanner, out TTag[] tag) { - return scanner.ReadToken(Dfa, Final, Tags, State, Alphabet, out tag); - } - } -} - diff --git a/Implab/Formats/StringScanner.cs b/Implab/Formats/StringScanner.cs deleted file mode 100644 --- a/Implab/Formats/StringScanner.cs +++ /dev/null @@ -1,18 +0,0 @@ -using System; - -namespace Implab.Formats { - public class StringScanner: TextScanner { - const int CHUNK_SIZE = 1024; - - public StringScanner(string text) : base(null) { - Safe.ArgumentNotNull(text, "text"); - var data = text.ToCharArray(); - Feed(data, 0, data.Length); - } - - protected override int Read(char[] buffer, int offset, int size) { - return 0; - } - } -} - diff --git a/Implab/Formats/TextScanner.cs b/Implab/Formats/TextScanner.cs deleted file mode 100644 --- a/Implab/Formats/TextScanner.cs +++ /dev/null @@ -1,157 +0,0 @@ -using System; -using Implab.Components; -using System.Diagnostics; -using Implab.Automaton; -using System.Text; - -namespace Implab.Formats { - public abstract class TextScanner : Disposable { - readonly int m_bufferMax; - readonly int m_chunkSize; - - char[] m_buffer; - int m_bufferOffset; - int m_bufferSize; - int m_tokenOffset; - int m_tokenLength; - - /// - /// Initializes a new instance of the class. - /// - /// Buffer max. - /// Chunk size. - protected TextScanner(int bufferMax, int chunkSize) { - Debug.Assert(m_chunkSize <= m_bufferMax); - - m_bufferMax = bufferMax; - m_chunkSize = chunkSize; - } - - /// - /// Initializes a new instance of the class. - /// - /// Buffer. - protected TextScanner(char[] buffer) { - if (buffer != null) { - m_buffer = buffer; - m_bufferSize = buffer.Length; - } - } - - /// - /// (hungry) Reads the next token. - /// - /// true, if token internal was read, false if there is no more tokens in the stream. - /// The transition map for the automaton - /// Final states of the automaton. - /// Tags. - /// The initial state for the automaton. - /// - /// - internal bool ReadToken(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { - m_tokenLength = 0; - tag = null; - - var maxSymbol = alphabet.Length - 1; - int next; - do { - // after the next chunk is read the offset in the buffer may change - int pos = m_bufferOffset + m_tokenLength; - next = state; - while (pos < m_bufferSize) { - var ch = m_buffer[pos]; - - next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; - - if (next == AutomatonConst.UNREACHABLE_STATE) - break; - - state = next; - pos++; - } - m_tokenLength = pos - m_bufferOffset; - } while (next != AutomatonConst.UNREACHABLE_STATE && Feed()); - - m_tokenOffset = m_bufferOffset; - m_bufferOffset += m_tokenLength; - - if (final[state]) { - tag = tags[state]; - return true; - } - - if (m_bufferOffset == m_bufferSize) { - if (m_tokenLength == 0) //EOF - return false; - - throw new ParserException(); - } - - throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); - - } - - protected void Feed(char[] buffer, int offset, int length) { - m_buffer = buffer; - m_bufferOffset = offset; - m_bufferSize = offset + length; - } - - protected bool Feed() { - if (m_chunkSize <= 0) - return false; - - if (m_buffer != null) { - var free = m_buffer.Length - m_bufferSize; - - if (free < m_chunkSize) { - free += m_chunkSize; - var used = m_bufferSize - m_bufferOffset; - var size = used + free; - - if (size > m_bufferMax) - throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024)); - - var temp = new char[size]; - - var read = Read(temp, used, m_chunkSize); - if (read == 0) - return false; - - Array.Copy(m_buffer, m_bufferOffset, temp, 0, used); - - m_bufferOffset = 0; - m_bufferSize = used + read; - m_buffer = temp; - } else { - var read = Read(m_buffer, m_bufferSize, m_chunkSize); - if (read == 0) - return false; - m_bufferSize += m_chunkSize; - } - return true; - } else { - Debug.Assert(m_bufferOffset == 0); - m_buffer = new char[m_chunkSize]; - m_bufferSize = Read(m_buffer, 0, m_chunkSize); - return (m_bufferSize != 0); - } - } - - protected abstract int Read(char[] buffer, int offset, int size); - - public string GetTokenValue() { - return new String(m_buffer, m_tokenOffset, m_tokenLength); - } - - public void CopyTokenTo(char[] buffer, int offset) { - Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength); - } - - public void CopyTokenTo(StringBuilder sb) { - sb.Append(m_buffer, m_tokenOffset, m_tokenLength); - } - - } -} - diff --git a/Implab/Implab.csproj b/Implab/Implab.csproj --- a/Implab/Implab.csproj +++ b/Implab/Implab.csproj @@ -83,6 +83,10 @@ + + + + @@ -164,16 +168,14 @@ - - - - - - - - - - + + + + + + + + @@ -182,10 +184,6 @@ - - - - diff --git a/Implab/Safe.cs b/Implab/Safe.cs --- a/Implab/Safe.cs +++ b/Implab/Safe.cs @@ -5,6 +5,7 @@ using System.Text; using System.Text.RegularExpressions; using System.Diagnostics; using System.Collections; +using System.Runtime.CompilerServices; #if NET_4_5 using System.Threading.Tasks; @@ -14,11 +15,13 @@ namespace Implab { public static class Safe { + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentAssert(bool condition, string paramName) { if (!condition) throw new ArgumentException("The parameter is invalid", paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentMatch(string value, string paramName, Regex rx) { if (rx == null) throw new ArgumentNullException("rx"); @@ -26,26 +29,37 @@ namespace Implab throw new ArgumentException(String.Format("The prameter value must match {0}", rx), paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentNotEmpty(string value, string paramName) { if (String.IsNullOrEmpty(value)) throw new ArgumentException("The parameter can't be empty", paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentNotEmpty(T[] value, string paramName) { if (value == null || value.Length == 0) throw new ArgumentException("The array must be not emty", paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentNotNull(object value, string paramName) { if (value == null) throw new ArgumentNullException(paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void ArgumentGreaterThan(int value, int min, string paramName) { + if (value < min) + throw new ArgumentOutOfRangeException(paramName); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentInRange(int value, int min, int max, string paramName) { if (value < min || value > max) throw new ArgumentOutOfRangeException(paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentOfType(object value, Type type, string paramName) { if (!type.IsInstanceOfType(value)) throw new ArgumentException(String.Format("The parameter must be of type {0}", type), paramName); diff --git a/Implab/Xml/JsonXmlReader.cs b/Implab/Xml/JsonXmlReader.cs --- a/Implab/Xml/JsonXmlReader.cs +++ b/Implab/Xml/JsonXmlReader.cs @@ -1,10 +1,8 @@ -using Implab.Formats.JSON; +using Implab.Formats.Json; using System; using System.Collections.Generic; using System.Globalization; using System.Linq; -using System.Text; -using System.Threading.Tasks; using System.Xml; namespace Implab.Xml { @@ -14,7 +12,7 @@ namespace Implab.Xml { public bool skip; } - JSONParser m_parser; + JsonParser m_parser; JsonXmlReaderOptions m_options; JsonXmlReaderPosition m_position = JsonXmlReaderPosition.Initial; XmlNameTable m_nameTable; @@ -52,7 +50,6 @@ namespace Implab.Xml { XmlNameContext m_context; - int m_nextPrefix = 1; readonly string m_xmlnsPrefix; readonly string m_xmlnsNamespace; @@ -60,7 +57,7 @@ namespace Implab.Xml { readonly string m_xsiNamespace; - public JsonXmlReader(JSONParser parser, JsonXmlReaderOptions options) { + public JsonXmlReader(JsonParser parser, JsonXmlReaderOptions options) { Safe.ArgumentNotNull(parser, nameof(parser)); m_parser = parser; @@ -480,35 +477,35 @@ namespace Implab.Xml { var jsonName = m_nameTable.Add(m_parser.ElementName); switch (m_parser.ElementType) { - case JSONElementType.BeginObject: + case JsonElementType.BeginObject: if (!EnterJsonObject(jsonName, out elementName)) continue; m_position = JsonXmlReaderPosition.BeginObject; ElementNode(elementName, m_jsonNamespace, elementAttrs, false); break; - case JSONElementType.EndObject: + case JsonElementType.EndObject: if (!LeaveJsonScope(out elementName)) continue; m_position = JsonXmlReaderPosition.EndObject; EndElementNode(elementName, m_jsonNamespace); break; - case JSONElementType.BeginArray: + case JsonElementType.BeginArray: if (!EnterJsonArray(jsonName, out elementName)) continue; m_position = JsonXmlReaderPosition.BeginArray; ElementNode(elementName, m_jsonNamespace, elementAttrs, false); break; - case JSONElementType.EndArray: + case JsonElementType.EndArray: if (!LeaveJsonScope(out elementName)) continue; m_position = JsonXmlReaderPosition.EndArray; EndElementNode(elementName, m_jsonNamespace); break; - case JSONElementType.Value: + case JsonElementType.Value: if (!VisitJsonValue(jsonName, out m_jsonValueName)) continue; diff --git a/Implab/Xml/JsonXmlReaderOptions.cs b/Implab/Xml/JsonXmlReaderOptions.cs --- a/Implab/Xml/JsonXmlReaderOptions.cs +++ b/Implab/Xml/JsonXmlReaderOptions.cs @@ -2,16 +2,16 @@ using System; using System.Xml; -namespace Implab.Formats.JSON { +namespace Implab.Xml { /// - /// Набор необязательных параметров для , позволяющий управлять процессом + /// Набор необязательных параметров для , позволяющий управлять процессом /// интерпретации JSON документа. /// - public class JSONXmlReaderOptions : ICloneable { + public class JsonXmlReaderOptions : ICloneable { /// /// Пространство имен в котором будут располагаться читаемые элементы документа /// - public string NamespaceURI { + public string NamespaceUri { get; set; }