|
|
using System;
|
|
|
using System.Globalization;
|
|
|
using Implab.Automaton;
|
|
|
using System.Text;
|
|
|
using Implab.Components;
|
|
|
using System.IO;
|
|
|
|
|
|
namespace Implab.Formats.Json {
|
|
|
/// <summary>
/// Scanner (lexer) that splits a stream of characters into JSON tokens.
/// </summary>
/// <remarks>
/// Input is pulled in chunks through the abstract <c>Read(char[], int, int)</c> method into an
/// internal buffer; the text of the token currently being recognized is accumulated in a
/// <see cref="StringBuilder"/> so that a token may span several buffer refills.
/// </remarks>
public abstract class JsonScanner : Disposable {
    // Automaton used for everything outside of string literals.
    readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
    // Automaton used for the contents of a string literal (after the opening bound).
    readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

    // Scratch space for the four hex digits of a unicode escape sequence.
    readonly char[] m_unescapeBuf = new char[4];
    // Input buffer; valid data occupies the index range [m_pos, m_length).
    readonly char[] m_buffer;
    // Exclusive end index of the valid data in m_buffer (not a count: see 'm_length - m_pos' below).
    int m_length;
    // Index of the next unread character in m_buffer.
    int m_pos;
    // Raw text of the token being recognized.
    readonly StringBuilder m_tokenBuilder = new StringBuilder();

    /// <summary>
    /// Initializes the scanner with a buffer and the range of data already present in it.
    /// </summary>
    /// <param name="buffer">The buffer used to hold input characters; it is refilled through <c>Read</c>.</param>
    /// <param name="pos">Index of the first unread character in <paramref name="buffer"/>.</param>
    /// <param name="length">Exclusive end index of the valid data in <paramref name="buffer"/>;
    /// when it equals <paramref name="pos"/> the buffer is considered empty and is refilled on first use.</param>
    protected JsonScanner(char[] buffer, int pos, int length) {
        m_buffer = buffer;
        m_pos = pos;
        m_length = length;
    }

    /// <summary>
    /// Runs the specified automaton over the input and accumulates the recognized text in the token builder.
    /// </summary>
    /// <param name="scanner">The automaton to run; it is reset before scanning.</param>
    /// <param name="tokenType">Receives the tag of the recognized token, or
    /// <c>JsonGrammar.TokenType.None</c> when the end of the input is reached.</param>
    /// <returns><c>true</c> if a token was recognized; <c>false</c> if there is no more input.</returns>
    /// <exception cref="ParserException">The input ended in the middle of a token or an unexpected
    /// character was encountered.</exception>
    bool Read(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
        scanner.Reset();

        // The buffer is exhausted: refill it before starting a new token.
        if (m_pos == m_length) {
            m_pos = 0;
            m_length = Read(m_buffer, 0, m_buffer.Length);
            if (m_length == 0) {
                tokenType = JsonGrammar.TokenType.None;
                return false; // EOF
            }
        }

        // While Scan() returns true the whole remaining chunk has been taken into the token:
        // keep it and fetch the next chunk.
        while (scanner.Scan(m_buffer, m_pos, m_length - m_pos)) {
            m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
            m_pos = 0;
            m_length = Read(m_buffer, 0, m_buffer.Length);
        }

        // The scanner stopped inside the current chunk: collect the part it has accepted.
        var scannerPos = scanner.Position;
        if (scannerPos != m_pos) {
            m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
            m_pos = scannerPos;
        }

        if (!scanner.IsFinal) {
            if (m_length == 0) {
                // unexpected EOF
                throw new ParserException("Unexpected EOF");
            } else {
                // Unexpected character. m_pos now equals scanner.Position, which is used above as
                // the exclusive end of the accepted text, so m_buffer[m_pos] is the first character
                // that was not accepted. (The previous 'm_pos + 1' reported the following character
                // and could index past the end of the buffer.)
                throw new ParserException($"Unexpected character '{m_buffer[m_pos]}'");
            }
        }

        tokenType = scanner.Tag;
        return true;
    }

    /// <summary>
    /// Supplies the next portion of the input.
    /// </summary>
    /// <param name="buffer">The buffer to fill.</param>
    /// <param name="offset">The index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="size">The maximum number of characters to write.</param>
    /// <returns>The number of characters written; <c>0</c> is treated as the end of the input.</returns>
    protected abstract int Read(char[] buffer, int offset, int size);

    /// <summary>
    /// Reads the next lexical element from the input.
    /// </summary>
    /// <param name="tokenValue">Receives the value of the token that was read.</param>
    /// <param name="tokenType">Receives the type of the token that was read.</param>
    /// <returns><c>true</c> - a token was read successfully; <c>false</c> - the end of the input was reached.</returns>
    /// <remarks>If a token cannot be recognized an exception is thrown. Token values are processed, i.e.
    /// escape sequences in strings are translated and numbers are converted to <c>double</c>.
    /// Whitespace tokens are skipped.</remarks>
    public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();
        while (Read(m_jsonContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // Opening bound of a string: the rest is read with the string automaton.
                    tokenValue = ReadString();
                    tokenType = JsonTokenType.String;
                    break;
                case JsonGrammar.TokenType.Number:
                    tokenValue = Double.Parse(m_tokenBuilder.ToString(), CultureInfo.InvariantCulture);
                    tokenType = JsonTokenType.Number;
                    break;
                case JsonGrammar.TokenType.Literal:
                    tokenType = JsonTokenType.Literal;
                    tokenValue = m_tokenBuilder.ToString();
                    break;
                case JsonGrammar.TokenType.Whitespace:
                    // Discard the whitespace and go on to the next token.
                    m_tokenBuilder.Clear();
                    continue;
                default:
                    // The remaining tags are converted to JsonTokenType by a direct cast.
                    tokenType = (JsonTokenType)tag;
                    tokenValue = null;
                    break;
            }
            return true;
        }
        tokenValue = null;
        tokenType = JsonTokenType.None;
        return false;
    }

    /// <summary>
    /// Reads the contents of a string literal up to and including its closing bound.
    /// </summary>
    /// <returns>The string value with escape sequences translated and without the closing bound.</returns>
    /// <exception cref="ParserException">The input ended before the closing bound was found.</exception>
    string ReadString() {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();

        while (Read(m_stringContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // Drop the closing bound that was appended together with the token text.
                    m_tokenBuilder.Length--;
                    return m_tokenBuilder.ToString();
                case JsonGrammar.TokenType.UnescapedChar:
                    // Plain text is already in the builder.
                    break;
                case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                    // Take the four trailing hex digits, remove the six characters of the
                    // sequence and append the translated character instead.
                    m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                    m_tokenBuilder.Length -= 6;
                    m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                    break;
                case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                    // Replace the two characters of the sequence with the translated character.
                    var ch = m_tokenBuilder[m_tokenBuilder.Length - 1];
                    m_tokenBuilder.Length -= 2;
                    m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                    break;
            }
        }

        throw new ParserException("Unexpected end of data");
    }
}
|
|
|
}
|
|
|
|