##// END OF EJS Templates
Merge pull request !2 from ImplabNet v3...
Merge pull request !2 from ImplabNet v3 Changes from branch: V3

File last commit:

r289:95896f882995 v3.0.14 v3
r294:abef3ebaa230 merge default
Show More
JsonScanner.cs
190 lines | 8.0 KiB | text/x-csharp | CSharpLexer
using System;
using System.Globalization;
using Implab.Automaton;
using System.Text;
using Implab.Components;
using System.IO;
namespace Implab.Formats.Json {
/// <summary>
/// A scanner (lexer) that splits a stream of characters into JSON tokens.
/// </summary>
/// <remarks>
/// Input is consumed through a character buffer which is refilled on demand by
/// <see cref="Read(char[], int, int)"/>. The text of a token that spans several
/// buffer fills is accumulated in an internal <see cref="StringBuilder"/>.
/// </remarks>
public abstract class JsonScanner : Disposable {
    // Scanner used between string literals (structural tokens, numbers, literals, whitespace).
    readonly FastInputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();

    // Scanner used while inside a string literal (plain chars, escapes, closing quote).
    readonly FastInputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

    // Scratch space for the four hex digits of a unicode escape sequence.
    readonly char[] m_unescapeBuf = new char[4];

    // Input buffer shared with the Read implementation.
    readonly char[] m_buffer;

    // Exclusive end of the valid data in m_buffer.
    int m_length;

    // Index in m_buffer of the next character to scan.
    int m_pos;

    // Accumulates the text of the token currently being read.
    readonly StringBuilder m_tokenBuilder = new StringBuilder();

    /// <summary>
    /// Initializes the scanner over the specified buffer.
    /// </summary>
    /// <param name="buffer">The buffer used to hold input characters; it is passed to
    /// <see cref="Read(char[], int, int)"/> whenever more data is required.</param>
    /// <param name="pos">The index in <paramref name="buffer"/> where scanning starts.</param>
    /// <param name="length">The exclusive end index of the data already present in
    /// <paramref name="buffer"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
    protected JsonScanner(char[] buffer, int pos, int length) {
        // A null buffer could never work (the first scan or refill dereferences it),
        // so fail here with a meaningful exception instead of a late NullReferenceException.
        if (buffer == null)
            throw new ArgumentNullException(nameof(buffer));

        m_buffer = buffer;
        m_pos = pos;
        m_length = length;
    }

    /// <summary>
    /// Reads the next token using the specified scanner. Consumed text is captured
    /// in the token builder only for numbers and literals (and for any text that had
    /// to be saved because the buffer was refilled in the middle of a token).
    /// </summary>
    bool ReadChunk(FastInputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
        return ScanChunk(scanner, false, out tokenType);
    }

    /// <summary>
    /// Reads the next element of a string literal using the specified scanner.
    /// All consumed text is captured in the token builder.
    /// </summary>
    bool ReadStringChunk(FastInputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
        return ScanChunk(scanner, true, out tokenType);
    }

    /// <summary>
    /// Runs <paramref name="scanner"/> over the input, refilling the buffer as needed,
    /// until it stops on a token boundary or the input ends.
    /// </summary>
    /// <param name="scanner">The scanner to run; its state is reset first.</param>
    /// <param name="captureAlways"><c>true</c> to append the consumed text to the token
    /// builder for every token type; <c>false</c> to append it only for
    /// <see cref="JsonGrammar.TokenType.Number"/> and <see cref="JsonGrammar.TokenType.Literal"/>.</param>
    /// <param name="tokenType">Receives the tag of the recognized token, or
    /// <see cref="JsonGrammar.TokenType.None"/> at the end of input.</param>
    /// <returns><c>true</c> if a token was recognized; <c>false</c> if the input ended
    /// before any character was consumed.</returns>
    /// <exception cref="ParserException">The input ended in the middle of a token, or the
    /// scanner stopped on a character in a non-final state.</exception>
    bool ScanChunk(FastInputScanner<JsonGrammar.TokenType> scanner, bool captureAlways, out JsonGrammar.TokenType tokenType) {
        scanner.ResetState();

        // Scan returns true when it has consumed all available data and wants more.
        while (scanner.Scan(m_buffer, m_pos, m_length)) {
            // Save the part of the token scanned so far: the buffer is about to be overwritten.
            if (m_pos != m_length)
                m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);

            m_length = Read(m_buffer, 0, m_buffer.Length);

            if (m_length == 0) {
                // No more data is available.
                if (scanner.Position == m_pos) {
                    // The scanner hasn't moved, this is a clean end of input.
                    m_pos = 0;
                    tokenType = JsonGrammar.TokenType.None;
                    return false;
                }

                if (!scanner.IsFinal)
                    throw new ParserException("Unexpected EOF");

                // The input ended exactly at the end of a complete token.
                m_pos = 0;
                tokenType = scanner.Tag;
                return true;
            }

            m_pos = 0;
        }

        // The scanner stopped at stopPos: the index of the first character it did not consume.
        var stopPos = scanner.Position;

        if (!scanner.IsFinal)
            throw new ParserException($"Unexpected character '{m_buffer[stopPos]}'");

        tokenType = scanner.Tag;

        var capture = captureAlways
            || tokenType == JsonGrammar.TokenType.Number
            || tokenType == JsonGrammar.TokenType.Literal;

        if (capture && stopPos != m_pos)
            m_tokenBuilder.Append(m_buffer, m_pos, stopPos - m_pos);

        m_pos = stopPos;
        return true;
    }

    /// <summary>
    /// Reads more input characters into the specified buffer.
    /// </summary>
    /// <param name="buffer">The buffer to fill.</param>
    /// <param name="offset">The index in <paramref name="buffer"/> at which to start writing
    /// (this class always passes 0).</param>
    /// <param name="size">The maximum number of characters to read
    /// (this class always passes the buffer length).</param>
    /// <returns>The number of characters read; 0 is treated as the end of input.</returns>
    protected abstract int Read(char[] buffer, int offset, int size);

    /// <summary>
    /// Reads the next lexical element from the input.
    /// </summary>
    /// <param name="tokenValue">Receives the value of the token: the decoded text for strings,
    /// the source text for numbers and literals, <c>null</c> for all other tokens.</param>
    /// <param name="tokenType">Receives the type of the token.</param>
    /// <returns><c>true</c> if a token was read; <c>false</c> if the end of input was reached.</returns>
    /// <remarks>An exception is thrown when a token cannot be recognized. Escape sequences inside
    /// strings are decoded; whitespace between tokens is skipped.</remarks>
    public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
        JsonGrammar.TokenType tag;

        m_tokenBuilder.Clear();

        while (ReadChunk(m_jsonContext, out tag)) {
            if (tag == JsonGrammar.TokenType.Whitespace) {
                // Whitespace is not reported; drop anything captured across a buffer refill.
                m_tokenBuilder.Clear();
                continue;
            }

            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // Opening quote: the rest of the string is read with the string scanner.
                    tokenValue = ReadString();
                    tokenType = JsonTokenType.String;
                    break;
                case JsonGrammar.TokenType.Number:
                    tokenValue = m_tokenBuilder.ToString();
                    tokenType = JsonTokenType.Number;
                    break;
                case JsonGrammar.TokenType.Literal:
                    tokenValue = m_tokenBuilder.ToString();
                    tokenType = JsonTokenType.Literal;
                    break;
                default:
                    // Remaining grammar tags are cast directly to the public token type.
                    tokenType = (JsonTokenType)tag;
                    tokenValue = null;
                    break;
            }

            return true;
        }

        tokenValue = null;
        tokenType = JsonTokenType.None;
        return false;
    }

    /// <summary>
    /// Reads the body of a string literal up to and including the closing quote
    /// and returns its decoded value. The opening quote must already be consumed.
    /// </summary>
    /// <exception cref="ParserException">The input ended before the closing quote.</exception>
    string ReadString() {
        JsonGrammar.TokenType tag;

        m_tokenBuilder.Clear();

        while (ReadStringChunk(m_stringContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // Drop the closing quote which was captured together with the text.
                    m_tokenBuilder.Length--;
                    return m_tokenBuilder.ToString();
                case JsonGrammar.TokenType.UnescapedChar:
                    // Plain text is already in the builder as-is.
                    break;
                case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                    // Take the four hex digits, remove the whole six-character sequence
                    // and put the decoded value in its place.
                    m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                    m_tokenBuilder.Length -= 6;
                    m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                    break;
                case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                    // Replace the two-character sequence with the character it denotes.
                    var escaped = m_tokenBuilder[m_tokenBuilder.Length - 1];
                    m_tokenBuilder.Length -= 2;
                    m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(escaped));
                    break;
            }
        }

        throw new ParserException("Unexpected end of data");
    }
}
}