JsonScanner.cs
190 lines
| 8.0 KiB
| text/x-csharp
|
CSharpLexer
cin
|
r228 | using System; | ||
using System.Globalization; | ||||
using Implab.Automaton; | ||||
using System.Text; | ||||
using Implab.Components; | ||||
using System.IO; | ||||
namespace Implab.Formats.Json {
    /// <summary>
    /// Scanner (lexer) which splits a stream of characters into JSON tokens.
    /// </summary>
    /// <remarks>
    /// Characters are pulled through <see cref="Read"/> into a fixed working buffer.
    /// A token may span several buffer refills; the parts which were already
    /// scanned are accumulated in an internal <see cref="StringBuilder"/>.
    /// </remarks>
    public abstract class JsonScanner : Disposable {
        // automaton used to recognize tokens outside of string literals
        readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();

        // automaton used to recognize the pieces of a string literal body
        readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

        // scratch space for the four hex digits of a unicode escape sequence
        readonly char[] m_unescapeBuf = new char[4];

        // working buffer; valid data occupies the range [m_pos, m_length)
        readonly char[] m_buffer;

        // exclusive end index of the valid data in m_buffer
        int m_length;

        // index of the next character to scan in m_buffer
        int m_pos;

        // accumulates the text of the token which is currently being read
        readonly StringBuilder m_tokenBuilder = new StringBuilder();

        /// <summary>
        /// Initializes the scanner with a working buffer.
        /// </summary>
        /// <param name="buffer">The buffer which is scanned and later refilled through <see cref="Read"/>.</param>
        /// <param name="pos">The index of the first character to scan.</param>
        /// <param name="length">The exclusive end index of the data already present in
        /// <paramref name="buffer"/>; note that it is used as a scan limit, not as a count
        /// relative to <paramref name="pos"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
        protected JsonScanner(char[] buffer, int pos, int length) {
            if (buffer == null)
                throw new ArgumentNullException(nameof(buffer));

            m_buffer = buffer;
            m_pos = pos;
            m_length = length;
        }

        /// <summary>
        /// Runs the specified automaton over the input until it recognizes one token,
        /// refilling the buffer when it is exhausted.
        /// </summary>
        /// <param name="scanner">The automaton to run; its state is reset first.</param>
        /// <param name="captureAll"><c>true</c> to append the text of every recognized token
        /// to the token builder, <c>false</c> to append the trailing part only for numbers
        /// and literals. Parts which precede a buffer refill are always appended.</param>
        /// <param name="tokenType">The tag of the recognized token, or
        /// <see cref="JsonGrammar.TokenType.None"/> at the end of the input.</param>
        /// <returns><c>true</c> if a token was recognized, <c>false</c> if the input has ended
        /// before any character was consumed.</returns>
        /// <exception cref="ParserException">The input ends in the middle of a token or
        /// contains a character which can't continue the current token.</exception>
        bool ReadChunk(InputScanner<JsonGrammar.TokenType> scanner, bool captureAll, out JsonGrammar.TokenType tokenType) {
            scanner.ResetState();

            while (scanner.Scan(m_buffer, m_pos, m_length)) {
                // the scanner has consumed the buffer up to m_length and requests more data;
                // save what it has consumed since the buffer is about to be overwritten
                if (m_pos != m_length)
                    m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);

                m_length = Read(m_buffer, 0, m_buffer.Length);

                // a negative result is treated as the end of the input as well, otherwise
                // it would be used as a buffer limit on the next pass
                if (m_length <= 0) {
                    m_length = 0;

                    // scanner.Position still refers to the buffer content before the refill
                    var consumed = scanner.Position != m_pos;
                    m_pos = 0;

                    if (!consumed) {
                        // the scanner hasn't moved, that's the end of the input
                        tokenType = JsonGrammar.TokenType.None;
                        return false;
                    }

                    if (!scanner.IsFinal)
                        throw new ParserException("Unexpected EOF");

                    // the input ended exactly at a token boundary
                    tokenType = scanner.Tag;
                    return true;
                }

                m_pos = 0;
            }

            // the scanner stopped inside the buffer: [m_pos, scannerPos) is the accepted text
            // and m_buffer[scannerPos] is the character which wasn't accepted
            var scannerPos = scanner.Position;

            if (!scanner.IsFinal)
                throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");

            tokenType = scanner.Tag;

            var capture = captureAll
                || tokenType == JsonGrammar.TokenType.Number
                || tokenType == JsonGrammar.TokenType.Literal;

            if (capture && scannerPos != m_pos)
                m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);

            m_pos = scannerPos;
            return true;
        }

        /// <summary>
        /// Reads the next portion of the input into the buffer.
        /// </summary>
        /// <param name="buffer">The buffer to fill.</param>
        /// <param name="offset">The index in <paramref name="buffer"/> at which to start writing;
        /// the scanner always passes <c>0</c>.</param>
        /// <param name="size">The maximum number of characters to read; the scanner always
        /// passes the length of the buffer.</param>
        /// <returns>The number of characters read; <c>0</c> signals the end of the input.</returns>
        protected abstract int Read(char[] buffer, int offset, int size);

        /// <summary>
        /// Reads the next lexical element from the input.
        /// </summary>
        /// <param name="tokenValue">Receives the value of the token: the unescaped content for
        /// strings, the text of the token for numbers and literals, <c>null</c> for all
        /// other tokens.</param>
        /// <param name="tokenType">Receives the type of the token.</param>
        /// <returns><c>true</c> if a token has been read, <c>false</c> if the end of the input
        /// has been reached.</returns>
        /// <remarks>Whitespace is skipped. Escape sequences in strings are translated, numbers
        /// and literals are returned as text and aren't converted by the scanner.</remarks>
        /// <exception cref="ParserException">The input can't be recognized as a token.</exception>
        public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
            JsonGrammar.TokenType tag;
            m_tokenBuilder.Clear();

            while (ReadChunk(m_jsonContext, false, out tag)) {
                switch (tag) {
                    case JsonGrammar.TokenType.StringBound:
                        // an opening quote, the rest is handled by the string automaton
                        tokenValue = ReadString();
                        tokenType = JsonTokenType.String;
                        break;
                    case JsonGrammar.TokenType.Number:
                        tokenValue = m_tokenBuilder.ToString();
                        tokenType = JsonTokenType.Number;
                        break;
                    case JsonGrammar.TokenType.Literal:
                        tokenValue = m_tokenBuilder.ToString();
                        tokenType = JsonTokenType.Literal;
                        break;
                    case JsonGrammar.TokenType.Whitespace:
                        // drop whitespace which could be captured during a buffer refill
                        m_tokenBuilder.Clear();
                        continue;
                    default:
                        // remaining tags are converted to the public token type by a cast
                        tokenType = (JsonTokenType)tag;
                        tokenValue = null;
                        break;
                }
                return true;
            }

            tokenValue = null;
            tokenType = JsonTokenType.None;
            return false;
        }

        /// <summary>
        /// Reads the body of a string literal up to the closing quote; the opening quote
        /// must be already consumed.
        /// </summary>
        /// <returns>The content of the string with the escape sequences translated.</returns>
        /// <exception cref="ParserException">The input ends before the closing quote.</exception>
        string ReadString() {
            JsonGrammar.TokenType tag;
            m_tokenBuilder.Clear();

            while (ReadChunk(m_stringContext, true, out tag)) {
                switch (tag) {
                    case JsonGrammar.TokenType.StringBound:
                        // remove the closing quote from the result
                        m_tokenBuilder.Length--;
                        return m_tokenBuilder.ToString();
                    case JsonGrammar.TokenType.UnescapedChar:
                        // already captured as is
                        break;
                    case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                        // take the four hex digits, then replace all six characters
                        m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                        m_tokenBuilder.Length -= 6;
                        m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                        break;
                    case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                        // replace the backslash and the character which follows it
                        var ch = m_tokenBuilder[m_tokenBuilder.Length - 1];
                        m_tokenBuilder.Length -= 2;
                        m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                        break;
                }
            }

            throw new ParserException("Unexpected end of data");
        }
    }
}