Repository: thrift
Updated Branches:
  refs/heads/master f593dd3a9 -> f2ab1346c


THRIFT-3396 DART: UTF-8 sent by PHP as JSON is not understood by TJsonProtocol
Client: Dart
Patch: Phongphan Phuttha <[email protected]>

This closes #658


Project: http://git-wip-us.apache.org/repos/asf/thrift/repo
Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/f2ab1346
Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/f2ab1346
Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/f2ab1346

Branch: refs/heads/master
Commit: f2ab1346c6dc2290ef9b3290e1712e7987c2d562
Parents: f593dd3
Author: Jens Geyer <[email protected]>
Authored: Tue Oct 20 21:41:47 2015 +0200
Committer: Jens Geyer <[email protected]>
Committed: Tue Oct 20 21:41:47 2015 +0200

----------------------------------------------------------------------
 lib/dart/lib/src/protocol/t_json_protocol.dart | 44 +++++++++++++++++----
 lib/dart/test/protocol/t_protocol_test.dart    | 23 +++++++++++
 2 files changed, 60 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/thrift/blob/f2ab1346/lib/dart/lib/src/protocol/t_json_protocol.dart
----------------------------------------------------------------------
diff --git a/lib/dart/lib/src/protocol/t_json_protocol.dart b/lib/dart/lib/src/protocol/t_json_protocol.dart
index 4fa6499..ca91c8b 100644
--- a/lib/dart/lib/src/protocol/t_json_protocol.dart
+++ b/lib/dart/lib/src/protocol/t_json_protocol.dart
@@ -41,6 +41,7 @@ class TJsonProtocol extends TProtocol {
   TJsonProtocol(TTransport transport) : super(transport) {
     _rootContext = new _BaseContext(this);
     _reader = new _LookaheadReader(this);
+    _resetContext();
   }
 
   void _pushContext(_BaseContext c) {
@@ -284,10 +285,15 @@ class TJsonProtocol extends TProtocol {
     _writeJsonBase64(bytes);
   }
 
+  bool _isHighSurrogate(int b) => b >= 0xD800 && b <= 0xDBFF;
+
+  bool _isLowSurrogate(int b) => b >= 0xDC00 && b <= 0xDFFF;
+
   /// read
 
   Uint8List _readJsonString({bool skipContext: false}) {
     List<int> bytes = [];
+    List<int> codeunits = [];
 
     if (!skipContext) {
       _context.read();
@@ -308,7 +314,7 @@ class TJsonProtocol extends TProtocol {
 
       byte = _reader.read();
 
-      // distinguish between \u00XX and control chars like \n
+      // distinguish between \uXXXX and control chars like \n
       if (byte != _Constants.ESCSEQ_BYTES[1]) {
         String char = new String.fromCharCode(byte);
         int offset = _Constants.ESCAPE_CHARS.indexOf(char);
@@ -321,12 +327,36 @@ class TJsonProtocol extends TProtocol {
         continue;
       }
 
-      // it's \u00XX
-      _readJsonSyntaxChar(_Constants.HEX_0_BYTES[0]);
-      _readJsonSyntaxChar(_Constants.HEX_0_BYTES[0]);
-      transport.readAll(_tempBuffer, 0, 2);
-      byte = _hexVal(_tempBuffer[0]) << 4 + _hexVal(_tempBuffer[1]);
-      bytes.add(byte);
+      // it's \uXXXX
+      transport.readAll(_tempBuffer, 0, 4);
+      byte = (_hexVal(_tempBuffer[0]) << 12)
+        + (_hexVal(_tempBuffer[1]) << 8)
+        + (_hexVal(_tempBuffer[2]) << 4)
+        + _hexVal(_tempBuffer[3]);
+      if (_isHighSurrogate(byte)) {
+        if (codeunits.isNotEmpty) {
+          throw new TProtocolError(
+              TProtocolErrorType.INVALID_DATA, "Expected low surrogate");
+        }
+        codeunits.add(byte);
+      }
+      else if (_isLowSurrogate(byte)) {
+        if (codeunits.isEmpty) {
+          throw new TProtocolError(
+              TProtocolErrorType.INVALID_DATA, "Expected high surrogate");
+        }
+        codeunits.add(byte);
+        bytes.addAll(utf8Codec.encode(new String.fromCharCodes(codeunits)));
+        codeunits.clear();
+      }
+      else {
+        bytes.addAll(utf8Codec.encode(new String.fromCharCode(byte)));
+      }
+    }
+
+    if (codeunits.isNotEmpty) {
+      throw new TProtocolError(
+          TProtocolErrorType.INVALID_DATA, "Expected low surrogate");
     }
 
     return new Uint8List.fromList(bytes);

http://git-wip-us.apache.org/repos/asf/thrift/blob/f2ab1346/lib/dart/test/protocol/t_protocol_test.dart
----------------------------------------------------------------------
diff --git a/lib/dart/test/protocol/t_protocol_test.dart b/lib/dart/test/protocol/t_protocol_test.dart
index 88ddd4f..7362884 100644
--- a/lib/dart/test/protocol/t_protocol_test.dart
+++ b/lib/dart/test/protocol/t_protocol_test.dart
@@ -18,6 +18,7 @@
 library thrift.test.transport.t_json_protocol_test;
 
 import 'dart:async';
+import 'dart:convert' show UTF8;
 import 'dart:typed_data' show Uint8List;
 
 import 'package:test/test.dart';
@@ -352,6 +353,28 @@ void main() {
       protocol.writeMessageBegin(message);
     });
 
+
+    test('Test escaped unicode', () async {
+      /*
+         KOR_KAI
+           UTF-8: 0xE0 0xB8 0x81
+           UTF-16: 0x0E01
+         G clef:
+           UTF-8: 0xF0 0x9D 0x84 0x9E
+           UTF-16: 0xD834 0xDD1E
+       */
+      var buffer = UTF8.encode(r'"\u0001\u0e01 \ud834\udd1e"');
+      var transport = new TBufferedTransport();
+      transport.writeAll(buffer);
+
+      var protocol = new TJsonProtocol(transport);
+
+      await protocol.transport.flush();
+
+      var subject = protocol.readString();
+      expect(subject, UTF8.decode([0x01, 0xE0, 0xB8, 0x81, 0x20, 0xF0, 0x9D, 0x84, 0x9E]));
+    });
+
     group('shared tests', sharedTests);
   });
 
