Repository: thrift Updated Branches: refs/heads/master d6ca81b2c -> a6509f7b3
THRIFT-3404 Fixed JSON String reader doesn't recognize UTF-16 surrogate pairs. Client: Delphi Patch: Phongphan Phuttha <[email protected]> This closes #671 Project: http://git-wip-us.apache.org/repos/asf/thrift/repo Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/a6509f7b Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/a6509f7b Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/a6509f7b Branch: refs/heads/master Commit: a6509f7b378ed6591d550134fdda18e4a436fe77 Parents: d6ca81b Author: Phongphan Phuttha <[email protected]> Authored: Sat Oct 31 01:09:47 2015 +0700 Committer: Jens Geyer <[email protected]> Committed: Fri Oct 30 21:45:36 2015 +0200 ---------------------------------------------------------------------- lib/delphi/src/Thrift.Protocol.JSON.pas | 28 ++++++++++++++++++++++++++-- lib/delphi/test/TestClient.pas | 14 +++++++------- 2 files changed, 33 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/thrift/blob/a6509f7b/lib/delphi/src/Thrift.Protocol.JSON.pas ---------------------------------------------------------------------- diff --git a/lib/delphi/src/Thrift.Protocol.JSON.pas b/lib/delphi/src/Thrift.Protocol.JSON.pas index f491f53..36c3d72 100644 --- a/lib/delphi/src/Thrift.Protocol.JSON.pas +++ b/lib/delphi/src/Thrift.Protocol.JSON.pas @@ -24,6 +24,7 @@ unit Thrift.Protocol.JSON; interface uses + Character, Classes, SysUtils, Math, @@ -821,9 +822,12 @@ function TJSONProtocolImpl.ReadJSONString( skipContext : Boolean) : TBytes; var buffer : TMemoryStream; ch : Byte; wch : Word; + highSurogate: Char; + surrogatePairs: Array[0..1] of Char; off : Integer; tmp : TBytes; begin + highSurogate := #0; buffer := TMemoryStream.Create; try if not skipContext @@ -862,11 +866,31 @@ begin + (HexVal(tmp[1]) shl 8) + (HexVal(tmp[2]) shl 4) + HexVal(tmp[3]); + // we need to make UTF8 bytes from it, to be decoded later - tmp := 
SysUtils.TEncoding.UTF8.GetBytes(Char(wch)); - buffer.Write( tmp[0], length(tmp)); + if Character.IsHighSurrogate(char(wch)) then begin + if highSurogate <> #0 + then raise TProtocolException.Create( TProtocolException.INVALID_DATA, 'Expected low surrogate char'); + highSurogate := char(wch); + end + else if Character.IsLowSurrogate(char(wch)) then begin + if highSurogate = #0 + then TProtocolException.Create( TProtocolException.INVALID_DATA, 'Expected high surrogate char'); + surrogatePairs[0] := highSurogate; + surrogatePairs[1] := char(wch); + tmp := TEncoding.UTF8.GetBytes(surrogatePairs); + buffer.Write( tmp[0], Length(tmp)); + highSurogate := #0; + end + else begin + tmp := SysUtils.TEncoding.UTF8.GetBytes(Char(wch)); + buffer.Write( tmp[0], Length(tmp)); + end; end; + if highSurogate <> #0 + then raise TProtocolException.Create( TProtocolException.INVALID_DATA, 'Expected low surrogate char'); + SetLength( result, buffer.Size); if buffer.Size > 0 then Move( buffer.Memory^, result[0], Length(result)); http://git-wip-us.apache.org/repos/asf/thrift/blob/a6509f7b/lib/delphi/test/TestClient.pas ---------------------------------------------------------------------- diff --git a/lib/delphi/test/TestClient.pas b/lib/delphi/test/TestClient.pas index 5f375ef..144334b 100644 --- a/lib/delphi/test/TestClient.pas +++ b/lib/delphi/test/TestClient.pas @@ -1028,9 +1028,9 @@ const TEST_DOUBLE = -1.234e-56; DELTA_DOUBLE = TEST_DOUBLE * 1e-14; TEST_STRING = 'abc-'#$00E4#$00f6#$00fc; // german umlauts (en-us: "funny chars") - // Test THRIFT-2336 with 'Русское Название'; - RUSSIAN_TEXT = #$0420#$0443#$0441#$0441#$043a#$043e#$0435' '#$041d#$0430#$0437#$0432#$0430#$043d#$0438#$0435; - RUSSIAN_JSON = '"\u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435"'; + // Test THRIFT-2336 and THRIFT-3404 with U+1D11E (G Clef symbol) and 'Русское Название'; + G_CLEF_AND_CYRILLIC_TEXT = #$1d11e' '#$0420#$0443#$0441#$0441#$043a#$043e#$0435' 
'#$041d#$0430#$0437#$0432#$0430#$043d#$0438#$0435; + G_CLEF_AND_CYRILLIC_JSON = '"\ud834\udd1e \u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435"'; // test both possible solidus encodings SOLIDUS_JSON_DATA = '"one/two\/three"'; SOLIDUS_EXCPECTED = 'one/two/three'; @@ -1117,22 +1117,22 @@ begin prot := TJSONProtocolImpl.Create( TStreamTransportImpl.Create( nil, TThriftStreamAdapterDelphi.Create( stm, FALSE))); - prot.WriteString( RUSSIAN_TEXT); + prot.WriteString( G_CLEF_AND_CYRILLIC_TEXT); stm.Position := 0; prot := TJSONProtocolImpl.Create( TStreamTransportImpl.Create( TThriftStreamAdapterDelphi.Create( stm, FALSE), nil)); - Expect( prot.ReadString = RUSSIAN_TEXT, 'Writing JSON with chars > 8 bit'); + Expect( prot.ReadString = G_CLEF_AND_CYRILLIC_TEXT, 'Writing JSON with chars > 8 bit'); // Widechars should work with hex-encoding too. Do they? stm.Position := 0; stm.Size := 0; - stm.WriteString( RUSSIAN_JSON); + stm.WriteString( G_CLEF_AND_CYRILLIC_JSON); stm.Position := 0; prot := TJSONProtocolImpl.Create( TStreamTransportImpl.Create( TThriftStreamAdapterDelphi.Create( stm, FALSE), nil)); - Expect( prot.ReadString = RUSSIAN_TEXT, 'Reading JSON with chars > 8 bit'); + Expect( prot.ReadString = G_CLEF_AND_CYRILLIC_TEXT, 'Reading JSON with chars > 8 bit'); finally
