Repository: thrift Updated Branches: refs/heads/master 27378fa34 -> 11b515cd2
THRIFT-3403 Fixed JSON string reader doesn't recognize UTF-16 surrogate pairs Client: C# Patch: Phongphan Phuttha <[email protected]> This closes #668 Project: http://git-wip-us.apache.org/repos/asf/thrift/repo Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/11b515cd Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/11b515cd Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/11b515cd Branch: refs/heads/master Commit: 11b515cd29292358305ace4ce20d7e626c7e7f42 Parents: 27378fa Author: Phongphan Phuttha <[email protected]> Authored: Fri Oct 30 01:31:44 2015 +0700 Committer: Jens Geyer <[email protected]> Committed: Thu Oct 29 22:09:19 2015 +0200 ---------------------------------------------------------------------- lib/csharp/src/Protocol/TJSONProtocol.cs | 37 +++++++++++++++++++++++++-- lib/csharp/test/JSON/Program.cs | 13 ++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/thrift/blob/11b515cd/lib/csharp/src/Protocol/TJSONProtocol.cs ---------------------------------------------------------------------- diff --git a/lib/csharp/src/Protocol/TJSONProtocol.cs b/lib/csharp/src/Protocol/TJSONProtocol.cs index 9d51c74..5e6589e 100644 --- a/lib/csharp/src/Protocol/TJSONProtocol.cs +++ b/lib/csharp/src/Protocol/TJSONProtocol.cs @@ -725,6 +725,7 @@ namespace Thrift.Protocol private byte[] ReadJSONString(bool skipContext) { MemoryStream buffer = new MemoryStream(); + List<char> codeunits = new List<char>(); if (!skipContext) @@ -769,9 +770,41 @@ namespace Thrift.Protocol (HexVal((byte)tempBuffer[1]) << 8) + (HexVal((byte)tempBuffer[2]) << 4) + HexVal(tempBuffer[3])); - var tmp = utf8Encoding.GetBytes(new char[] { (char)wch }); - buffer.Write(tmp, 0, tmp.Length); + if (Char.IsHighSurrogate((char)wch)) + { + if (codeunits.Count > 0) + { + throw new TProtocolException(TProtocolException.INVALID_DATA, + "Expected low surrogate char"); + } + codeunits.Add((char)wch); + } + else if (Char.IsLowSurrogate((char)wch)) + { + if (codeunits.Count == 0) + { + throw new TProtocolException(TProtocolException.INVALID_DATA, + "Expected high surrogate char"); + } + codeunits.Add((char)wch); + var tmp = utf8Encoding.GetBytes(codeunits.ToArray()); + buffer.Write(tmp, 0, tmp.Length); + codeunits.Clear(); + } + else + { + var tmp = utf8Encoding.GetBytes(new char[] { (char)wch }); + buffer.Write(tmp, 0, tmp.Length); + } } + + + if (codeunits.Count > 0) + { + throw new TProtocolException(TProtocolException.INVALID_DATA, + "Expected low surrogate char"); + } + return buffer.ToArray(); } http://git-wip-us.apache.org/repos/asf/thrift/blob/11b515cd/lib/csharp/test/JSON/Program.cs ---------------------------------------------------------------------- diff --git a/lib/csharp/test/JSON/Program.cs b/lib/csharp/test/JSON/Program.cs index 9823221..f61388a 100644 --- a/lib/csharp/test/JSON/Program.cs +++ b/lib/csharp/test/JSON/Program.cs @@ -34,6 +34,7 @@ namespace JSONTest { TestThrift2365(); // JSON binary decodes too much data TestThrift2336(); // hex encoding using \uXXXX where 0xXXXX > 0xFF + TestThrift3403(); // JSON escaped unicode surrogate pair support. } @@ -78,5 +79,17 @@ namespace JSONTest var prot = new TJSONProtocol(trans); Debug.Assert(prot.ReadString() == RUSSIAN_TEXT, "reading JSON with hex-encoded chars > 8 bit"); } + + public static void TestThrift3403() + { + string GCLEF_TEXT = "\ud834\udd1e"; + const string GCLEF_JSON = "\"\\ud834\\udd1e\""; + + // parse and check + var stm = new MemoryStream(Encoding.UTF8.GetBytes(GCLEF_JSON)); + var trans = new TStreamTransport(stm, null); + var prot = new TJSONProtocol(trans); + Debug.Assert(prot.ReadString() == GCLEF_TEXT, "reading JSON with surrogate pair hex-encoded chars"); + } } }
