THRIFT-2779: Always write unescaped JSON unicode string. Client: PHP Patch: Phongphan Phuttha
This closes #666 Project: http://git-wip-us.apache.org/repos/asf/thrift/repo Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/90ea4f64 Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/90ea4f64 Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/90ea4f64 Branch: refs/heads/master Commit: 90ea4f64c124404f346c66de47f84fc9ced63132 Parents: c04fb00 Author: Phongphan Phuttha <[email protected]> Authored: Fri Oct 30 00:00:10 2015 +0700 Committer: Nobuaki Sukegawa <[email protected]> Committed: Wed Nov 11 00:32:55 2015 +0900 ---------------------------------------------------------------------- lib/php/lib/Thrift/Protocol/TJSONProtocol.php | 40 +++++++++++++++++++- lib/php/test/Test/Thrift/Fixtures.php | 3 ++ .../Test/Thrift/Protocol/TestTJSONProtocol.php | 21 +++++++++- 3 files changed, 62 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/thrift/blob/90ea4f64/lib/php/lib/Thrift/Protocol/TJSONProtocol.php ---------------------------------------------------------------------- diff --git a/lib/php/lib/Thrift/Protocol/TJSONProtocol.php b/lib/php/lib/Thrift/Protocol/TJSONProtocol.php index 6c93b09..6d8e81f 100644 --- a/lib/php/lib/Thrift/Protocol/TJSONProtocol.php +++ b/lib/php/lib/Thrift/Protocol/TJSONProtocol.php @@ -215,6 +215,44 @@ class TJSONProtocol extends TProtocol return dechex($val); } + private function hasJSONUnescapedUnicode() + { + if (PHP_MAJOR_VERSION > 5 + || (PHP_MAJOR_VERSION == 5 && PHP_MINOR_VERSION >= 4)) + return true; + + return false; + } + + private function unescapedUnicode($str) + { + if ($this->hasJSONUnescapedUnicode()) { + return json_encode($str, JSON_UNESCAPED_UNICODE); + } + + $json = json_encode($str); + + /* + * Unescaped character outside the Basic Multilingual Plane + * High surrogate: 0xD800 - 0xDBFF + * Low surrogate: 0xDC00 - 0xDFFF + */ + $json = 
preg_replace_callback('/\\\\u(d[89ab][0-9a-f]{2})\\\\u(d[cdef][0-9a-f]{2})/i', + function ($matches) { + return mb_convert_encoding(pack('H*', $matches[1].$matches[2]), 'UTF-8', 'UTF-16BE'); + }, $json); + + /* + * Unescaped characters within the Basic Multilingual Plane + */ + $json = preg_replace_callback('/\\\\u([0-9a-f]{4})/i', + function ($matches) { + return mb_convert_encoding(pack('H*', $matches[1]), 'UTF-8', 'UTF-16BE'); + }, $json); + + return $json; + } + private function writeJSONString($b) { $this->context_->write(); @@ -223,7 +261,7 @@ class TJSONProtocol extends TProtocol $this->trans_->write(self::QUOTE); } - $this->trans_->write(json_encode($b)); + $this->trans_->write($this->unescapedUnicode($b)); if (is_numeric($b) && $this->context_->escapeNum()) { $this->trans_->write(self::QUOTE); http://git-wip-us.apache.org/repos/asf/thrift/blob/90ea4f64/lib/php/test/Test/Thrift/Fixtures.php ---------------------------------------------------------------------- diff --git a/lib/php/test/Test/Thrift/Fixtures.php b/lib/php/test/Test/Thrift/Fixtures.php index d9d487f..2c60a08 100644 --- a/lib/php/test/Test/Thrift/Fixtures.php +++ b/lib/php/test/Test/Thrift/Fixtures.php @@ -46,6 +46,9 @@ class Fixtures self::$testArgs['testString3'] = "string that ends in double-backslash \\\\"; + self::$testArgs['testUnicodeStringWithNonBMP'] = + "สวัสดี/𝒯"; + self::$testArgs['testDouble'] = 3.1415926535898; // TODO: add testBinary() call http://git-wip-us.apache.org/repos/asf/thrift/blob/90ea4f64/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php ---------------------------------------------------------------------- diff --git a/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php b/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php index 7ba3441..a4ca9d5 100755 --- a/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php +++ b/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php @@ -200,7 +200,12 @@ class TestTJSONProtocol extends 
\PHPUnit_Framework_TestCase $actual = $this->transport->read( BUFSIZ ); $expected = TestTJSONProtocol_Fixtures::$testArgsJSON['testStringMap']; - $this->assertEquals( $expected, $actual ); + /* + * The $actual returns unescaped string. + * It is required to to decode then encode it again + * to get the expected escaped unicode. + */ + $this->assertEquals( $expected, json_encode(json_decode($actual)) ); } public function testSet_Write() @@ -308,6 +313,18 @@ class TestTJSONProtocol extends \PHPUnit_Framework_TestCase $this->assertEquals( $expected, $actual ); } + public function testString4_Write() + { + $args = new \ThriftTest\ThriftTest_testString_args(); + $args->thing = Fixtures::$testArgs['testUnicodeStringWithNonBMP']; + $args->write( $this->protocol ); + + $actual = $this->transport->read( BUFSIZ ); + $expected = TestTJSONProtocol_Fixtures::$testArgsJSON['testUnicodeStringWithNonBMP']; + + $this->assertEquals( $expected, $actual ); + } + public function testDouble_Read() { $this->transport->write( @@ -528,6 +545,8 @@ class TestTJSONProtocol_Fixtures self::$testArgsJSON['testString3'] = '{"1":{"str":"string that ends in double-backslash \\\\\\\\"}}'; + self::$testArgsJSON['testUnicodeStringWithNonBMP'] = '{"1":{"str":"สวัสดี\/𝒯"}}'; + self::$testArgsJSON['testDouble'] = '{"1":{"dbl":3.1415926535898}}'; self::$testArgsJSON['testByte'] = '{"1":{"i8":1}}';
