[ 
https://issues.apache.org/jira/browse/THRIFT-4207?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16693774#comment-16693774
 ] 

ASF GitHub Bot commented on THRIFT-4207:
----------------------------------------

jeking3 closed pull request #1274: THRIFT-4207: Make sure Python Accelerated 
protocol does not allow invalid UTF-8
URL: https://github.com/apache/thrift/pull/1274
 
 
   

This PR was merged from a forked repository. Because GitHub hides the
original diff of a fork's pull request once it has been merged, the diff
is reproduced below for the sake of provenance:

diff --git a/lib/py/src/ext/protocol.tcc b/lib/py/src/ext/protocol.tcc
index c025d0c968..e2e782a6c1 100644
--- a/lib/py/src/ext/protocol.tcc
+++ b/lib/py/src/ext/protocol.tcc
@@ -419,6 +419,8 @@ bool ProtocolBase<Impl>::encodeValue(PyObject* value, TType type, PyObject* type
 
   case T_STRING: {
     ScopedPyObject nval;
+    Py_ssize_t len;
+    char *str;
 
     if (PyUnicode_Check(value)) {
       nval.reset(PyUnicode_AsUTF8String(value));
@@ -426,11 +428,21 @@ bool ProtocolBase<Impl>::encodeValue(PyObject* value, TType type, PyObject* type
         return false;
       }
     } else {
+      if (isUtf8(typeargs)) {
+        if (PyBytes_AsStringAndSize(value, &str, &len) < 0) {
+          return false;
+        }
+        // Check that input is a valid UTF-8 string.
+        nval.reset(PyUnicode_DecodeUTF8(str, len, 0));
+        if (!nval) {
+          return false;
+        }
+      }
       Py_INCREF(value);
       nval.reset(value);
     }
 
-    Py_ssize_t len = PyBytes_Size(nval.get());
+    len = PyBytes_Size(nval.get());
     if (!detail::check_ssize_t_32(len)) {
       return false;
     }
diff --git a/lib/py/src/protocol/TProtocol.py b/lib/py/src/protocol/TProtocol.py
index fd20cb7906..588d997e57 100644
--- a/lib/py/src/protocol/TProtocol.py
+++ b/lib/py/src/protocol/TProtocol.py
@@ -118,6 +118,8 @@ def writeDouble(self, dub):
         pass
 
     def writeString(self, str_val):
+        if isinstance(str_val, bytes):
+            str_val = str_val.decode('utf8')
         self.writeBinary(str_to_binary(str_val))
 
     def writeBinary(self, str_val):
diff --git a/test/py/FastbinaryTest.py b/test/py/FastbinaryTest.py
index 05c0bb6d15..2a87d5fddc 100755
--- a/test/py/FastbinaryTest.py
+++ b/test/py/FastbinaryTest.py
@@ -74,6 +74,9 @@ def isOpen(self):
                     u"\x20\xce\x91\x74\x74\xce\xb1\xe2\x85\xbd\xce\xba"\
                     u"\xc7\x83\xe2\x80\xbc"
 
+ooe_bad = OneOfEach()
+ooe_bad.zomg_unicode = b'\xbe\xef\xff'
+
 if sys.version_info[0] == 2 and os.environ.get('THRIFT_TEST_PY_NO_UTF8STRINGS'):
     ooe1.zomg_unicode = ooe1.zomg_unicode.encode('utf8')
     ooe2.zomg_unicode = ooe2.zomg_unicode.encode('utf8')
@@ -167,6 +170,27 @@ def _check_read(self, o):
             pprint(repr(o))
             raise Exception('read value mismatch')
 
+    def _check_bad_unicode(self, o):
+        if (sys.version_info[0] == 2 and
+                os.environ.get('THRIFT_TEST_PY_NO_UTF8STRINGS')):
+            return
+
+        try:
+            prot_slow = self._slow(TTransport.TMemoryBuffer())
+            o.write(prot_slow)
+        except UnicodeError:
+            pass
+        else:
+            raise Exception('UnicodeError not raised')
+
+        try:
+            prot_fast = self._fast(TTransport.TMemoryBuffer())
+            o.write(prot_fast)
+        except UnicodeError:
+            pass
+        else:
+            raise Exception('UnicodeError not raised')
+
     def do_test(self):
         self._check_write(HolyMoley())
         self._check_read(HolyMoley())
@@ -188,6 +212,8 @@ def do_test(self):
 
         self._check_read(Backwards(**{"first_tag2": 4, "second_tag1": 2}))
 
+        self._check_bad_unicode(ooe_bad)
+
         # One case where the serialized form changes, but only superficially.
         o = Backwards(**{"first_tag2": 4, "second_tag1": 2})
         trans_fast = TTransport.TMemoryBuffer()


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Accelerated version of TBinaryProtocol allows invalid input to string fields.
> -----------------------------------------------------------------------------
>
>                 Key: THRIFT-4207
>                 URL: https://issues.apache.org/jira/browse/THRIFT-4207
>             Project: Thrift
>          Issue Type: Bug
>          Components: Python - Library
>    Affects Versions: 0.10.0
>            Reporter: Elvis Pranskevichus
>            Assignee: Aki Sukegawa
>            Priority: Major
>
> {{TBinaryProtocolAccelerated}} and {{TCompactProtocolAccelerated}} currently 
> accept arbitrary bytes as input to string fields even when {{py:utf8strings}} 
> is on.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to