pitrou commented on a change in pull request #7656: URL: https://github.com/apache/arrow/pull/7656#discussion_r452810358
########## File path: python/pyarrow/tests/test_compute.py ########## @@ -95,6 +95,128 @@ def test_binary_contains_exact(): assert expected.equals(result) +# We use isprintable to find out about codepoints that Python doesn't know, but +# utf8proc does (or in a future version of Python the other way around). +# These codepoints cannot be compared between Arrow and the Python +# implementation. +def _find_new_unicode_codepoints(): + new = set() + for i in range(0x80, 0x11000): + c = chr(i) + if i in range(0xD800, 0xE000): + continue # bug? pyarrow doesn't allow utf16 surrogates Review comment: Why bug? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org