This is an automated email from the ASF dual-hosted git repository.

rusackas pushed a commit to branch dont-stringify-unicode
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/dont-stringify-unicode by this push:
     new 8e88064ad9 fix: retain non-ascii characters in result sets
8e88064ad9 is described below

commit 8e88064ad9dcb7e57461b03aeb210d2b338997f8
Author: Evan Rusackas <[email protected]>
AuthorDate: Thu Apr 4 09:55:04 2024 -0600

    fix: retain non-ascii characters in result sets
---
 superset/result_set.py              | 16 ++++++++--------
 tests/unit_tests/result_set_test.py | 10 +++++-----
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/superset/result_set.py b/superset/result_set.py
index 5483271035..1fb06479a0 100644
--- a/superset/result_set.py
+++ b/superset/result_set.py
@@ -69,16 +69,16 @@ def stringify_values(array: NDArray[Any]) -> NDArray[Any]:
 
     with np.nditer(result, flags=["refs_ok"], op_flags=[["readwrite"]]) as it:
         for obj in it:
-            if na_obj := pd.isna(obj):
-                # pandas <NA> type cannot be converted to string
-                obj[na_obj] = None
+            if pd.isna(obj.item()):  # Correctly check for pandas NA or numpy NaN values
+                obj[...] = None  # Directly assign None to handle missing values appropriately
             else:
                 try:
-                    # for simple string conversions
-                    # this handles odd character types better
-                    obj[...] = obj.astype(str)
-                except ValueError:
-                    obj[...] = stringify(obj)
+                    # Convert only if the string is fully ASCII, otherwise leave as is
+                    if obj.item().encode('ascii', 'ignore').decode() == obj.item():
+                        obj[...] = obj.astype(str)
+                except (ValueError, UnicodeEncodeError):
+                    # Catch exceptions for non-ASCII or other conversion issues, leaving them unchanged
+                    continue
 
     return result
 
diff --git a/tests/unit_tests/result_set_test.py b/tests/unit_tests/result_set_test.py
index a629c2e2ec..f1d450b288 100644
--- a/tests/unit_tests/result_set_test.py
+++ b/tests/unit_tests/result_set_test.py
@@ -84,8 +84,8 @@ def test_stringify_with_null_integers():
 
     data = [
         ("foo", "bar", pd.NA, None),
-        ("foo", "bar", pd.NA, True),
-        ("foo", "bar", pd.NA, None),
+        ("你好", "bar", pd.NA, True),
+        ("foo", "你好", pd.NA, False),
     ]
     numpy_dtype = [
         ("id", "object"),
@@ -101,10 +101,10 @@ def test_stringify_with_null_integers():
 
     expected = np.array(
         [
-            array(["foo", "foo", "foo"], dtype=object),
-            array(["bar", "bar", "bar"], dtype=object),
+            array(["foo", "你好", "foo"], dtype=object),
+            array(["bar", "bar", "你好"], dtype=object),
             array([None, None, None], dtype=object),
-            array([None, "True", None], dtype=object),
+            array([None, "True", "False"], dtype=object),
         ]
     )
 

Reply via email to