This is an automated email from the ASF dual-hosted git repository.
rusackas pushed a commit to branch dont-stringify-unicode
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/dont-stringify-unicode by this
push:
new 8e88064ad9 fix: retain non-ascii characters in result sets
8e88064ad9 is described below
commit 8e88064ad9dcb7e57461b03aeb210d2b338997f8
Author: Evan Rusackas <[email protected]>
AuthorDate: Thu Apr 4 09:55:04 2024 -0600
fix: retain non-ascii characters in result sets
---
superset/result_set.py | 16 ++++++++--------
tests/unit_tests/result_set_test.py | 10 +++++-----
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/superset/result_set.py b/superset/result_set.py
index 5483271035..1fb06479a0 100644
--- a/superset/result_set.py
+++ b/superset/result_set.py
@@ -69,16 +69,16 @@ def stringify_values(array: NDArray[Any]) -> NDArray[Any]:
with np.nditer(result, flags=["refs_ok"], op_flags=[["readwrite"]]) as it:
for obj in it:
- if na_obj := pd.isna(obj):
- # pandas <NA> type cannot be converted to string
- obj[na_obj] = None
+ if pd.isna(obj.item()): # Correctly check for pandas NA or numpy NaN values
+ obj[...] = None # Directly assign None to handle missing values appropriately
else:
try:
- # for simple string conversions
- # this handles odd character types better
- obj[...] = obj.astype(str)
- except ValueError:
- obj[...] = stringify(obj)
+ # Convert only if the string is fully ASCII, otherwise leave as is
+ if obj.item().encode('ascii', 'ignore').decode() == obj.item():
+ obj[...] = obj.astype(str)
+ except (ValueError, UnicodeEncodeError):
+ # Catch exceptions for non-ASCII or other conversion issues, leaving them unchanged
+ continue
return result
diff --git a/tests/unit_tests/result_set_test.py b/tests/unit_tests/result_set_test.py
index a629c2e2ec..f1d450b288 100644
--- a/tests/unit_tests/result_set_test.py
+++ b/tests/unit_tests/result_set_test.py
@@ -84,8 +84,8 @@ def test_stringify_with_null_integers():
data = [
("foo", "bar", pd.NA, None),
- ("foo", "bar", pd.NA, True),
- ("foo", "bar", pd.NA, None),
+ ("你好", "bar", pd.NA, True),
+ ("foo", "你好", pd.NA, False),
]
numpy_dtype = [
("id", "object"),
@@ -101,10 +101,10 @@ def test_stringify_with_null_integers():
expected = np.array(
[
- array(["foo", "foo", "foo"], dtype=object),
- array(["bar", "bar", "bar"], dtype=object),
+ array(["foo", "你好", "foo"], dtype=object),
+ array(["bar", "bar", "你好"], dtype=object),
array([None, None, None], dtype=object),
- array([None, "True", None], dtype=object),
+ array([None, "True", "False"], dtype=object),
]
)