aminghadersohi commented on code in PR #38172:
URL: https://github.com/apache/superset/pull/38172#discussion_r3360779528
##########
tests/unit_tests/result_set_test.py:
##########
@@ -264,3 +264,91 @@ def
test_empty_column_names_do_not_rename_explicit_synthetic_names() -> None:
df = result_set.to_pandas_df()
assert list(df.columns) == ["_col_1", "_col_0"]
assert df.iloc[0].tolist() == [10, 20]
+
+
+def test_json_data_type_preserved_as_objects() -> None:
+ """
+ Test that JSON/JSONB data is preserved as Python objects (dicts/lists)
+ instead of being converted to strings.
+
+ This is important for Handlebars templates and other features that need
+ to access JSON data as objects rather than strings.
+
+ See: https://github.com/apache/superset/issues/25125
+ """
+ # Simulate data from PostgreSQL JSONB column - psycopg2 returns dicts
+ data = [
+ (1, {"key": "value1", "nested": {"a": 1}}, "text1"),
+ (2, {"key": "value2", "items": [1, 2, 3]}, "text2"),
+ (3, None, "text3"),
+ (4, {"mixed": "string"}, "text4"),
+ ]
+ description = [
+ ("id", 23, None, None, None, None, None), # INT
+ ("json_col", 3802, None, None, None, None, None), # JSONB
+ ("text_col", 1043, None, None, None, None, None), # VARCHAR
+ ]
+ result_set = SupersetResultSet(data, description, BaseEngineSpec) # type:
ignore
+ df = result_set.to_pandas_df()
+
+ # JSON column should be preserved as Python objects, not strings
+ assert df["json_col"].iloc[0] == {"key": "value1", "nested": {"a": 1}}
+ assert isinstance(df["json_col"].iloc[0], dict)
+ assert df["json_col"].iloc[1] == {"key": "value2", "items": [1, 2, 3]}
+ assert df["json_col"].iloc[2] is None
+ assert df["json_col"].iloc[3] == {"mixed": "string"}
+
+ # Verify the data can be serialized to JSON (as it would be for API
response)
+ from superset.utils import json as superset_json
+
+ records = df.to_dict(orient="records")
+ json_output = superset_json.dumps(records)
Review Comment:
**MEDIUM (non-blocking):** `test_json_data_type_preserved_as_objects`
includes a `text_col` in the input data but never asserts on it after the
round-trip. The non-corruption guarantee — that a plain `TEXT`/`VARCHAR` column
containing a JSON-formatted string (e.g. `'{"key": "val"}'` as a Python `str`)
is left unchanged as a string — is untested in the suite.
Suggested additions:
1. `assert df["text_col"].iloc[0] == "text1"` in this test
2. A dedicated test with a `TEXT` column whose values are JSON-formatted
strings (e.g. `'{"key": "val"}'`), asserting they come back as `str` (not
`dict`) — documents the "no content-sniffing" invariant
##########
superset/result_set.py:
##########
@@ -286,7 +301,13 @@ def data_type(self, col_name: str, pa_dtype: pa.DataType)
-> Optional[str]:
return None
def to_pandas_df(self) -> pd.DataFrame:
- return self.convert_table_to_df(self.table)
+ df = self.convert_table_to_df(self.table)
+ # Restore nested/JSON columns as Python objects instead of strings
+ # This allows JSON data to be used directly in templates like
Handlebars
+ for column, values in self._nested_columns.items():
+ if column in df.columns:
+ df[column] = values
Review Comment:
**NIT:** `_nested_columns` keys come from the same `column_names` list used
to build `self.table` → `df`, so `column in df.columns` is always `True` here
(the only code path that resets `column_names = []` also leaves
`_nested_columns` empty). Consider removing the guard or replacing it with
`assert column in df.columns` to document the invariant explicitly.
##########
tests/unit_tests/result_set_test.py:
##########
@@ -264,3 +264,91 @@ def
test_empty_column_names_do_not_rename_explicit_synthetic_names() -> None:
df = result_set.to_pandas_df()
assert list(df.columns) == ["_col_1", "_col_0"]
assert df.iloc[0].tolist() == [10, 20]
+
+
+def test_json_data_type_preserved_as_objects() -> None:
+ """
+ Test that JSON/JSONB data is preserved as Python objects (dicts/lists)
+ instead of being converted to strings.
+
+ This is important for Handlebars templates and other features that need
+ to access JSON data as objects rather than strings.
+
+ See: https://github.com/apache/superset/issues/25125
+ """
+ # Simulate data from PostgreSQL JSONB column - psycopg2 returns dicts
+ data = [
+ (1, {"key": "value1", "nested": {"a": 1}}, "text1"),
+ (2, {"key": "value2", "items": [1, 2, 3]}, "text2"),
+ (3, None, "text3"),
+ (4, {"mixed": "string"}, "text4"),
+ ]
+ description = [
+ ("id", 23, None, None, None, None, None), # INT
+ ("json_col", 3802, None, None, None, None, None), # JSONB
+ ("text_col", 1043, None, None, None, None, None), # VARCHAR
+ ]
+ result_set = SupersetResultSet(data, description, BaseEngineSpec) # type:
ignore
+ df = result_set.to_pandas_df()
+
+ # JSON column should be preserved as Python objects, not strings
+ assert df["json_col"].iloc[0] == {"key": "value1", "nested": {"a": 1}}
+ assert isinstance(df["json_col"].iloc[0], dict)
+ assert df["json_col"].iloc[1] == {"key": "value2", "items": [1, 2, 3]}
+ assert df["json_col"].iloc[2] is None
+ assert df["json_col"].iloc[3] == {"mixed": "string"}
+
+ # Verify the data can be serialized to JSON (as it would be for API
response)
+ from superset.utils import json as superset_json
Review Comment:
**NIT:** `from superset.utils import json as superset_json` is inside the
test function body; no circular-import justification. Move to module-level
imports at the top of the file.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]