rusackas commented on code in PR #38172:
URL: https://github.com/apache/superset/pull/38172#discussion_r3364286186
##########
tests/unit_tests/result_set_test.py:
##########
@@ -264,3 +264,91 @@ def
test_empty_column_names_do_not_rename_explicit_synthetic_names() -> None:
df = result_set.to_pandas_df()
assert list(df.columns) == ["_col_1", "_col_0"]
assert df.iloc[0].tolist() == [10, 20]
+
+
+def test_json_data_type_preserved_as_objects() -> None:
+ """
+ Test that JSON/JSONB data is preserved as Python objects (dicts/lists)
+ instead of being converted to strings.
+
+ This is important for Handlebars templates and other features that need
+ to access JSON data as objects rather than strings.
+
+ See: https://github.com/apache/superset/issues/25125
+ """
+ # Simulate data from PostgreSQL JSONB column - psycopg2 returns dicts
+ data = [
+ (1, {"key": "value1", "nested": {"a": 1}}, "text1"),
+ (2, {"key": "value2", "items": [1, 2, 3]}, "text2"),
+ (3, None, "text3"),
+ (4, {"mixed": "string"}, "text4"),
+ ]
+ description = [
+ ("id", 23, None, None, None, None, None), # INT
+ ("json_col", 3802, None, None, None, None, None), # JSONB
+ ("text_col", 1043, None, None, None, None, None), # VARCHAR
+ ]
+ result_set = SupersetResultSet(data, description, BaseEngineSpec) # type:
ignore
+ df = result_set.to_pandas_df()
+
+ # JSON column should be preserved as Python objects, not strings
+ assert df["json_col"].iloc[0] == {"key": "value1", "nested": {"a": 1}}
+ assert isinstance(df["json_col"].iloc[0], dict)
+ assert df["json_col"].iloc[1] == {"key": "value2", "items": [1, 2, 3]}
+ assert df["json_col"].iloc[2] is None
+ assert df["json_col"].iloc[3] == {"mixed": "string"}
+
+ # Verify the data can be serialized to JSON (as it would be for API
response)
+ from superset.utils import json as superset_json
+
+ records = df.to_dict(orient="records")
+ json_output = superset_json.dumps(records)
Review Comment:
Good call. I added `text_col` assertions to
`test_json_data_type_preserved_as_objects` confirming the plain VARCHAR values
round-trip unchanged, and added a dedicated
`test_json_formatted_string_in_text_column_stays_string` test that documents
the no-content-sniffing invariant: a TEXT column whose values are
JSON-formatted strings comes back as `str`, never parsed into dict/list.
##########
tests/unit_tests/result_set_test.py:
##########
@@ -264,3 +264,91 @@ def
test_empty_column_names_do_not_rename_explicit_synthetic_names() -> None:
df = result_set.to_pandas_df()
assert list(df.columns) == ["_col_1", "_col_0"]
assert df.iloc[0].tolist() == [10, 20]
+
+
+def test_json_data_type_preserved_as_objects() -> None:
+ """
+ Test that JSON/JSONB data is preserved as Python objects (dicts/lists)
+ instead of being converted to strings.
+
+ This is important for Handlebars templates and other features that need
+ to access JSON data as objects rather than strings.
+
+ See: https://github.com/apache/superset/issues/25125
+ """
+ # Simulate data from PostgreSQL JSONB column - psycopg2 returns dicts
+ data = [
+ (1, {"key": "value1", "nested": {"a": 1}}, "text1"),
+ (2, {"key": "value2", "items": [1, 2, 3]}, "text2"),
+ (3, None, "text3"),
+ (4, {"mixed": "string"}, "text4"),
+ ]
+ description = [
+ ("id", 23, None, None, None, None, None), # INT
+ ("json_col", 3802, None, None, None, None, None), # JSONB
+ ("text_col", 1043, None, None, None, None, None), # VARCHAR
+ ]
+ result_set = SupersetResultSet(data, description, BaseEngineSpec) # type:
ignore
+ df = result_set.to_pandas_df()
+
+ # JSON column should be preserved as Python objects, not strings
+ assert df["json_col"].iloc[0] == {"key": "value1", "nested": {"a": 1}}
+ assert isinstance(df["json_col"].iloc[0], dict)
+ assert df["json_col"].iloc[1] == {"key": "value2", "items": [1, 2, 3]}
+ assert df["json_col"].iloc[2] is None
+ assert df["json_col"].iloc[3] == {"mixed": "string"}
+
+ # Verify the data can be serialized to JSON (as it would be for API
response)
+ from superset.utils import json as superset_json
Review Comment:
Done - moved `from superset.utils import json as superset_json` up to the
module-level imports. There was no circular-import concern, just leftover from
iterating.
##########
superset/result_set.py:
##########
@@ -286,7 +301,13 @@ def data_type(self, col_name: str, pa_dtype: pa.DataType)
-> Optional[str]:
return None
def to_pandas_df(self) -> pd.DataFrame:
- return self.convert_table_to_df(self.table)
+ df = self.convert_table_to_df(self.table)
+ # Restore nested/JSON columns as Python objects instead of strings
+ # This allows JSON data to be used directly in templates like
Handlebars
+ for column, values in self._nested_columns.items():
+ if column in df.columns:
+ df[column] = values
Review Comment:
Agreed - replaced the `if column in df.columns` guard with an `assert column
in df.columns` plus a comment, since the keys always come from the same
`column_names` used to build the table/df. This documents the invariant rather
than silently swallowing a case that cannot occur.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]