madhushreeag commented on code in PR #40677:
URL: https://github.com/apache/superset/pull/40677#discussion_r3352415139


##########
tests/unit_tests/result_set_test.py:
##########
@@ -264,3 +264,116 @@ def test_empty_column_names_do_not_rename_explicit_synthetic_names() -> None:
     df = result_set.to_pandas_df()
     assert list(df.columns) == ["_col_1", "_col_0"]
     assert df.iloc[0].tolist() == [10, 20]
+
+
+# ---------------------------------------------------------------------------
+# DruidEngineSpec column normalization tests
+#
+# pydruid infers column types from the first row value, which causes two
+# related problems:
+#
+#   Case 1 – Mixed IEEE special-float strings and numbers:
+#     Druid cannot represent NaN/Infinity in JSON, so pydruid emits them as
+#     the strings "NaN", "Infinity", or "-Infinity".  When these appear in a
+#     numeric column, pa.array() raises ArrowInvalid on the mixed str/float
+#     list and the column falls back to string serialisation.
+#
+#   Case 2 – None as the first value:
+#     pydruid's get_type(None) returns Type.STRING, so any nullable numeric
+#     column whose first row is null gets labelled STRING in the cursor
+#     description.  pa.array() succeeds (producing float64) but
+#     data_type() used to return STRING because the cursor description won.
+#
+# DruidEngineSpec overrides normalize_column_values and resolve_column_type
+# to handle both cases.  BaseEngineSpec preserves the original behaviour.
+# ---------------------------------------------------------------------------
+
+
+def test_druid_ieee_special_floats_preserved_as_numeric() -> None:
+    """
+    Case 1, DruidEngineSpec: columns that mix IEEE special-float strings with
+    real numbers must keep their numeric type (specials become null).
+    """
+    from superset.db_engine_specs.druid import DruidEngineSpec
+
+    data = [("NaN",), (1.5,), ("Infinity",), (2.3,), ("-Infinity",), (None,)]
+    description = [("metric", "STRING", None, None, None, None, None)]
+    result_set = SupersetResultSet(data, description, DruidEngineSpec)  # type: ignore
+
+    col = result_set.columns[0]
+    assert col["type"] == "FLOAT"
+
+    df = result_set.to_pandas_df()
+    assert pd.isna(df["metric"].iloc[0])  # "NaN" → null
+    assert df["metric"].iloc[1] == 1.5
+    assert pd.isna(df["metric"].iloc[2])  # "Infinity" → null
+    assert df["metric"].iloc[3] == 2.3
+    assert pd.isna(df["metric"].iloc[4])  # "-Infinity" → null
+    assert pd.isna(df["metric"].iloc[5])  # None → null

Review Comment:
   Moved the tests as suggested



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to