itholic commented on code in PR #42332:
URL: https://github.com/apache/spark/pull/42332#discussion_r1286515194


##########
python/pyspark/testing/utils.py:
##########
@@ -464,23 +467,42 @@ def assertDataFrameEqual(
         raise PySparkAssertionError(
             error_class="INVALID_TYPE_DF_EQUALITY_ARG",
             message_parameters={
-                "expected_type": Union[DataFrame, ps.DataFrame, List[Row]],
+                "expected_type": "Union[DataFrame, ps.DataFrame, List[Row]]",
                 "arg_name": "expected",
                 "actual_type": None,
             },
         )
 
+    has_pandas = False
     try:
-        # If Spark Connect dependencies are available, allow Spark Connect DataFrame
-        from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
+        # If pandas dependencies are available, allow pandas or pandas-on-Spark DataFrame
+        import pyspark.pandas as ps
+        import pandas as pd
+        from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
-        if isinstance(actual, ps.DataFrame) or isinstance(expected, ps.DataFrame):
+        has_pandas = True
+    except Exception:
+        # no pandas, so we won't call pandasutils functions
+        pass
+
+    if has_pandas:
+        if (
+            isinstance(actual, pd.DataFrame)
+            or isinstance(expected, pd.DataFrame)
+            or isinstance(actual, ps.DataFrame)
+            or isinstance(expected, ps.DataFrame)
+        ):
             # handle pandas DataFrames
             # assert approximate equality for float data
-            return assertPandasOnSparkEqual(
-                actual, expected, checkExact=False, checkRowOrder=checkRowOrder
+            return PandasOnSparkTestUtils().assert_eq(
+                actual, expected, almost=True, rtol=rtol, atol=atol, check_row_order=checkRowOrder
             )
-        elif not isinstance(actual, (DataFrame, ConnectDataFrame, list)):
+
+    try:
+        # If Spark Connect dependencies are available, allow Spark Connect DataFrame
+        from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
+
+        if not isinstance(actual, (DataFrame, ConnectDataFrame, list)):

Review Comment:
   Yeah, we have `pyspark.sql.utils.get_dataframe_class` to get proper 
DataFrame class.
   
   e.g.
   ```python
   from pyspark.sql.utils import get_dataframe_class
   ...
           SparkDataFrame = get_dataframe_class()
           if not isinstance(actual, (SparkDataFrame, list)):
   ...
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to