asl3 commented on code in PR #42332:
URL: https://github.com/apache/spark/pull/42332#discussion_r1287489874
##########
python/pyspark/testing/utils.py:
##########
@@ -464,23 +467,42 @@ def assertDataFrameEqual(
raise PySparkAssertionError(
error_class="INVALID_TYPE_DF_EQUALITY_ARG",
message_parameters={
- "expected_type": Union[DataFrame, ps.DataFrame, List[Row]],
+ "expected_type": "Union[DataFrame, ps.DataFrame, List[Row]]",
"arg_name": "expected",
"actual_type": None,
},
)
+ has_pandas = False
try:
- # If Spark Connect dependencies are available, allow Spark Connect
DataFrame
- from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
+ # If pandas dependencies are available, allow pandas or
pandas-on-Spark DataFrame
+ import pyspark.pandas as ps
+ import pandas as pd
+ from pyspark.testing.pandasutils import PandasOnSparkTestUtils
- if isinstance(actual, ps.DataFrame) or isinstance(expected,
ps.DataFrame):
+ has_pandas = True
+ except Exception:
+ # no pandas, so we won't call pandasutils functions
+ pass
+
+ if has_pandas:
+ if (
+ isinstance(actual, pd.DataFrame)
+ or isinstance(expected, pd.DataFrame)
+ or isinstance(actual, ps.DataFrame)
+ or isinstance(expected, ps.DataFrame)
+ ):
# handle pandas DataFrames
# assert approximate equality for float data
- return assertPandasOnSparkEqual(
- actual, expected, checkExact=False, checkRowOrder=checkRowOrder
+ return PandasOnSparkTestUtils().assert_eq(
+ actual, expected, almost=True, rtol=rtol, atol=atol,
check_row_order=checkRowOrder
)
- elif not isinstance(actual, (DataFrame, ConnectDataFrame, list)):
+
+ try:
+ # If Spark Connect dependencies are available, allow Spark Connect
DataFrame
+ from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
+
+ if not isinstance(actual, (DataFrame, ConnectDataFrame, list)):
Review Comment:
Sounds good, I'll allow a Spark DataFrame or a Spark Connect DataFrame when
is_remote() is True, so that these comparisons are supported.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]