This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new a8e395b79fa [SPARK-41745][CONNECT][TESTS][FOLLOW-UP] Re-enable related test cases a8e395b79fa is described below commit a8e395b79fa2f16654da50c31644c4487d5ee804 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Mon Jan 2 19:59:46 2023 +0900 [SPARK-41745][CONNECT][TESTS][FOLLOW-UP] Re-enable related test cases ### What changes were proposed in this pull request? This PR is a follow-up of https://github.com/apache/spark/pull/39313 that re-enables the previously skipped tests. ### Why are the changes needed? In order to make sure of the test coverage. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Manually checked locally, and CI in this PR should verify them. Closes #39342 from HyukjinKwon/SPARK-41745-followup. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/column.py | 8 +++++--- python/pyspark/sql/connect/column.py | 16 ++++------------ 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index cd7b6932c2f..f2264685f48 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -282,6 +282,8 @@ class Column: __ge__ = _bin_op("geq") __gt__ = _bin_op("gt") + # TODO(SPARK-41812): DataFrame.join: ambiguous column + # TODO(SPARK-41814): Column.eqNullSafe fails on NaN comparison _eqNullSafe_doc = """ Equality test that is safe for null values. @@ -317,9 +319,9 @@ class Column: ... Row(value = 'bar'), ... Row(value = None) ... ]) - >>> df1.join(df2, df1["value"] == df2["value"]).count() + >>> df1.join(df2, df1["value"] == df2["value"]).count() # doctest: +SKIP 0 - >>> df1.join(df2, df1["value"].eqNullSafe(df2["value"])).count() + >>> df1.join(df2, df1["value"].eqNullSafe(df2["value"])).count() # doctest: +SKIP 1 >>> df2 = spark.createDataFrame([ ... 
Row(id=1, value=float('NaN')), @@ -330,7 +332,7 @@ class Column: ... df2['value'].eqNullSafe(None), ... df2['value'].eqNullSafe(float('NaN')), ... df2['value'].eqNullSafe(42.0) - ... ).show() + ... ).show() # doctest: +SKIP +----------------+---------------+----------------+ |(value <=> NULL)|(value <=> NaN)|(value <=> 42.0)| +----------------+---------------+----------------+ diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py index d9f96325c17..6fda15e084a 100644 --- a/python/pyspark/sql/connect/column.py +++ b/python/pyspark/sql/connect/column.py @@ -441,25 +441,17 @@ def _test() -> None: # Creates a remote Spark session. os.environ["SPARK_REMOTE"] = "sc://localhost" globs["spark"] = PySparkSession.builder.remote("sc://localhost").getOrCreate() + # Spark Connect has a different string representation for Column. + del pyspark.sql.connect.column.Column.getItem.__doc__ # TODO(SPARK-41746): SparkSession.createDataFrame does not support nested datatypes del pyspark.sql.connect.column.Column.dropFields.__doc__ # TODO(SPARK-41772): Enable pyspark.sql.connect.column.Column.withField doctest del pyspark.sql.connect.column.Column.withField.__doc__ - # TODO(SPARK-41745): SparkSession.createDataFrame does not respect the column names in - # the row - del pyspark.sql.connect.column.Column.bitwiseAND.__doc__ - del pyspark.sql.connect.column.Column.bitwiseOR.__doc__ - del pyspark.sql.connect.column.Column.bitwiseXOR.__doc__ - # TODO(SPARK-41745): SparkSession.createDataFrame does not respect the column names in - # the row - del pyspark.sql.connect.column.Column.eqNullSafe.__doc__ - # TODO(SPARK-41745): SparkSession.createDataFrame does not respect the column names in - # the row - del pyspark.sql.connect.column.Column.isNotNull.__doc__ + # TODO(SPARK-41815): Column.isNull returns nan instead of None del pyspark.sql.connect.column.Column.isNull.__doc__ + # TODO(SPARK-41746): SparkSession.createDataFrame does not support nested datatypes del 
pyspark.sql.connect.column.Column.getField.__doc__ - del pyspark.sql.connect.column.Column.getItem.__doc__ (failure_count, test_count) = doctest.testmod( pyspark.sql.connect.column, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org