This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a8e395b79fa [SPARK-41745][CONNECT][TESTS][FOLLOW-UP] Re-enable related test cases
a8e395b79fa is described below
commit a8e395b79fa2f16654da50c31644c4487d5ee804
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Mon Jan 2 19:59:46 2023 +0900
[SPARK-41745][CONNECT][TESTS][FOLLOW-UP] Re-enable related test cases
### What changes were proposed in this pull request?
This PR is a follow-up of https://github.com/apache/spark/pull/39313 that
re-enables the previously skipped tests.
### Why are the changes needed?
To ensure the test coverage is maintained.
### Does this PR introduce _any_ user-facing change?
No, test-only.
### How was this patch tested?
Manually tested locally; the CI in this PR should verify them.
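For context on the diff below: even with a docstring present, an individual doctest line can be opted out with a `# doctest: +SKIP` directive, which tells doctest not to execute that example at all. A minimal, self-contained sketch of the directive (the `double` function is hypothetical, not part of Spark):

import doctest

def double(x: int) -> int:
    """
    >>> double(2)
    4
    >>> double(not_yet_supported)  # doctest: +SKIP
    this line is never executed or compared
    """
    return x * 2

if __name__ == "__main__":
    # Only the first example runs; the +SKIP example is ignored.
    print(doctest.testmod())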
Closes #39342 from HyukjinKwon/SPARK-41745-followup.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/column.py | 8 +++++---
python/pyspark/sql/connect/column.py | 16 ++++------------
2 files changed, 9 insertions(+), 15 deletions(-)
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index cd7b6932c2f..f2264685f48 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -282,6 +282,8 @@ class Column:
__ge__ = _bin_op("geq")
__gt__ = _bin_op("gt")
+ # TODO(SPARK-41812): DataFrame.join: ambiguous column
+ # TODO(SPARK-41814): Column.eqNullSafe fails on NaN comparison
_eqNullSafe_doc = """
Equality test that is safe for null values.
@@ -317,9 +319,9 @@ class Column:
... Row(value = 'bar'),
... Row(value = None)
... ])
- >>> df1.join(df2, df1["value"] == df2["value"]).count()
+ >>> df1.join(df2, df1["value"] == df2["value"]).count() # doctest: +SKIP
0
- >>> df1.join(df2, df1["value"].eqNullSafe(df2["value"])).count()
+ >>> df1.join(df2, df1["value"].eqNullSafe(df2["value"])).count() # doctest: +SKIP
1
>>> df2 = spark.createDataFrame([
... Row(id=1, value=float('NaN')),
@@ -330,7 +332,7 @@ class Column:
... df2['value'].eqNullSafe(None),
... df2['value'].eqNullSafe(float('NaN')),
... df2['value'].eqNullSafe(42.0)
- ... ).show()
+ ... ).show() # doctest: +SKIP
+----------------+---------------+----------------+
|(value <=> NULL)|(value <=> NaN)|(value <=> 42.0)|
+----------------+---------------+----------------+
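For readers unfamiliar with the doctest being skipped above: `eqNullSafe` is PySpark's null-safe equality test (SQL's `<=>`). Unlike `==`, it treats two NULLs as equal, so a join on `eqNullSafe` matches the NULL rows while the plain equality join does not. A minimal sketch of the behavior against a regular local session (not Spark Connect), mirroring the data in the doctest:

from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.getOrCreate()
df1 = spark.createDataFrame([Row(value="foo"), Row(value=None)])
df2 = spark.createDataFrame([Row(value=None)])

# Plain equality never matches NULL, so no rows join.
print(df1.join(df2, df1["value"] == df2["value"]).count())           # 0
# Null-safe equality treats NULL <=> NULL as true.
print(df1.join(df2, df1["value"].eqNullSafe(df2["value"])).count())  # 1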
diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py
index d9f96325c17..6fda15e084a 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -441,25 +441,17 @@ def _test() -> None:
# Creates a remote Spark session.
os.environ["SPARK_REMOTE"] = "sc://localhost"
globs["spark"] =
PySparkSession.builder.remote("sc://localhost").getOrCreate()
+ # Spark Connect has a different string representation for Column.
+ del pyspark.sql.connect.column.Column.getItem.__doc__
# TODO(SPARK-41746): SparkSession.createDataFrame does not support nested datatypes
del pyspark.sql.connect.column.Column.dropFields.__doc__
# TODO(SPARK-41772): Enable pyspark.sql.connect.column.Column.withField doctest
del pyspark.sql.connect.column.Column.withField.__doc__
- # TODO(SPARK-41745): SparkSession.createDataFrame does not respect the column names in
- # the row
- del pyspark.sql.connect.column.Column.bitwiseAND.__doc__
- del pyspark.sql.connect.column.Column.bitwiseOR.__doc__
- del pyspark.sql.connect.column.Column.bitwiseXOR.__doc__
- # TODO(SPARK-41745): SparkSession.createDataFrame does not respect the column names in
- # the row
- del pyspark.sql.connect.column.Column.eqNullSafe.__doc__
- # TODO(SPARK-41745): SparkSession.createDataFrame does not respect the column names in
- # the row
- del pyspark.sql.connect.column.Column.isNotNull.__doc__
+ # TODO(SPARK-41815): Column.isNull returns nan instead of None
del pyspark.sql.connect.column.Column.isNull.__doc__
+ # TODO(SPARK-41746): SparkSession.createDataFrame does not support nested datatypes
del pyspark.sql.connect.column.Column.getField.__doc__
- del pyspark.sql.connect.column.Column.getItem.__doc__
(failure_count, test_count) = doctest.testmod(
pyspark.sql.connect.column,
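The `del ...__doc__` pattern in this hunk works because `doctest.testmod` discovers examples by walking docstrings: once a method's `__doc__` is removed, there is nothing left to collect, so that method's doctest is skipped wholesale. A standalone sketch of the mechanism (the `Greeter` class is hypothetical):

import doctest

class Greeter:
    def hello(self) -> str:
        """
        >>> Greeter().hello()
        'hi'
        """
        return "hi"

# Deleting the docstring leaves doctest.testmod nothing to collect
# for this method, so its example is skipped entirely.
del Greeter.hello.__doc__

if __name__ == "__main__":
    print(doctest.testmod())  # TestResults(failed=0, attempted=0)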
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]