This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 4c7f8106bf1 [SPARK-41751][CONNECT][PYTHON] Fix `Column.{isNull, isNotNull, eqNullSafe}`
4c7f8106bf1 is described below

commit 4c7f8106bf145203d0b1aed5f6d5762e915c83ca
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Thu Dec 29 16:39:10 2022 +0800

    [SPARK-41751][CONNECT][PYTHON] Fix `Column.{isNull, isNotNull, eqNullSafe}`

    ### What changes were proposed in this pull request?
    Fix `Column.{isNull, isNotNull, eqNullSafe}`

    ### Why are the changes needed?
    they were wrongly implemented

    ### Does this PR introduce _any_ user-facing change?
    no

    ### How was this patch tested?
    added UT

    Closes #39273 from zhengruifeng/connect_column_fix_null.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 python/pyspark/sql/connect/column.py               |  6 ++--
 .../sql/tests/connect/test_connect_column.py       | 40 ++++++++++++++++++++++
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py
index 30916ecafbf..5248c1c0ab5 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -124,7 +124,7 @@ class Column:
     __ge__ = _bin_op(">=")
     __le__ = _bin_op("<=")
 
-    eqNullSafe = _bin_op("eqNullSafe", PySparkColumn.eqNullSafe.__doc__)
+    eqNullSafe = _bin_op("<=>", PySparkColumn.eqNullSafe.__doc__)
 
     __neg__ = _func_op("negate")
 
@@ -148,8 +148,8 @@ class Column:
     bitwiseAND = _bin_op("&", PySparkColumn.bitwiseAND.__doc__)
     bitwiseXOR = _bin_op("^", PySparkColumn.bitwiseXOR.__doc__)
 
-    isNull = _unary_op("isNull", PySparkColumn.isNull.__doc__)
-    isNotNull = _unary_op("isNotNull", PySparkColumn.isNotNull.__doc__)
+    isNull = _unary_op("isnull", PySparkColumn.isNull.__doc__)
+    isNotNull = _unary_op("isnotnull", PySparkColumn.isNotNull.__doc__)
 
     def __ne__(  # type: ignore[override]
         self,
diff --git a/python/pyspark/sql/tests/connect/test_connect_column.py b/python/pyspark/sql/tests/connect/test_connect_column.py
index 7eb30505504..e34f27aee98 100644
--- a/python/pyspark/sql/tests/connect/test_connect_column.py
+++ b/python/pyspark/sql/tests/connect/test_connect_column.py
@@ -112,6 +112,46 @@ class SparkConnectTests(SparkConnectSQLTestCase):
             df4.filter(df4.name.isNotNull()).toPandas(),
         )
 
+    def test_column_with_null(self):
+        # SPARK-41751: test isNull, isNotNull, eqNullSafe
+        from pyspark.sql import functions as SF
+        from pyspark.sql.connect import functions as CF
+
+        query = """
+            SELECT * FROM VALUES
+            (1, 1, NULL), (2, NULL, NULL), (3, 3, 1)
+            AS tab(a, b, c)
+            """
+
+        # +---+----+----+
+        # |  a|   b|   c|
+        # +---+----+----+
+        # |  1|   1|null|
+        # |  2|null|null|
+        # |  3|   3|   1|
+        # +---+----+----+
+
+        cdf = self.connect.sql(query)
+        sdf = self.spark.sql(query)
+
+        # test isNull
+        self.assert_eq(
+            cdf.select(cdf.a.isNull(), cdf["b"].isNull(), CF.col("c").isNull()).toPandas(),
+            sdf.select(sdf.a.isNull(), sdf["b"].isNull(), SF.col("c").isNull()).toPandas(),
+        )
+
+        # test isNotNull
+        self.assert_eq(
+            cdf.select(cdf.a.isNotNull(), cdf["b"].isNotNull(), CF.col("c").isNotNull()).toPandas(),
+            sdf.select(sdf.a.isNotNull(), sdf["b"].isNotNull(), SF.col("c").isNotNull()).toPandas(),
+        )
+
+        # test eqNullSafe
+        self.assert_eq(
+            cdf.select(cdf.a.eqNullSafe(cdf.b), cdf["b"].eqNullSafe(CF.col("c"))).toPandas(),
+            sdf.select(sdf.a.eqNullSafe(sdf.b), sdf["b"].eqNullSafe(SF.col("c"))).toPandas(),
+        )
+
     def test_invalid_ops(self):
         query = """
             SELECT * FROM VALUES


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org