This is an automated email from the ASF dual-hosted git repository. xinrong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new fb7719869666 [SPARK-53441][PS] Bools `|`/`&`/`^` None should fail under ANSI
fb7719869666 is described below

commit fb7719869666dbca179951b55fe78be7672f51c7
Author: Xinrong Meng <xinr...@apache.org>
AuthorDate: Thu Sep 4 17:38:50 2025 -0700

    [SPARK-53441][PS] Bools `|`/`&`/`^` None should fail under ANSI

    ### What changes were proposed in this pull request?

    Boolean `|`/`&`/`^` operations with `None` should fail under ANSI mode, following native pandas.

    For example, native pandas raises:
    ```py
    >>> pd.Series([True, False]) | None
    Traceback (most recent call last):
    ...
    TypeError: unsupported operand type(s) for |: 'bool' and 'NoneType'
    ```
    but before this change, under ANSI mode, pandas-on-Spark returned a result instead of failing:
    ```py
    >>> ps.Series([True, False]) | None
    0    False
    1    False
    dtype: bool
    ```

    ### Why are the changes needed?

    Part of https://issues.apache.org/jira/browse/SPARK-53389

    ### Does this PR introduce _any_ user-facing change?

    No, the feature hasn't been released yet.

    Boolean `|`/`&`/`^` with `None` now fails under ANSI mode, e.g.
    ```py
    >>> ps.Series([True, False]) | None
    Traceback (most recent call last):
    ...
    TypeError: OR can not be applied to given types.
    ```

    ### How was this patch tested?

    Unit tests.

    The commands below passed:
    ```
    SPARK_ANSI_SQL_MODE=true ./python/run-tests --python-executables=python3.11 --testnames "pyspark.pandas.tests.data_type_ops.test_boolean_ops BooleanOpsTests"
    SPARK_ANSI_SQL_MODE=false ./python/run-tests --python-executables=python3.11 --testnames "pyspark.pandas.tests.data_type_ops.test_boolean_ops BooleanOpsTests"
    ```

    ### Was this patch authored or co-authored using generative AI tooling?

    No

    Closes #52182 from xinrong-meng/cmp_none.
Authored-by: Xinrong Meng <xinr...@apache.org> Signed-off-by: Xinrong Meng <xinr...@apache.org> --- python/pyspark/pandas/data_type_ops/boolean_ops.py | 18 ++++++++++++++++++ .../pandas/tests/data_type_ops/test_boolean_ops.py | 19 +++++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/python/pyspark/pandas/data_type_ops/boolean_ops.py b/python/pyspark/pandas/data_type_ops/boolean_ops.py index d8fccb9d1884..d9a24dee0802 100644 --- a/python/pyspark/pandas/data_type_ops/boolean_ops.py +++ b/python/pyspark/pandas/data_type_ops/boolean_ops.py @@ -237,6 +237,12 @@ class BooleanOps(DataTypeOps): def __and__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) + if ( + is_ansi_mode_enabled(left._internal.spark_frame.sparkSession) + and self.dtype == bool + and right is None + ): + raise TypeError("AND can not be applied to given types.") if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, extension_dtypes): return right.__and__(left) else: @@ -256,6 +262,12 @@ class BooleanOps(DataTypeOps): def xor(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) + if ( + is_ansi_mode_enabled(left._internal.spark_frame.sparkSession) + and self.dtype == bool + and right is None + ): + raise TypeError("XOR can not be applied to given types.") if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, extension_dtypes): return right ^ left elif _is_valid_for_logical_operator(right): @@ -277,6 +289,12 @@ class BooleanOps(DataTypeOps): def __or__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) + if ( + is_ansi_mode_enabled(left._internal.spark_frame.sparkSession) + and self.dtype == bool + and right is None + ): + raise TypeError("OR can not be applied to given types.") if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, extension_dtypes): return right.__or__(left) else: diff --git 
a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py index ea4b23d537f9..961a63e77366 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py @@ -24,6 +24,7 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps from pyspark.pandas import option_context +from pyspark.testing.utils import is_ansi_mode_test from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( @@ -258,11 +259,15 @@ class BooleanOpsTestsMixin: self.assert_eq(pser & pser, psser & psser) self.assert_eq(pser & other_pser, psser & other_psser) self.assert_eq(other_pser & pser, other_psser & psser) + if is_ansi_mode_test: + self.assertRaises(TypeError, lambda: psser & None) def test_rand(self): pser, psser = self.pdf["bool"], self.psdf["bool"] self.assert_eq(True & pser, True & psser) self.assert_eq(False & pser, False & psser) + if is_ansi_mode_test: + self.assertRaises(TypeError, lambda: None & psser) def test_or(self): pdf, psdf = self.bool_pdf, self.bool_psdf @@ -272,16 +277,18 @@ class BooleanOpsTestsMixin: self.assert_eq(pser | True, psser | True) self.assert_eq(pser | False, psser | False) self.assert_eq(pser | pser, psser | psser) - self.assert_eq(True | pser, True | psser) - self.assert_eq(False | pser, False | psser) self.assert_eq(pser | other_pser, psser | other_psser) self.assert_eq(other_pser | pser, other_psser | psser) + if is_ansi_mode_test: + self.assertRaises(TypeError, lambda: psser | None) def test_ror(self): pser, psser = self.pdf["bool"], self.psdf["bool"] self.assert_eq(True | pser, True | psser) self.assert_eq(False | pser, False | psser) + if is_ansi_mode_test: + self.assertRaises(TypeError, lambda: None | psser) def test_xor(self): pdf, psdf = self.bool_pdf, 
self.bool_psdf @@ -296,6 +303,8 @@ class BooleanOpsTestsMixin: with self.assertRaisesRegex(TypeError, "XOR can not be applied to given types."): psser ^ "a" + if is_ansi_mode_test: + self.assertRaises(TypeError, lambda: psser ^ None) with option_context("compute.ops_on_diff_frames", True): pser, other_pser = self.pdf["bool"], self.integral_pdf["this"] @@ -308,6 +317,8 @@ class BooleanOpsTestsMixin: self.assert_eq(True ^ pser, True ^ psser) self.assert_eq(False ^ pser, False ^ psser) self.assert_eq(1 ^ pser, 1 ^ psser) + if is_ansi_mode_test: + self.assertRaises(TypeError, lambda: None ^ psser) def test_isnull(self): self.assert_eq(self.pdf["bool"].isnull(), self.psdf["bool"].isnull()) @@ -724,8 +735,8 @@ class BooleanExtensionOpsTest(OpsTestBase): def test_rxor(self): pser, psser = self.boolean_pdf["this"], self.boolean_psdf["this"] - self.check_extension(True | pser, True | psser) - self.check_extension(False | pser, False | psser) + self.check_extension(True ^ pser, True ^ psser) + self.check_extension(False ^ pser, False ^ psser) with self.assertRaisesRegex(TypeError, "XOR can not be applied to given types."): 1 ^ psser --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org