This is an automated email from the ASF dual-hosted git repository.

xinrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new fb7719869666 [SPARK-53441][PS] Bools `|`/`&`/`^` None should fail under ANSI
fb7719869666 is described below

commit fb7719869666dbca179951b55fe78be7672f51c7
Author: Xinrong Meng <xinr...@apache.org>
AuthorDate: Thu Sep 4 17:38:50 2025 -0700

    [SPARK-53441][PS] Bools `|`/`&`/`^` None should fail under ANSI
    
    ### What changes were proposed in this pull request?
    Applying `|`/`&`/`^` to bools and `None` should fail under ANSI mode, following native pandas behavior.
    
    For example,
    ```py
    >>> pd.Series([True, False]) | None
    Traceback (most recent call last):
    ...
    TypeError: unsupported operand type(s) for |: 'bool' and 'NoneType'
    ```
    
    but pandas-on-Spark under ANSI mode currently returns a result instead of failing:
    ```py
    >>> ps.Series([True, False]) | None
    0    False
    1    False
    dtype: bool
    ```
    
    ### Why are the changes needed?
    Part of https://issues.apache.org/jira/browse/SPARK-53389
    
    ### Does this PR introduce _any_ user-facing change?
    No, the feature hasn't been released yet.
    
    Now applying `|`/`&`/`^` to bools and `None` fails under ANSI mode, e.g.
    
    ```py
    >>> ps.Series([True, False]) | None
    Traceback (most recent call last):
    ...
    TypeError: OR can not be applied to given types.
    ```
    
    ### How was this patch tested?
    Unit tests
    
    Commands below passed:
    ```
    SPARK_ANSI_SQL_MODE=true ./python/run-tests --python-executables=python3.11 --testnames "pyspark.pandas.tests.data_type_ops.test_boolean_ops BooleanOpsTests"
    SPARK_ANSI_SQL_MODE=false ./python/run-tests --python-executables=python3.11 --testnames "pyspark.pandas.tests.data_type_ops.test_boolean_ops BooleanOpsTests"
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #52182 from xinrong-meng/cmp_none.
    
    Authored-by: Xinrong Meng <xinr...@apache.org>
    Signed-off-by: Xinrong Meng <xinr...@apache.org>
---
 python/pyspark/pandas/data_type_ops/boolean_ops.py    | 18 ++++++++++++++++++
 .../pandas/tests/data_type_ops/test_boolean_ops.py    | 19 +++++++++++++++----
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/pandas/data_type_ops/boolean_ops.py 
b/python/pyspark/pandas/data_type_ops/boolean_ops.py
index d8fccb9d1884..d9a24dee0802 100644
--- a/python/pyspark/pandas/data_type_ops/boolean_ops.py
+++ b/python/pyspark/pandas/data_type_ops/boolean_ops.py
@@ -237,6 +237,12 @@ class BooleanOps(DataTypeOps):
 
     def __and__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         _sanitize_list_like(right)
+        if (
+            is_ansi_mode_enabled(left._internal.spark_frame.sparkSession)
+            and self.dtype == bool
+            and right is None
+        ):
+            raise TypeError("AND can not be applied to given types.")
         if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, 
extension_dtypes):
             return right.__and__(left)
         else:
@@ -256,6 +262,12 @@ class BooleanOps(DataTypeOps):
 
     def xor(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         _sanitize_list_like(right)
+        if (
+            is_ansi_mode_enabled(left._internal.spark_frame.sparkSession)
+            and self.dtype == bool
+            and right is None
+        ):
+            raise TypeError("XOR can not be applied to given types.")
         if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, 
extension_dtypes):
             return right ^ left
         elif _is_valid_for_logical_operator(right):
@@ -277,6 +289,12 @@ class BooleanOps(DataTypeOps):
 
     def __or__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         _sanitize_list_like(right)
+        if (
+            is_ansi_mode_enabled(left._internal.spark_frame.sparkSession)
+            and self.dtype == bool
+            and right is None
+        ):
+            raise TypeError("OR can not be applied to given types.")
         if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, 
extension_dtypes):
             return right.__or__(left)
         else:
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py 
b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
index ea4b23d537f9..961a63e77366 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
@@ -24,6 +24,7 @@ from pandas.api.types import CategoricalDtype
 
 from pyspark import pandas as ps
 from pyspark.pandas import option_context
+from pyspark.testing.utils import is_ansi_mode_test
 from pyspark.testing.pandasutils import PandasOnSparkTestCase
 from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 from pyspark.pandas.typedef.typehints import (
@@ -258,11 +259,15 @@ class BooleanOpsTestsMixin:
         self.assert_eq(pser & pser, psser & psser)
         self.assert_eq(pser & other_pser, psser & other_psser)
         self.assert_eq(other_pser & pser, other_psser & psser)
+        if is_ansi_mode_test:
+            self.assertRaises(TypeError, lambda: psser & None)
 
     def test_rand(self):
         pser, psser = self.pdf["bool"], self.psdf["bool"]
         self.assert_eq(True & pser, True & psser)
         self.assert_eq(False & pser, False & psser)
+        if is_ansi_mode_test:
+            self.assertRaises(TypeError, lambda: None & psser)
 
     def test_or(self):
         pdf, psdf = self.bool_pdf, self.bool_psdf
@@ -272,16 +277,18 @@ class BooleanOpsTestsMixin:
         self.assert_eq(pser | True, psser | True)
         self.assert_eq(pser | False, psser | False)
         self.assert_eq(pser | pser, psser | psser)
-        self.assert_eq(True | pser, True | psser)
-        self.assert_eq(False | pser, False | psser)
 
         self.assert_eq(pser | other_pser, psser | other_psser)
         self.assert_eq(other_pser | pser, other_psser | psser)
+        if is_ansi_mode_test:
+            self.assertRaises(TypeError, lambda: psser | None)
 
     def test_ror(self):
         pser, psser = self.pdf["bool"], self.psdf["bool"]
         self.assert_eq(True | pser, True | psser)
         self.assert_eq(False | pser, False | psser)
+        if is_ansi_mode_test:
+            self.assertRaises(TypeError, lambda: None | psser)
 
     def test_xor(self):
         pdf, psdf = self.bool_pdf, self.bool_psdf
@@ -296,6 +303,8 @@ class BooleanOpsTestsMixin:
 
         with self.assertRaisesRegex(TypeError, "XOR can not be applied to 
given types."):
             psser ^ "a"
+        if is_ansi_mode_test:
+            self.assertRaises(TypeError, lambda: psser ^ None)
 
         with option_context("compute.ops_on_diff_frames", True):
             pser, other_pser = self.pdf["bool"], self.integral_pdf["this"]
@@ -308,6 +317,8 @@ class BooleanOpsTestsMixin:
         self.assert_eq(True ^ pser, True ^ psser)
         self.assert_eq(False ^ pser, False ^ psser)
         self.assert_eq(1 ^ pser, 1 ^ psser)
+        if is_ansi_mode_test:
+            self.assertRaises(TypeError, lambda: None ^ psser)
 
     def test_isnull(self):
         self.assert_eq(self.pdf["bool"].isnull(), self.psdf["bool"].isnull())
@@ -724,8 +735,8 @@ class BooleanExtensionOpsTest(OpsTestBase):
 
     def test_rxor(self):
         pser, psser = self.boolean_pdf["this"], self.boolean_psdf["this"]
-        self.check_extension(True | pser, True | psser)
-        self.check_extension(False | pser, False | psser)
+        self.check_extension(True ^ pser, True ^ psser)
+        self.check_extension(False ^ pser, False ^ psser)
         with self.assertRaisesRegex(TypeError, "XOR can not be applied to 
given types."):
             1 ^ psser
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to