ueshin commented on a change in pull request #33911:
URL: https://github.com/apache/spark/pull/33911#discussion_r705688661
##########
File path: python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
##########
@@ -527,6 +527,32 @@ def test_ge(self):
for pser, psser in self.intergral_extension_pser_psser_pairs:
self.check_extension(pser >= pser, (psser >=
psser).sort_index())
+ def test_xor(self):
+ pdf, psdf = self.integral_pdf, self.integral_psdf
+ pser, other_pser = pdf["this"], pdf["that"]
+ psser, other_psser = psdf["this"], psdf["that"]
+
+ self.assert_eq(pser ^ other_pser, psser ^ other_psser)
+ self.assert_eq(pser ^ 2, psser ^ 2)
+ self.assert_eq(pser ^ 3, psser ^ 3)
+ self.assert_eq(pser ^ False, psser ^ False)
+ self.assert_eq(pser ^ True, psser ^ True)
+
+ with self.assertRaisesRegex(TypeError, "XOR can not be applied to given types."):
+ psser ^ "a"
+
+ with option_context("compute.ops_on_diff_frames", True):
+ pser, other_pser = self.integral_pdf["this"], self.pdf["bool"]
+ psser, other_psser = self.integral_psdf["this"], self.psdf["bool"]
+
+ self.assert_eq(pser ^ other_pser, psser ^ other_psser)
+
+ def test_rxor(self):
Review comment:
ditto.
##########
File path: python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
##########
@@ -286,6 +287,32 @@ def test_ror(self):
self.assert_eq(True | pser, True | psser)
self.assert_eq(False | pser, False | psser)
+ def test_xor(self):
+ pdf, psdf = self.bool_pdf, self.bool_psdf
+ pser, other_pser = pdf["this"], pdf["that"]
+ psser, other_psser = psdf["this"], psdf["that"]
+
+ self.assert_eq(pser ^ other_pser, psser ^ other_psser)
+ self.assert_eq(pser ^ True, psser ^ True)
+ self.assert_eq(pser ^ False, psser ^ False)
+ self.assert_eq(pser ^ 2, psser ^ 2)
+ self.assert_eq(pser ^ 99, psser ^ 99)
+
+ with self.assertRaisesRegex(TypeError, "XOR can not be applied to given types."):
+ psser ^ "a"
+
+ with option_context("compute.ops_on_diff_frames", True):
+ pser, other_pser = self.pdf["bool"], self.integral_pdf["this"]
+ psser, other_psser = self.psdf["bool"], self.integral_psdf["this"]
+
+ self.assert_eq(pser ^ other_pser, psser ^ other_psser)
+
+ def test_rxor(self):
+ pser, psser = self.pdf["bool"], self.psdf["bool"]
+ self.assert_eq(True ^ pser, True ^ psser)
+ self.assert_eq(False ^ pser, False ^ psser)
+ self.assert_eq(1 ^ pser, 1 ^ psser)
Review comment:
Could you also add tests in `BooleanExtensionOpsTest`?
##########
File path: python/pyspark/pandas/data_type_ops/base.py
##########
@@ -194,6 +194,19 @@ def _sanitize_list_like(operand: Any) -> None:
raise TypeError("The operation can not be applied to %s." %
type(operand).__name__)
+def _is_valid_for_logical_operator(right: Any) -> bool:
+ from pyspark.pandas.base import IndexOpsMixin
+
+ return (
+ isinstance(right, IndexOpsMixin)
+ and (
+ isinstance(right.spark.data_type, BooleanType)
+ or isinstance(right.spark.data_type, IntegralType)
+ )
+ or isinstance(right, (int, bool))
+ )
Review comment:
Could you use parentheses properly when `and` and `or` are mixed?
```py
return (
(
isinstance(right, IndexOpsMixin)
and (
isinstance(right.spark.data_type, BooleanType)
or isinstance(right.spark.data_type, IntegralType)
)
)
or isinstance(right, (int, bool))
)
```
##########
File path: python/pyspark/pandas/indexes/base.py
##########
@@ -2603,6 +2603,9 @@ def __iter__(self) -> Iterator:
def __xor__(self, other: "Index") -> "Index":
return self.symmetric_difference(other)
+ def __rxor__(self, other: Any) -> "Index":
+ return other.__xor__(self)
Review comment:
I guess this should be `return NotImplemented`?
##########
File path: python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
##########
@@ -527,6 +527,32 @@ def test_ge(self):
for pser, psser in self.intergral_extension_pser_psser_pairs:
self.check_extension(pser >= pser, (psser >=
psser).sort_index())
+ def test_xor(self):
Review comment:
These tests should rather be in `NumOpsTest`, and the tests using
nullable ints should go here instead?
##########
File path: python/pyspark/pandas/data_type_ops/num_ops.py
##########
@@ -181,6 +182,36 @@ class IntegralOps(NumericOps):
LongType, IntegerType, ByteType and ShortType.
"""
+ def xor(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
+
+ if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, extension_dtypes):
+ return right ^ left
+ elif _is_valid_for_logical_operator(right):
+ right_is_boolean = (
+ True
+ if isinstance(right, IndexOpsMixin)
+ and isinstance(right.spark.data_type, BooleanType)
+ or isinstance(right, bool)
+ else False
+ )
Review comment:
nit:
```py
right_is_boolean = (
(
isinstance(right, IndexOpsMixin)
and isinstance(right.spark.data_type, BooleanType)
)
or isinstance(right, bool)
)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]