This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 58217fd4337 [SPARK-43666][SPARK-43667][SPARK-43668][SPARK-43669][PS]
Fix `BinaryOps` for Spark Connect
58217fd4337 is described below
commit 58217fd4337ad0d15d20f1bc258e239e1e68d17e
Author: itholic <[email protected]>
AuthorDate: Mon May 29 09:22:27 2023 +0900
[SPARK-43666][SPARK-43667][SPARK-43668][SPARK-43669][PS] Fix `BinaryOps`
for Spark Connect
### What changes were proposed in this pull request?
This PR proposes to fix the `BinaryOps` tests for pandas API on Spark with Spark
Connect.
This includes SPARK-43666, SPARK-43667, SPARK-43668, SPARK-43669 at once,
because they are all related, similar modifications in a single file.
### Why are the changes needed?
To support all features for pandas API on Spark with Spark Connect.
### Does this PR introduce _any_ user-facing change?
Yes, `BinaryOps.lt`, `BinaryOps.le`, `BinaryOps.ge`, `BinaryOps.gt` are
now working as expected on Spark Connect.
### How was this patch tested?
Uncommented the UTs and tested them manually.
Closes #41305 from itholic/SPARK-43666-9.
Authored-by: itholic <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/pandas/data_type_ops/binary_ops.py | 21 ++++++---------------
.../connect/data_type_ops/test_parity_binary_ops.py | 16 ----------------
2 files changed, 6 insertions(+), 31 deletions(-)
diff --git a/python/pyspark/pandas/data_type_ops/binary_ops.py
b/python/pyspark/pandas/data_type_ops/binary_ops.py
index 6d5c8633023..ba31156178a 100644
--- a/python/pyspark/pandas/data_type_ops/binary_ops.py
+++ b/python/pyspark/pandas/data_type_ops/binary_ops.py
@@ -29,8 +29,9 @@ from pyspark.pandas.data_type_ops.base import (
_sanitize_list_like,
)
from pyspark.pandas.typedef import pandas_on_spark_type
-from pyspark.sql import functions as F, Column
+from pyspark.sql import functions as F
from pyspark.sql.types import BinaryType, BooleanType, StringType
+from pyspark.sql.utils import pyspark_column_op
class BinaryOps(DataTypeOps):
@@ -67,30 +68,20 @@ class BinaryOps(DataTypeOps):
)
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- from pyspark.pandas.base import column_op
-
_sanitize_list_like(right)
-
- return column_op(Column.__lt__)(left, right)
+ return pyspark_column_op("__lt__")(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- from pyspark.pandas.base import column_op
-
_sanitize_list_like(right)
-
- return column_op(Column.__le__)(left, right)
+ return pyspark_column_op("__le__")(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- from pyspark.pandas.base import column_op
-
_sanitize_list_like(right)
- return column_op(Column.__ge__)(left, right)
+ return pyspark_column_op("__ge__")(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- from pyspark.pandas.base import column_op
-
_sanitize_list_like(right)
- return column_op(Column.__gt__)(left, right)
+ return pyspark_column_op("__gt__")(left, right)
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype])
-> IndexOpsLike:
dtype, spark_type = pandas_on_spark_type(dtype)
diff --git
a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py
b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py
index 7d941c4c788..663c0007389 100644
---
a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py
+++
b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py
@@ -29,22 +29,6 @@ class BinaryOpsParityTests(
def test_astype(self):
super().test_astype()
- @unittest.skip("TODO(SPARK-43666): Fix BinaryOps.ge to work with Spark
Connect Column.")
- def test_ge(self):
- super().test_ge()
-
- @unittest.skip("TODO(SPARK-43667): Fix BinaryOps.gt to work with Spark
Connect Column.")
- def test_gt(self):
- super().test_gt()
-
- @unittest.skip("TODO(SPARK-43667): Fix BinaryOps.le to work with Spark
Connect Column.")
- def test_le(self):
- super().test_le()
-
- @unittest.skip("TODO(SPARK-43667): Fix BinaryOps.lt to work with Spark
Connect Column.")
- def test_lt(self):
- super().test_lt()
-
if __name__ == "__main__":
from pyspark.pandas.tests.connect.data_type_ops.test_parity_binary_ops
import * # noqa: F401
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]