This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d5865d0c085 [SPARK-41700][CONNECT][PYTHON] Remove `FunctionBuilder`
d5865d0c085 is described below
commit d5865d0c085cc41b39e0d615970bce7da270def6
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Sun Dec 25 13:33:02 2022 +0800
[SPARK-41700][CONNECT][PYTHON] Remove `FunctionBuilder`
### What changes were proposed in this pull request?
Remove `FunctionBuilder`
### Why are the changes needed?
since we had supported almost all the functions
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
updated UT
Closes #39204 from zhengruifeng/connect_remove_FunctionBuilder.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/connect/_typing.py | 5 -----
python/pyspark/sql/connect/column.py | 21 ++++++++++-----------
python/pyspark/sql/connect/expressions.py | 4 ++++
python/pyspark/sql/connect/function_builder.py | 15 ---------------
.../pyspark/sql/tests/connect/test_connect_basic.py | 4 ++--
5 files changed, 16 insertions(+), 33 deletions(-)
diff --git a/python/pyspark/sql/connect/_typing.py b/python/pyspark/sql/connect/_typing.py
index 4962df1c7d5..29a14384c82 100644
--- a/python/pyspark/sql/connect/_typing.py
+++ b/python/pyspark/sql/connect/_typing.py
@@ -42,11 +42,6 @@ DecimalLiteral = decimal.Decimal
DateTimeLiteral = Union[datetime.datetime, datetime.date]
-class FunctionBuilderCallable(Protocol):
- def __call__(self, *_: ColumnOrName) -> Column:
- ...
-
-
class UserDefinedFunctionCallable(Protocol):
def __call__(self, *_: ColumnOrName) -> Column:
...
diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py
index 440e559f365..f1107b507bc 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -202,9 +202,6 @@ class Column:
...
def substr(self, startPos: Union[int, "Column"], length: Union[int, "Column"]) -> "Column":
- from pyspark.sql.connect.function_builder import functions as F
- from pyspark.sql.connect.functions import lit
-
if type(startPos) != type(length):
raise TypeError(
"startPos and length must be the same type. "
@@ -214,19 +211,21 @@ class Column:
)
)
- if isinstance(length, int):
- length_exp = lit(length)
- elif isinstance(length, Column):
- length_exp = length
+ if isinstance(length, Column):
+ length_expr = length._expr
+ elif isinstance(length, int):
+ length_expr = LiteralExpression._from_value(length)
else:
raise TypeError("Unsupported type for substr().")
- if isinstance(startPos, int):
- start_exp = lit(startPos)
+ if isinstance(startPos, Column):
+ start_expr = startPos._expr
+ elif isinstance(startPos, int):
+ start_expr = LiteralExpression._from_value(startPos)
else:
- start_exp = startPos
+ raise TypeError("Unsupported type for substr().")
- return F.substr(self, start_exp, length_exp)
+ return Column(UnresolvedFunction("substring", [self._expr, start_expr, length_expr]))
substr.__doc__ = PySparkColumn.substr.__doc__
diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py
index 0c2717d7fe3..02d6047bd66 100644
--- a/python/pyspark/sql/connect/expressions.py
+++ b/python/pyspark/sql/connect/expressions.py
@@ -261,6 +261,10 @@ class LiteralExpression(Expression):
else:
raise ValueError(f"Unsupported Data Type {type(value).__name__}")
+ @classmethod
+ def _from_value(cls, value: Any) -> "LiteralExpression":
+ return LiteralExpression(value=value, dataType=LiteralExpression._infer_type(value))
+
def to_plan(self, session: "SparkConnectClient") -> "proto.Expression":
"""Converts the literal expression to the literal in proto."""
diff --git a/python/pyspark/sql/connect/function_builder.py b/python/pyspark/sql/connect/function_builder.py
index 7be43353b9e..7eb0ffc26ae 100644
--- a/python/pyspark/sql/connect/function_builder.py
+++ b/python/pyspark/sql/connect/function_builder.py
@@ -29,7 +29,6 @@ from pyspark.sql.connect.functions import col
if TYPE_CHECKING:
from pyspark.sql.connect._typing import (
ColumnOrName,
- FunctionBuilderCallable,
UserDefinedFunctionCallable,
)
from pyspark.sql.connect.client import SparkConnectClient
@@ -51,20 +50,6 @@ def _build(name: str, *args: "ColumnOrName") -> Column:
return Column(UnresolvedFunction(name, [col._expr for col in cols]))
-class FunctionBuilder:
- """This class is used to build arbitrary functions used in expressions"""
-
- def __getattr__(self, name: str) -> "FunctionBuilderCallable":
- def _(*args: "ColumnOrName") -> Column:
- return _build(name, *args)
-
- _.__doc__ = f"""Function to apply {name}"""
- return _
-
-
-functions = FunctionBuilder()
-
-
class UserDefinedFunction(Expression):
"""A user defied function is an expression that has a reference to the actual
Python callable attached. During plan generation, the client sends a command to
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
index bced3fd5e7e..4c9e29326e1 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -1118,13 +1118,13 @@ class SparkConnectTests(SparkConnectSQLTestCase):
self.assertEqual(5.0, res[1][1])
# Additional GroupBy tests with 3 rows
- from pyspark.sql.connect.function_builder import functions as FB
+ import pyspark.sql.connect.functions as CF
import pyspark.sql.functions as PF
df_a = self.connect.range(10).groupBy((col("id") % lit(3)).alias("moded"))
df_b = self.spark.range(10).groupBy((PF.col("id") % PF.lit(3)).alias("moded"))
self.assertEqual(
- set(df_b.agg(PF.sum("id")).collect()), set(df_a.agg(FB.sum("id")).collect())
+ set(df_b.agg(PF.sum("id")).collect()), set(df_a.agg(CF.sum("id")).collect())
)
# Dict agg
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]