This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d5865d0c085 [SPARK-41700][CONNECT][PYTHON] Remove `FunctionBuilder` d5865d0c085 is described below commit d5865d0c085cc41b39e0d615970bce7da270def6 Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Sun Dec 25 13:33:02 2022 +0800 [SPARK-41700][CONNECT][PYTHON] Remove `FunctionBuilder` ### What changes were proposed in this pull request? Remove `FunctionBuilder` ### Why are the changes needed? Almost all functions are now supported directly, so `FunctionBuilder` is no longer needed. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated the existing unit tests. Closes #39204 from zhengruifeng/connect_remove_FunctionBuilder. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- python/pyspark/sql/connect/_typing.py | 5 ----- python/pyspark/sql/connect/column.py | 21 ++++++++++----------- python/pyspark/sql/connect/expressions.py | 4 ++++ python/pyspark/sql/connect/function_builder.py | 15 --------------- .../pyspark/sql/tests/connect/test_connect_basic.py | 4 ++-- 5 files changed, 16 insertions(+), 33 deletions(-) diff --git a/python/pyspark/sql/connect/_typing.py b/python/pyspark/sql/connect/_typing.py index 4962df1c7d5..29a14384c82 100644 --- a/python/pyspark/sql/connect/_typing.py +++ b/python/pyspark/sql/connect/_typing.py @@ -42,11 +42,6 @@ DecimalLiteral = decimal.Decimal DateTimeLiteral = Union[datetime.datetime, datetime.date] -class FunctionBuilderCallable(Protocol): - def __call__(self, *_: ColumnOrName) -> Column: - ... - - class UserDefinedFunctionCallable(Protocol): def __call__(self, *_: ColumnOrName) -> Column: ... diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py index 440e559f365..f1107b507bc 100644 --- a/python/pyspark/sql/connect/column.py +++ b/python/pyspark/sql/connect/column.py @@ -202,9 +202,6 @@ class Column: ... 
def substr(self, startPos: Union[int, "Column"], length: Union[int, "Column"]) -> "Column": - from pyspark.sql.connect.function_builder import functions as F - from pyspark.sql.connect.functions import lit - if type(startPos) != type(length): raise TypeError( "startPos and length must be the same type. " @@ -214,19 +211,21 @@ class Column: ) ) - if isinstance(length, int): - length_exp = lit(length) - elif isinstance(length, Column): - length_exp = length + if isinstance(length, Column): + length_expr = length._expr + elif isinstance(length, int): + length_expr = LiteralExpression._from_value(length) else: raise TypeError("Unsupported type for substr().") - if isinstance(startPos, int): - start_exp = lit(startPos) + if isinstance(startPos, Column): + start_expr = startPos._expr + elif isinstance(startPos, int): + start_expr = LiteralExpression._from_value(startPos) else: - start_exp = startPos + raise TypeError("Unsupported type for substr().") - return F.substr(self, start_exp, length_exp) + return Column(UnresolvedFunction("substring", [self._expr, start_expr, length_expr])) substr.__doc__ = PySparkColumn.substr.__doc__ diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py index 0c2717d7fe3..02d6047bd66 100644 --- a/python/pyspark/sql/connect/expressions.py +++ b/python/pyspark/sql/connect/expressions.py @@ -261,6 +261,10 @@ class LiteralExpression(Expression): else: raise ValueError(f"Unsupported Data Type {type(value).__name__}") + @classmethod + def _from_value(cls, value: Any) -> "LiteralExpression": + return LiteralExpression(value=value, dataType=LiteralExpression._infer_type(value)) + def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": """Converts the literal expression to the literal in proto.""" diff --git a/python/pyspark/sql/connect/function_builder.py b/python/pyspark/sql/connect/function_builder.py index 7be43353b9e..7eb0ffc26ae 100644 --- 
a/python/pyspark/sql/connect/function_builder.py +++ b/python/pyspark/sql/connect/function_builder.py @@ -29,7 +29,6 @@ from pyspark.sql.connect.functions import col if TYPE_CHECKING: from pyspark.sql.connect._typing import ( ColumnOrName, - FunctionBuilderCallable, UserDefinedFunctionCallable, ) from pyspark.sql.connect.client import SparkConnectClient @@ -51,20 +50,6 @@ def _build(name: str, *args: "ColumnOrName") -> Column: return Column(UnresolvedFunction(name, [col._expr for col in cols])) -class FunctionBuilder: - """This class is used to build arbitrary functions used in expressions""" - - def __getattr__(self, name: str) -> "FunctionBuilderCallable": - def _(*args: "ColumnOrName") -> Column: - return _build(name, *args) - - _.__doc__ = f"""Function to apply {name}""" - return _ - - -functions = FunctionBuilder() - - class UserDefinedFunction(Expression): """A user defied function is an expression that has a reference to the actual Python callable attached. During plan generation, the client sends a command to diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index bced3fd5e7e..4c9e29326e1 100644 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -1118,13 +1118,13 @@ class SparkConnectTests(SparkConnectSQLTestCase): self.assertEqual(5.0, res[1][1]) # Additional GroupBy tests with 3 rows - from pyspark.sql.connect.function_builder import functions as FB + import pyspark.sql.connect.functions as CF import pyspark.sql.functions as PF df_a = self.connect.range(10).groupBy((col("id") % lit(3)).alias("moded")) df_b = self.spark.range(10).groupBy((PF.col("id") % PF.lit(3)).alias("moded")) self.assertEqual( - set(df_b.agg(PF.sum("id")).collect()), set(df_a.agg(FB.sum("id")).collect()) + set(df_b.agg(PF.sum("id")).collect()), set(df_a.agg(CF.sum("id")).collect()) ) # Dict agg 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org