This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d5865d0c085 [SPARK-41700][CONNECT][PYTHON] Remove `FunctionBuilder`
d5865d0c085 is described below
commit d5865d0c085cc41b39e0d615970bce7da270def6
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Sun Dec 25 13:33:02 2022 +0800
[SPARK-41700][CONNECT][PYTHON] Remove `FunctionBuilder`
### What changes were proposed in this pull request?
Remove `FunctionBuilder`
### Why are the changes needed?
since we had supported almost all the functions
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
updated UT
Closes #39204 from zhengruifeng/connect_remove_FunctionBuilder.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/connect/_typing.py | 5 -----
python/pyspark/sql/connect/column.py | 21 ++++++++++-----------
python/pyspark/sql/connect/expressions.py | 4 ++++
python/pyspark/sql/connect/function_builder.py | 15 ---------------
.../pyspark/sql/tests/connect/test_connect_basic.py | 4 ++--
5 files changed, 16 insertions(+), 33 deletions(-)
diff --git a/python/pyspark/sql/connect/_typing.py b/python/pyspark/sql/connect/_typing.py
index 4962df1c7d5..29a14384c82 100644
--- a/python/pyspark/sql/connect/_typing.py
+++ b/python/pyspark/sql/connect/_typing.py
@@ -42,11 +42,6 @@ DecimalLiteral = decimal.Decimal
DateTimeLiteral = Union[datetime.datetime, datetime.date]
-class FunctionBuilderCallable(Protocol):
- def __call__(self, *_: ColumnOrName) -> Column:
- ...
-
-
class UserDefinedFunctionCallable(Protocol):
def __call__(self, *_: ColumnOrName) -> Column:
...
diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py
index 440e559f365..f1107b507bc 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -202,9 +202,6 @@ class Column:
...
def substr(self, startPos: Union[int, "Column"], length: Union[int, "Column"]) -> "Column":
- from pyspark.sql.connect.function_builder import functions as F
- from pyspark.sql.connect.functions import lit
-
if type(startPos) != type(length):
raise TypeError(
"startPos and length must be the same type. "
@@ -214,19 +211,21 @@ class Column:
)
)
- if isinstance(length, int):
- length_exp = lit(length)
- elif isinstance(length, Column):
- length_exp = length
+ if isinstance(length, Column):
+ length_expr = length._expr
+ elif isinstance(length, int):
+ length_expr = LiteralExpression._from_value(length)
else:
raise TypeError("Unsupported type for substr().")
- if isinstance(startPos, int):
- start_exp = lit(startPos)
+ if isinstance(startPos, Column):
+ start_expr = startPos._expr
+ elif isinstance(startPos, int):
+ start_expr = LiteralExpression._from_value(startPos)
else:
- start_exp = startPos
+ raise TypeError("Unsupported type for substr().")
- return F.substr(self, start_exp, length_exp)
+ return Column(UnresolvedFunction("substring", [self._expr, start_expr, length_expr]))
substr.__doc__ = PySparkColumn.substr.__doc__
diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py
index 0c2717d7fe3..02d6047bd66 100644
--- a/python/pyspark/sql/connect/expressions.py
+++ b/python/pyspark/sql/connect/expressions.py
@@ -261,6 +261,10 @@ class LiteralExpression(Expression):
else:
raise ValueError(f"Unsupported Data Type {type(value).__name__}")
+ @classmethod
+ def _from_value(cls, value: Any) -> "LiteralExpression":
+ return LiteralExpression(value=value, dataType=LiteralExpression._infer_type(value))
+
def to_plan(self, session: "SparkConnectClient") -> "proto.Expression":
"""Converts the literal expression to the literal in proto."""
diff --git a/python/pyspark/sql/connect/function_builder.py b/python/pyspark/sql/connect/function_builder.py
index 7be43353b9e..7eb0ffc26ae 100644
--- a/python/pyspark/sql/connect/function_builder.py
+++ b/python/pyspark/sql/connect/function_builder.py
@@ -29,7 +29,6 @@ from pyspark.sql.connect.functions import col
if TYPE_CHECKING:
from pyspark.sql.connect._typing import (
ColumnOrName,
- FunctionBuilderCallable,
UserDefinedFunctionCallable,
)
from pyspark.sql.connect.client import SparkConnectClient
@@ -51,20 +50,6 @@ def _build(name: str, *args: "ColumnOrName") -> Column:
return Column(UnresolvedFunction(name, [col._expr for col in cols]))
-class FunctionBuilder:
- """This class is used to build arbitrary functions used in expressions"""
-
- def __getattr__(self, name: str) -> "FunctionBuilderCallable":
- def _(*args: "ColumnOrName") -> Column:
- return _build(name, *args)
-
- _.__doc__ = f"""Function to apply {name}"""
- return _
-
-
-functions = FunctionBuilder()
-
-
class UserDefinedFunction(Expression):
"""A user defied function is an expression that has a reference to the actual
Python callable attached. During plan generation, the client sends a command to
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
index bced3fd5e7e..4c9e29326e1 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -1118,13 +1118,13 @@ class SparkConnectTests(SparkConnectSQLTestCase):
self.assertEqual(5.0, res[1][1])
# Additional GroupBy tests with 3 rows
- from pyspark.sql.connect.function_builder import functions as FB
+ import pyspark.sql.connect.functions as CF
import pyspark.sql.functions as PF
df_a = self.connect.range(10).groupBy((col("id") % lit(3)).alias("moded"))
df_b = self.spark.range(10).groupBy((PF.col("id") % PF.lit(3)).alias("moded"))
self.assertEqual(
- set(df_b.agg(PF.sum("id")).collect()), set(df_a.agg(FB.sum("id")).collect())
+ set(df_b.agg(PF.sum("id")).collect()), set(df_a.agg(CF.sum("id")).collect())
)
# Dict agg
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]