This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 692d8692ef08 [SPARK-48591][PYTHON] Add a helper function to simplify 
`Column.py`
692d8692ef08 is described below

commit 692d8692ef0816e00b303df94609fd58c8fe7045
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Jun 20 13:43:39 2024 +0800

    [SPARK-48591][PYTHON] Add a helper function to simplify `Column.py`
    
    ### What changes were proposed in this pull request?
    Add a helper function to simplify `Column.py`
    
    ### Why are the changes needed?
    code clean up
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    CI
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #47023 from zhengruifeng/column_to_expr.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 python/pyspark/sql/connect/column.py | 58 ++++++++++++++----------------------
 1 file changed, 22 insertions(+), 36 deletions(-)

diff --git a/python/pyspark/sql/connect/column.py 
b/python/pyspark/sql/connect/column.py
index b63e06bccae1..ef48091a35b0 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -96,6 +96,10 @@ def _unary_op(name: str, self: ParentColumn) -> ParentColumn:
     return Column(UnresolvedFunction(name, [self._expr]))  # type: 
ignore[list-item]
 
 
+def _to_expr(v: Any) -> Expression:
+    return v._expr if isinstance(v, Column) else 
LiteralExpression._from_value(v)
+
+
 @with_origin_to_class
 class Column(ParentColumn):
     def __new__(
@@ -310,14 +314,12 @@ class Column(ParentColumn):
                 message_parameters={},
             )
 
-        if isinstance(value, Column):
-            _value = value._expr
-        else:
-            _value = LiteralExpression._from_value(value)
-
-        _branches = self._expr._branches + [(condition._expr, _value)]
-
-        return Column(CaseWhen(branches=_branches, else_value=None))
+        return Column(
+            CaseWhen(
+                branches=self._expr._branches + [(condition._expr, 
_to_expr(value))],
+                else_value=None,
+            )
+        )
 
     def otherwise(self, value: Any) -> ParentColumn:
         if not isinstance(self._expr, CaseWhen):
@@ -330,12 +332,12 @@ class Column(ParentColumn):
                 "otherwise() can only be applied once on a Column previously 
generated by when()"
             )
 
-        if isinstance(value, Column):
-            _value = value._expr
-        else:
-            _value = LiteralExpression._from_value(value)
-
-        return Column(CaseWhen(branches=self._expr._branches, 
else_value=_value))
+        return Column(
+            CaseWhen(
+                branches=self._expr._branches,
+                else_value=_to_expr(value),
+            )
+        )
 
     def like(self: ParentColumn, other: str) -> ParentColumn:
         return _bin_op("like", self, other)
@@ -360,22 +362,15 @@ class Column(ParentColumn):
                 },
             )
 
-        if isinstance(length, Column):
-            length_expr = length._expr
-            start_expr = startPos._expr  # type: ignore[union-attr]
-        elif isinstance(length, int):
-            length_expr = LiteralExpression._from_value(length)
-            start_expr = LiteralExpression._from_value(startPos)
+        if isinstance(length, (Column, int)):
+            length_expr = _to_expr(length)
+            start_expr = _to_expr(startPos)
         else:
             raise PySparkTypeError(
                 error_class="NOT_COLUMN_OR_INT",
                 message_parameters={"arg_name": "startPos", "arg_type": 
type(length).__name__},
             )
-        return Column(
-            UnresolvedFunction(
-                "substr", [self._expr, start_expr, length_expr]  # type: 
ignore[list-item]
-            )
-        )
+        return Column(UnresolvedFunction("substr", [self._expr, start_expr, 
length_expr]))
 
     def __eq__(self, other: Any) -> ParentColumn:  # type: ignore[override]
         if other is None or isinstance(
@@ -459,14 +454,7 @@ class Column(ParentColumn):
         else:
             _cols = list(cols)
 
-        _exprs = [self._expr]
-        for c in _cols:
-            if isinstance(c, Column):
-                _exprs.append(c._expr)
-            else:
-                _exprs.append(LiteralExpression._from_value(c))
-
-        return Column(UnresolvedFunction("in", _exprs))
+        return Column(UnresolvedFunction("in", [self._expr] + [_to_expr(c) for 
c in _cols]))
 
     def between(
         self,
@@ -556,10 +544,8 @@ class Column(ParentColumn):
                     message_parameters={},
                 )
             return self.substr(k.start, k.stop)
-        elif isinstance(k, Column):
-            return Column(UnresolvedExtractValue(self._expr, k._expr))
         else:
-            return Column(UnresolvedExtractValue(self._expr, 
LiteralExpression._from_value(k)))
+            return Column(UnresolvedExtractValue(self._expr, _to_expr(k)))
 
     def __iter__(self) -> None:
         raise PySparkTypeError(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to