This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new deabae7465d [SPARK-41329][CONNECT] Resolve circular imports in Spark Connect
deabae7465d is described below

commit deabae7465db606b06c8e1cbaddf9cd67df6083c
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Sat Dec 10 09:33:20 2022 +0900

    [SPARK-41329][CONNECT] Resolve circular imports in Spark Connect
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to resolve the circular import workarounds: instead of
    duplicating the PrimitiveType alias and keeping the Column._lit helper
    in column.py, the affected imports are deferred to the TYPE_CHECKING
    block or to function scope.
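
    For context, the general pattern applied here is: imports needed only
    for type annotations move under a TYPE_CHECKING guard, which static
    type checkers evaluate but the interpreter never does, and imports
    needed at runtime move into the body of the function that uses them,
    so they execute only after both modules have finished loading. A
    minimal sketch of that pattern (illustrative only; modules a.py and
    b.py are hypothetical, not code from this commit):

        # b.py -- hypothetical peer module
        class Thing:
            pass

        # a.py -- references b without importing it at module load time
        from typing import TYPE_CHECKING

        if TYPE_CHECKING:
            # Seen only by static type checkers; TYPE_CHECKING is False
            # at runtime, so this line cannot trigger an import cycle.
            from b import Thing

        def make_thing() -> "Thing":
            # Deferred runtime import: runs on first call, after both
            # modules have finished loading, which breaks the cycle.
            from b import Thing
            return Thing()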
    
    ### Why are the changes needed?
    
    For better readability and maintenance.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, dev-only.
    
    ### How was this patch tested?
    
    CI in this PR should test it out.
    
    Closes #38994 from HyukjinKwon/SPARK-41329.
    
    Lead-authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Co-authored-by: Hyukjin Kwon <gurwls...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/connect/column.py     | 26 +++++++++++---------------
 python/pyspark/sql/connect/readwriter.py |  1 -
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py
index f1a909b89fc..b84c27afb15 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -27,14 +27,10 @@ import pyspark.sql.connect.proto as proto
 from pyspark.sql.connect.types import pyspark_types_to_proto_types
 
 if TYPE_CHECKING:
-    from pyspark.sql.connect._typing import ColumnOrName
+    from pyspark.sql.connect._typing import ColumnOrName, PrimitiveType
     from pyspark.sql.connect.client import SparkConnectClient
     import pyspark.sql.connect.proto as proto
 
-# TODO(SPARK-41329): solve the circular import between _typing and this class
-# if we want to reuse _type.PrimitiveType
-PrimitiveType = Union[bool, float, int, str]
-
 
 def _func_op(name: str, doc: str = "") -> Callable[["Column"], "Column"]:
     def _(self: "Column") -> "Column":
@@ -554,7 +550,7 @@ class Column:
         return _func_op("not")(_bin_op("==")(self, other))
 
     # string methods
-    def contains(self, other: Union[PrimitiveType, "Column"]) -> "Column":
+    def contains(self, other: Union["PrimitiveType", "Column"]) -> "Column":
         """
         Contains the other element. Returns a boolean :class:`Column` based on a string match.
 
@@ -698,6 +694,9 @@ class Column:
         >>> df.select(df.name.substr(1, 3).alias("col")).collect()
         [Row(col='Ali'), Row(col='Bob')]
         """
+        from pyspark.sql.connect.function_builder import functions as F
+        from pyspark.sql.connect.functions import lit
+
         if type(startPos) != type(length):
             raise TypeError(
                 "startPos and length must be the same type. "
@@ -706,17 +705,16 @@ class Column:
                     length_t=type(length),
                 )
             )
-        from pyspark.sql.connect.function_builder import functions as F
 
         if isinstance(length, int):
-            length_exp = self._lit(length)
+            length_exp = lit(length)
         elif isinstance(length, Column):
             length_exp = length
         else:
             raise TypeError("Unsupported type for substr().")
 
         if isinstance(startPos, int):
-            start_exp = self._lit(startPos)
+            start_exp = lit(startPos)
         else:
             start_exp = startPos
 
@@ -726,8 +724,11 @@ class Column:
         """Returns a binary expression with the current column as the left
         side and the other expression as the right side.
         """
+        from pyspark.sql.connect._typing import PrimitiveType
+        from pyspark.sql.connect.functions import lit
+
         if isinstance(other, get_args(PrimitiveType)):
-            other = self._lit(other)
+            other = lit(other)
         return scalar_function("==", self, other)
 
     def to_plan(self, session: "SparkConnectClient") -> proto.Expression:
@@ -779,11 +780,6 @@ class Column:
         else:
             raise TypeError("unexpected type: %s" % type(dataType))
 
-    # TODO(SPARK-41329): solve the circular import between functions.py and
-    # this class if we want to reuse functions.lit
-    def _lit(self, x: Any) -> "Column":
-        return Column(LiteralExpression(x))
-
     def __repr__(self) -> str:
         return "Column<'%s'>" % self._expr.__repr__()
 
diff --git a/python/pyspark/sql/connect/readwriter.py b/python/pyspark/sql/connect/readwriter.py
index 470417b6a28..778509bcf76 100644
--- a/python/pyspark/sql/connect/readwriter.py
+++ b/python/pyspark/sql/connect/readwriter.py
@@ -164,7 +164,6 @@ class DataFrameReader(OptionUtils):
         return self._df(plan)
 
     def _df(self, plan: LogicalPlan) -> "DataFrame":
-        # The import is needed here to avoid circular import issues.
         from pyspark.sql.connect.dataframe import DataFrame
 
         return DataFrame.withPlan(plan, self._client)
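
A side note on the check in column.py's _bin_op above: isinstance(other,
get_args(PrimitiveType)) works because typing.get_args unpacks a Union
alias into the tuple of its member classes, and isinstance accepts such a
tuple as its second argument. A minimal self-contained sketch of that
mechanism (illustrative only, not code from this commit):

    from typing import Union, get_args

    PrimitiveType = Union[bool, float, int, str]

    # get_args unpacks the Union alias into its member classes.
    assert get_args(PrimitiveType) == (bool, float, int, str)

    # isinstance accepts the resulting tuple of types directly.
    assert isinstance(3.14, get_args(PrimitiveType))
    assert not isinstance([1, 2], get_args(PrimitiveType))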

