This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new deabae7465d [SPARK-41329][CONNECT] Resolve circular imports in Spark Connect deabae7465d is described below commit deabae7465db606b06c8e1cbaddf9cd67df6083c Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Sat Dec 10 09:33:20 2022 +0900 [SPARK-41329][CONNECT] Resolve circular imports in Spark Connect ### What changes were proposed in this pull request? This PR proposes to resolve the circular imports workarounds ### Why are the changes needed? For better readability and maintenance. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? CI in this PR should test it out. Closes #38994 from HyukjinKwon/SPARK-41329. Lead-authored-by: Hyukjin Kwon <gurwls...@apache.org> Co-authored-by: Hyukjin Kwon <gurwls...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/connect/column.py | 26 +++++++++++--------------- python/pyspark/sql/connect/readwriter.py | 1 - 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py index f1a909b89fc..b84c27afb15 100644 --- a/python/pyspark/sql/connect/column.py +++ b/python/pyspark/sql/connect/column.py @@ -27,14 +27,10 @@ import pyspark.sql.connect.proto as proto from pyspark.sql.connect.types import pyspark_types_to_proto_types if TYPE_CHECKING: - from pyspark.sql.connect._typing import ColumnOrName + from pyspark.sql.connect._typing import ColumnOrName, PrimitiveType from pyspark.sql.connect.client import SparkConnectClient import pyspark.sql.connect.proto as proto -# TODO(SPARK-41329): solve the circular import between _typing and this class -# if we want to reuse _type.PrimitiveType -PrimitiveType = Union[bool, float, int, str] - def _func_op(name: str, doc: str = "") -> Callable[["Column"], "Column"]: def _(self: "Column") -> "Column": @@ -554,7 +550,7 @@ class Column: return 
_func_op("not")(_bin_op("==")(self, other)) # string methods - def contains(self, other: Union[PrimitiveType, "Column"]) -> "Column": + def contains(self, other: Union["PrimitiveType", "Column"]) -> "Column": """ Contains the other element. Returns a boolean :class:`Column` based on a string match. @@ -698,6 +694,9 @@ class Column: >>> df.select(df.name.substr(1, 3).alias("col")).collect() [Row(col='Ali'), Row(col='Bob')] """ + from pyspark.sql.connect.function_builder import functions as F + from pyspark.sql.connect.functions import lit + if type(startPos) != type(length): raise TypeError( "startPos and length must be the same type. " @@ -706,17 +705,16 @@ class Column: length_t=type(length), ) ) - from pyspark.sql.connect.function_builder import functions as F if isinstance(length, int): - length_exp = self._lit(length) + length_exp = lit(length) elif isinstance(length, Column): length_exp = length else: raise TypeError("Unsupported type for substr().") if isinstance(startPos, int): - start_exp = self._lit(startPos) + start_exp = lit(startPos) else: start_exp = startPos @@ -726,8 +724,11 @@ class Column: """Returns a binary expression with the current column as the left side and the other expression as the right side. 
""" + from pyspark.sql.connect._typing import PrimitiveType + from pyspark.sql.connect.functions import lit + if isinstance(other, get_args(PrimitiveType)): - other = self._lit(other) + other = lit(other) return scalar_function("==", self, other) def to_plan(self, session: "SparkConnectClient") -> proto.Expression: @@ -779,11 +780,6 @@ class Column: else: raise TypeError("unexpected type: %s" % type(dataType)) - # TODO(SPARK-41329): solve the circular import between functions.py and - # this class if we want to reuse functions.lit - def _lit(self, x: Any) -> "Column": - return Column(LiteralExpression(x)) - def __repr__(self) -> str: return "Column<'%s'>" % self._expr.__repr__() diff --git a/python/pyspark/sql/connect/readwriter.py b/python/pyspark/sql/connect/readwriter.py index 470417b6a28..778509bcf76 100644 --- a/python/pyspark/sql/connect/readwriter.py +++ b/python/pyspark/sql/connect/readwriter.py @@ -164,7 +164,6 @@ class DataFrameReader(OptionUtils): return self._df(plan) def _df(self, plan: LogicalPlan) -> "DataFrame": - # The import is needed here to avoid circular import issues. from pyspark.sql.connect.dataframe import DataFrame return DataFrame.withPlan(plan, self._client) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org