This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 13902f55687c [SPARK-51959][PYTHON] Make functions not import ParentDataFrame 13902f55687c is described below commit 13902f55687c216bf8de5538c9a5f0675bb0f862 Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Wed Apr 30 19:22:53 2025 +0900 [SPARK-51959][PYTHON] Make functions not import ParentDataFrame ### What changes were proposed in this pull request? Make functions not import ParentDataFrame ### Why are the changes needed? `ParentDataFrame` is only used in the `broadcast` function, and we don't need this alias ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI ### Was this patch authored or co-authored using generative AI tooling? No Closes #50766 from zhengruifeng/py_parent_df. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/connect/functions/builtin.py | 4 ++-- python/pyspark/sql/functions/builtin.py | 7 ++++--- python/pyspark/sql/tests/test_functions.py | 1 - 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index f49495ef05bd..85d78ccac301 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -44,7 +44,6 @@ import numpy as np from pyspark.errors import PySparkTypeError, PySparkValueError from pyspark.errors.utils import _with_origin -from pyspark.sql.dataframe import DataFrame as ParentDataFrame from pyspark.sql import Column from pyspark.sql.connect.expressions import ( CaseWhen, @@ -83,6 +82,7 @@ if TYPE_CHECKING: DataTypeOrString, UserDefinedFunctionLike, ) + from pyspark.sql.dataframe import DataFrame from pyspark.sql.connect.udtf import UserDefinedTableFunction @@ -318,7 +318,7 @@ def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column: getbit.__doc__ = pysparkfuncs.getbit.__doc__ -def 
broadcast(df: "ParentDataFrame") -> "ParentDataFrame": +def broadcast(df: "DataFrame") -> "DataFrame": from pyspark.sql.connect.dataframe import DataFrame if not isinstance(df, DataFrame): diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 40c61caffeac..9db33493babd 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -42,7 +42,6 @@ from typing import ( from pyspark.errors import PySparkTypeError, PySparkValueError from pyspark.errors.utils import _with_origin from pyspark.sql.column import Column -from pyspark.sql.dataframe import DataFrame as ParentDataFrame from pyspark.sql.types import ( ArrayType, ByteType, @@ -72,6 +71,7 @@ from pyspark.sql.utils import ( if TYPE_CHECKING: from pyspark import SparkContext + from pyspark.sql.dataframe import DataFrame from pyspark.sql._typing import ( ColumnOrName, DataTypeOrString, @@ -6635,7 +6635,7 @@ def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> C @_try_remote_functions -def broadcast(df: "ParentDataFrame") -> "ParentDataFrame": +def broadcast(df: "DataFrame") -> "DataFrame": """ Marks a DataFrame as small enough for use in broadcast joins. 
@@ -6664,9 +6664,10 @@ def broadcast(df: "ParentDataFrame") -> "ParentDataFrame": +-----+---+ """ from py4j.java_gateway import JVMView + from pyspark.sql.dataframe import DataFrame sc = _get_active_spark_context() - return ParentDataFrame(cast(JVMView, sc._jvm).functions.broadcast(df._jdf), df.sparkSession) + return DataFrame(cast(JVMView, sc._jvm).functions.broadcast(df._jdf), df.sparkSession) @_try_remote_functions diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index a95bdcb8e507..e2b3e33756ba 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -146,7 +146,6 @@ class FunctionsTestsMixin: "Column", # should be imported from pyspark.sql "DataType", # should be imported from pyspark.sql.types "NumericType", # should be imported from pyspark.sql.types - "ParentDataFrame", # internal class "PySparkTypeError", # should be imported from pyspark.errors "PySparkValueError", # should be imported from pyspark.errors "StringType", # should be imported from pyspark.sql.types --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org