This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 13902f55687c [SPARK-51959][PYTHON] Make functions not import ParentDataFrame
13902f55687c is described below
commit 13902f55687c216bf8de5538c9a5f0675bb0f862
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Wed Apr 30 19:22:53 2025 +0900
[SPARK-51959][PYTHON] Make functions not import ParentDataFrame
### What changes were proposed in this pull request?
Make functions not import ParentDataFrame
### Why are the changes needed?
`ParentDataFrame` is only used in `broadcast` function, and we don't need
this alias
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
CI
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #50766 from zhengruifeng/py_parent_df.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/connect/functions/builtin.py | 4 ++--
python/pyspark/sql/functions/builtin.py | 7 ++++---
python/pyspark/sql/tests/test_functions.py | 1 -
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py
index f49495ef05bd..85d78ccac301 100644
--- a/python/pyspark/sql/connect/functions/builtin.py
+++ b/python/pyspark/sql/connect/functions/builtin.py
@@ -44,7 +44,6 @@ import numpy as np
from pyspark.errors import PySparkTypeError, PySparkValueError
from pyspark.errors.utils import _with_origin
-from pyspark.sql.dataframe import DataFrame as ParentDataFrame
from pyspark.sql import Column
from pyspark.sql.connect.expressions import (
CaseWhen,
@@ -83,6 +82,7 @@ if TYPE_CHECKING:
DataTypeOrString,
UserDefinedFunctionLike,
)
+ from pyspark.sql.dataframe import DataFrame
from pyspark.sql.connect.udtf import UserDefinedTableFunction
@@ -318,7 +318,7 @@ def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
getbit.__doc__ = pysparkfuncs.getbit.__doc__
-def broadcast(df: "ParentDataFrame") -> "ParentDataFrame":
+def broadcast(df: "DataFrame") -> "DataFrame":
from pyspark.sql.connect.dataframe import DataFrame
if not isinstance(df, DataFrame):
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index 40c61caffeac..9db33493babd 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -42,7 +42,6 @@ from typing import (
from pyspark.errors import PySparkTypeError, PySparkValueError
from pyspark.errors.utils import _with_origin
from pyspark.sql.column import Column
-from pyspark.sql.dataframe import DataFrame as ParentDataFrame
from pyspark.sql.types import (
ArrayType,
ByteType,
@@ -72,6 +71,7 @@ from pyspark.sql.utils import (
if TYPE_CHECKING:
from pyspark import SparkContext
+ from pyspark.sql.dataframe import DataFrame
from pyspark.sql._typing import (
ColumnOrName,
DataTypeOrString,
@@ -6635,7 +6635,7 @@ def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> C
@_try_remote_functions
-def broadcast(df: "ParentDataFrame") -> "ParentDataFrame":
+def broadcast(df: "DataFrame") -> "DataFrame":
"""
Marks a DataFrame as small enough for use in broadcast joins.
@@ -6664,9 +6664,10 @@ def broadcast(df: "ParentDataFrame") -> "ParentDataFrame":
+-----+---+
"""
from py4j.java_gateway import JVMView
+ from pyspark.sql.dataframe import DataFrame
sc = _get_active_spark_context()
-    return ParentDataFrame(cast(JVMView, sc._jvm).functions.broadcast(df._jdf), df.sparkSession)
+    return DataFrame(cast(JVMView, sc._jvm).functions.broadcast(df._jdf), df.sparkSession)
@_try_remote_functions
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index a95bdcb8e507..e2b3e33756ba 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -146,7 +146,6 @@ class FunctionsTestsMixin:
"Column", # should be imported from pyspark.sql
"DataType", # should be imported from pyspark.sql.types
"NumericType", # should be imported from pyspark.sql.types
- "ParentDataFrame", # internal class
"PySparkTypeError", # should be imported from pyspark.errors
"PySparkValueError", # should be imported from pyspark.errors
"StringType", # should be imported from pyspark.sql.types
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]