This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 474f64a8850 [SPARK-44984][PYTHON][CONNECT] Remove `_get_alias` from DataFrame 474f64a8850 is described below commit 474f64a88502fe242654eb85c7cb5a1514c710e9 Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Mon Aug 28 19:44:32 2023 +0800 [SPARK-44984][PYTHON][CONNECT] Remove `_get_alias` from DataFrame ### What changes were proposed in this pull request? Remove `_get_alias` from DataFrame ### Why are the changes needed? `_get_alias` was added in the [initial PR](https://github.com/apache/spark/commit/6637bbe2b25ff2877b41a9677ce6d75e6996f968), but seems unneeded - field `alias` in `plan.Project` is always `None`; - `_get_alias` takes no parameter, but is used to replace a specific column name; the logic is weird when the column name varies; ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI ### Was this patch authored or co-authored using generative AI tooling? No Closes #42698 from zhengruifeng/py_connect_del_alias. 
Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- python/pyspark/sql/connect/dataframe.py | 15 ++------------- python/pyspark/sql/connect/plan.py | 1 - 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index 365cde59227..94c3ca95956 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -1573,14 +1573,6 @@ class DataFrame: sampleBy.__doc__ = PySparkDataFrame.sampleBy.__doc__ - def _get_alias(self) -> Optional[str]: - p = self._plan - while p is not None: - if isinstance(p, plan.Project) and p.alias: - return p.alias - p = p._child - return None - def __getattr__(self, name: str) -> "Column": if self._plan is None: raise SparkConnectException("Cannot analyze on empty plan.") @@ -1607,9 +1599,8 @@ class DataFrame: "'%s' object has no attribute '%s'" % (self.__class__.__name__, name) ) - alias = self._get_alias() return _to_col_with_plan_id( - col=alias if alias is not None else name, + col=name, plan_id=self._plan._plan_id, ) @@ -1625,8 +1616,6 @@ class DataFrame: def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Column, "DataFrame"]: if isinstance(item, str): - # Check for alias - alias = self._get_alias() if self._plan is None: raise SparkConnectException("Cannot analyze on empty plan.") @@ -1635,7 +1624,7 @@ class DataFrame: self.select(item).isLocal() return _to_col_with_plan_id( - col=alias if alias is not None else item, + col=item, plan_id=self._plan._plan_id, ) elif isinstance(item, Column): diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py index 7952d2af999..5e9b4e53dbf 100644 --- a/python/pyspark/sql/connect/plan.py +++ b/python/pyspark/sql/connect/plan.py @@ -464,7 +464,6 @@ class Project(LogicalPlan): def __init__(self, child: Optional["LogicalPlan"], *columns: "ColumnOrName") -> None: 
super().__init__(child) self._columns = list(columns) - self.alias: Optional[str] = None self._verify_expressions() def _verify_expressions(self) -> None: --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org