This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6bd95d0e004 [SPARK-44775][PYTHON][DOCS] Add missing version information in DataFrame APIs 6bd95d0e004 is described below commit 6bd95d0e004505840aa0749107aa76f3a17958be Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Mon Aug 14 10:40:15 2023 +0800 [SPARK-44775][PYTHON][DOCS] Add missing version information in DataFrame APIs ### What changes were proposed in this pull request? Add the missing `versionadded` / `versionchanged` information to the DataFrame API docstrings, and make the Spark Connect `DataFrame.offset` reuse the PySpark `DataFrame.offset` docstring instead of keeping its own copy. ### Why are the changes needed? To improve the docs. ### Does this PR introduce _any_ user-facing change? Yes, documentation only. ### How was this patch tested? CI Closes #42451 from zhengruifeng/doc_df_api_versions. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- python/pyspark/sql/connect/dataframe.py | 16 ++-------------- python/pyspark/sql/dataframe.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index 14d9c2c9d05..7b326538a8e 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -2023,22 +2023,10 @@ class DataFrame: # SparkConnect specific API def offset(self, n: int) -> "DataFrame": - """Returns a new :class: `DataFrame` by skipping the first `n` rows. - - .. versionadded:: 3.4.0 - - Parameters - ---------- - num : int - Number of records to skip. 
- - Returns - ------- - :class:`DataFrame` - Subset of the records - """ return DataFrame.withPlan(plan.Offset(child=self._plan, offset=n), session=self._session) + offset.__doc__ = PySparkDataFrame.offset.__doc__ + @classmethod def withPlan(cls, plan: plan.LogicalPlan, session: "SparkSession") -> "DataFrame": """ diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index f6fe17539c6..8be2c224265 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -516,6 +516,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 2.0.0 + .. versionchanged:: 3.5.0 + Supports Spark Connect. + Notes ----- This API is evolving. @@ -1304,7 +1307,10 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): def offset(self, num: int) -> "DataFrame": """Returns a new :class: `DataFrame` by skipping the first `n` rows. - .. versionadded:: 3.5.0 + .. versionadded:: 3.4.0 + + .. versionchanged:: 3.5.0 + Supports vanilla PySpark. Parameters ---------- @@ -3540,6 +3546,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 3.4.0 + .. versionchanged:: 3.4.0 + Supports Spark Connect. + Parameters ---------- ids : str, Column, tuple, list, optional @@ -3631,6 +3640,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 3.3.0 + .. versionchanged:: 3.5.0 + Supports Spark Connect. + Parameters ---------- observation : :class:`Observation` or str @@ -4066,6 +4078,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 3.5.0 + .. versionchanged:: 3.5.0 + Supports Spark Connect. + Parameters ---------- subset : List of column names, optional @@ -5276,6 +5291,8 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): def toDF(self, *cols: str) -> "DataFrame": """Returns a new :class:`DataFrame` that with new specified column names + .. versionadded:: 1.6.0 + .. versionchanged:: 3.4.0 Supports Spark Connect. 
@@ -5381,6 +5398,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 3.1.0 + .. versionchanged:: 3.5.0 + Supports Spark Connect. + Notes ----- The equality comparison here is simplified by tolerating the cosmetic differences @@ -5426,6 +5446,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 3.1.0 + .. versionchanged:: 3.5.0 + Supports Spark Connect. + Notes ----- Unlike the standard hash code, the hash is calculated against the query plan @@ -5549,6 +5572,11 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): """ Converts the existing DataFrame into a pandas-on-Spark DataFrame. + .. versionadded:: 3.2.0 + + .. versionchanged:: 3.5.0 + Supports Spark Connect. + If a pandas-on-Spark DataFrame is converted to a Spark DataFrame and then back to pandas-on-Spark, it will lose the index information and the original index will be turned into a normal column. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org