This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new 0742644d21b [SPARK-44775][PYTHON][DOCS] Add missing version
information in DataFrame APIs
0742644d21b is described below
commit 0742644d21b816c8c94ebaf5c789e2ec4e30b099
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Mon Aug 14 10:40:15 2023 +0800
[SPARK-44775][PYTHON][DOCS] Add missing version information in DataFrame
APIs
### What changes were proposed in this pull request?
Add missing version information in DataFrame APIs
### Why are the changes needed?
To improve the docs: several DataFrame APIs were missing `versionadded`/`versionchanged` information.
### Does this PR introduce _any_ user-facing change?
Yes, documentation only: the affected DataFrame API docstrings now show version information.
### How was this patch tested?
CI
Closes #42451 from zhengruifeng/doc_df_api_versions.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
(cherry picked from commit 6bd95d0e004505840aa0749107aa76f3a17958be)
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/connect/dataframe.py | 16 ++--------------
python/pyspark/sql/dataframe.py | 30 +++++++++++++++++++++++++++++-
2 files changed, 31 insertions(+), 15 deletions(-)
diff --git a/python/pyspark/sql/connect/dataframe.py
b/python/pyspark/sql/connect/dataframe.py
index 14d9c2c9d05..7b326538a8e 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -2023,22 +2023,10 @@ class DataFrame:
# SparkConnect specific API
def offset(self, n: int) -> "DataFrame":
- """Returns a new :class: `DataFrame` by skipping the first `n` rows.
-
- .. versionadded:: 3.4.0
-
- Parameters
- ----------
- num : int
- Number of records to skip.
-
- Returns
- -------
- :class:`DataFrame`
- Subset of the records
- """
return DataFrame.withPlan(plan.Offset(child=self._plan, offset=n),
session=self._session)
+ offset.__doc__ = PySparkDataFrame.offset.__doc__
+
@classmethod
def withPlan(cls, plan: plan.LogicalPlan, session: "SparkSession") ->
"DataFrame":
"""
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index f6fe17539c6..8be2c224265 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -516,6 +516,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
.. versionadded:: 2.0.0
+ .. versionchanged:: 3.5.0
+ Supports Spark Connect.
+
Notes
-----
This API is evolving.
@@ -1304,7 +1307,10 @@ class DataFrame(PandasMapOpsMixin,
PandasConversionMixin):
def offset(self, num: int) -> "DataFrame":
"""Returns a new :class:`DataFrame` by skipping the first `num` rows.
- .. versionadded:: 3.5.0
+ .. versionadded:: 3.4.0
+
+ .. versionchanged:: 3.5.0
+ Supports vanilla PySpark.
Parameters
----------
@@ -3540,6 +3546,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
.. versionadded:: 3.4.0
+ .. versionchanged:: 3.4.0
+ Supports Spark Connect.
+
Parameters
----------
ids : str, Column, tuple, list, optional
@@ -3631,6 +3640,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
.. versionadded:: 3.3.0
+ .. versionchanged:: 3.5.0
+ Supports Spark Connect.
+
Parameters
----------
observation : :class:`Observation` or str
@@ -4066,6 +4078,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
.. versionadded:: 3.5.0
+ .. versionchanged:: 3.5.0
+ Supports Spark Connect.
+
Parameters
----------
subset : List of column names, optional
@@ -5276,6 +5291,8 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
def toDF(self, *cols: str) -> "DataFrame":
"""Returns a new :class:`DataFrame` with the newly specified column
names
+ .. versionadded:: 1.6.0
+
.. versionchanged:: 3.4.0
Supports Spark Connect.
@@ -5381,6 +5398,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
.. versionadded:: 3.1.0
+ .. versionchanged:: 3.5.0
+ Supports Spark Connect.
+
Notes
-----
The equality comparison here is simplified by tolerating the cosmetic
differences
@@ -5426,6 +5446,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
.. versionadded:: 3.1.0
+ .. versionchanged:: 3.5.0
+ Supports Spark Connect.
+
Notes
-----
Unlike the standard hash code, the hash is calculated against the
query plan
@@ -5549,6 +5572,11 @@ class DataFrame(PandasMapOpsMixin,
PandasConversionMixin):
"""
Converts the existing DataFrame into a pandas-on-Spark DataFrame.
+ .. versionadded:: 3.2.0
+
+ .. versionchanged:: 3.5.0
+ Supports Spark Connect.
+
If a pandas-on-Spark DataFrame is converted to a Spark DataFrame and
then back
to pandas-on-Spark, it will lose the index information and the
original index
will be turned into a normal column.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]