This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 97f20813dca  [MINOR][PYTHON][DOCS] Remove duplicated versionchanged per versionadded
97f20813dca is described below

commit 97f20813dca0e8d5512f25c2163bada79a8871c7
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Tue Aug 22 20:09:08 2023 +0900

    [MINOR][PYTHON][DOCS] Remove duplicated versionchanged per versionadded

    ### What changes were proposed in this pull request?

    This PR addresses all the cases of duplicated `versionchanged` directives with `versionadded` directives, see also https://github.com/apache/spark/pull/42597.

    Also, this PR mentions that all functions support Spark Connect from Apache Spark 3.5.0.

    ### Why are the changes needed?

    To remove duplicated information in docstring.

    ### Does this PR introduce _any_ user-facing change?

    Yes, it removes duplicated information in PySpark API Reference page.

    ### How was this patch tested?

    CI in this PR should validate them.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #42602 from HyukjinKwon/minor-versionchanges.

    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit 65b8ca2694c2443b4f97963de9398ac0ff779d0c)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../source/reference/pyspark.sql/functions.rst |  3 +
 python/pyspark/sql/conf.py                     | 18 +----
 python/pyspark/sql/dataframe.py                | 33 +++++----
 python/pyspark/sql/functions.py                | 80 +++++++++++-----------
 python/pyspark/sql/session.py                  |  7 +-
 5 files changed, 67 insertions(+), 74 deletions(-)

diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index c5194311db8..cd4f79ec62f 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -21,6 +21,9 @@ Functions
 =========
 .. currentmodule:: pyspark.sql.functions
 
+A collections of builtin functions available for DataFrame operations.
+From Apache Spark 3.5.0, all functions support Spark Connect.
+
 Normal Functions
 ----------------
 .. autosummary::
diff --git a/python/pyspark/sql/conf.py b/python/pyspark/sql/conf.py
index 9a7ecd9fcbe..599bb358fb1 100644
--- a/python/pyspark/sql/conf.py
+++ b/python/pyspark/sql/conf.py
@@ -39,11 +39,7 @@ class RuntimeConfig:
 
     @since(2.0)
     def set(self, key: str, value: Union[str, int, bool]) -> None:
-        """Sets the given Spark runtime configuration property.
-
-        .. versionchanged:: 3.4.0
-            Supports Spark Connect.
-        """
+        """Sets the given Spark runtime configuration property."""
         self._jconf.set(key, value)
 
     @since(2.0)
@@ -52,9 +48,6 @@ class RuntimeConfig:
     ) -> Optional[str]:
         """Returns the value of Spark runtime configuration property for the given key,
         assuming it is set.
-
-        .. versionchanged:: 3.4.0
-            Supports Spark Connect.
         """
         self._checkType(key, "key")
         if default is _NoValue:
@@ -66,11 +59,7 @@ class RuntimeConfig:
 
     @since(2.0)
     def unset(self, key: str) -> None:
-        """Resets the configuration property for the given key.
-
-        .. versionchanged:: 3.4.0
-            Supports Spark Connect.
-        """
+        """Resets the configuration property for the given key."""
         self._jconf.unset(key)
 
     def _checkType(self, obj: Any, identifier: str) -> None:
@@ -84,9 +73,6 @@ class RuntimeConfig:
     def isModifiable(self, key: str) -> bool:
         """Indicates whether the configuration property with the given key is
         modifiable in the current session.
-
-        .. versionchanged:: 3.4.0
-            Supports Spark Connect.
""" return self._jconf.isModifiable(key) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index ac5b4f0250a..35f2c70f8c9 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2234,9 +2234,6 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 3.4.0 - .. versionchanged:: 3.4.0 - Supports Spark Connect. - Parameters ---------- schema : :class:`StructType` @@ -2265,6 +2262,8 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): * Fail if the nullability is not compatible. For example, the column and/or inner field is nullable but the specified schema requires them to be not nullable. + Supports Spark Connect. + Examples -------- >>> from pyspark.sql.types import StructField, StringType @@ -3543,9 +3542,6 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 3.4.0 - .. versionchanged:: 3.4.0 - Supports Spark Connect. - Parameters ---------- ids : str, Column, tuple, list @@ -3565,6 +3561,10 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): :class:`DataFrame` Unpivoted DataFrame. + Notes + ----- + Supports Spark Connect. + Examples -------- >>> df = spark.createDataFrame( @@ -3629,9 +3629,6 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 3.4.0 - .. versionchanged:: 3.4.0 - Supports Spark Connect. - Parameters ---------- ids : str, Column, tuple, list, optional @@ -3654,6 +3651,10 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): See Also -------- DataFrame.unpivot + + Notes + ----- + Supports Spark Connect. """ return self.unpivot(ids, values, variableColumnName, valueColumnName) @@ -4231,9 +4232,6 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 3.5.0 - .. versionchanged:: 3.5.0 - Supports Spark Connect. - Parameters ---------- subset : List of column names, optional @@ -4244,6 +4242,10 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): :class:`DataFrame` DataFrame without duplicates. + Notes + ----- + Supports Spark Connect. + Examples -------- >>> from pyspark.sql import Row @@ -5209,9 +5211,6 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): .. versionadded:: 3.4.0 Added support for multiple columns renaming - .. versionchanged:: 3.4.0 - Supports Spark Connect. - Parameters ---------- colsMap : dict @@ -5227,6 +5226,10 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): -------- :meth:`withColumnRenamed` + Notes + ----- + Support Spark Connect + Examples -------- >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], schema=["age", "name"]) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 7999f9ce3a0..ee619099ded 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -612,9 +612,6 @@ def mode(col: "ColumnOrName") -> Column: .. versionadded:: 3.4.0 - .. versionchanged:: 3.4.0 - Supports Spark Connect. - Parameters ---------- col : :class:`~pyspark.sql.Column` or str @@ -625,6 +622,10 @@ def mode(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the most frequent value in a group. + Notes + ----- + Supports Spark Connect. + Examples -------- >>> df = spark.createDataFrame([ @@ -929,9 +930,6 @@ def median(col: "ColumnOrName") -> Column: .. versionadded:: 3.4.0 - .. versionchanged:: 3.4.0 - Supports Spark Connect. 
-
     Parameters
     ----------
     col : :class:`~pyspark.sql.Column` or str
@@ -942,6 +940,10 @@ def median(col: "ColumnOrName") -> Column:
     Returns
     -------
     :class:`~pyspark.sql.Column`
         the median of the values in a group.
 
+    Notes
+    -----
+    Supports Spark Connect.
+
     Examples
     --------
     >>> df = spark.createDataFrame([
@@ -3371,9 +3373,6 @@ def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName",
 
     .. versionadded:: 3.4.0
 
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
     Parameters
     ----------
     dividend : str, :class:`~pyspark.sql.Column` or float
         the column that contains dividend, or the specified dividend value
     divisor : str, :class:`~pyspark.sql.Column` or float
         the column that contains divisor, or the specified divisor value
@@ -3386,6 +3385,10 @@ def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName",
     Returns
     -------
     :class:`~pyspark.sql.Column`
         positive value of dividend mod divisor.
 
+    Notes
+    -----
+    Supports Spark Connect.
+
     Examples
     --------
     >>> from pyspark.sql.functions import pmod
@@ -3673,9 +3676,6 @@ def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> C
 
     .. versionadded:: 2.1.0
 
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
     .. versionchanged:: 3.4.0
         Supports Spark Connect.
@@ -4522,13 +4522,8 @@ def approx_percentile(
     in the ordered `col` values (sorted from least to greatest) such that no more than
     `percentage` of `col` values is less than the value or equal to that value.
 
     .. versionadded:: 3.5.0
-
-    Notes
-    -----
-    Supports Spark Connect.
-
     Parameters
     ----------
     col : :class:`~pyspark.sql.Column` or str
@@ -7670,9 +7665,6 @@ def window_time(
 
     .. versionadded:: 3.4.0
 
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
     Parameters
     ----------
     windowColumn : :class:`~pyspark.sql.Column`
@@ -7683,6 +7675,10 @@ def window_time(
     :class:`~pyspark.sql.Column`
         the column for computed results.
 
+    Notes
+    -----
+    Supports Spark Connect.
+
     Examples
     --------
     >>> import datetime
@@ -11180,9 +11176,6 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
 
     .. versionadded:: 3.4.0
 
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
     Parameters
     ----------
     col : :class:`~pyspark.sql.Column` or str
@@ -11198,6 +11191,7 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
     Notes
     -----
     The position is not 1 based, but 0 based index.
+    Supports Spark Connect.
 
     See Also
     --------
@@ -11346,9 +11340,6 @@ def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: An
 
     .. versionadded:: 3.4.0
 
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
     Parameters
     ----------
     arr : :class:`~pyspark.sql.Column` or str
@@ -11364,6 +11355,10 @@ def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: An
     Returns
     -------
     :class:`~pyspark.sql.Column`
         an array of values, including the new specified value
 
+    Notes
+    -----
+    Supports Spark Connect.
+
     Examples
     --------
     >>> df = spark.createDataFrame(
@@ -11486,9 +11481,6 @@ def array_compact(col: "ColumnOrName") -> Column:
 
     .. versionadded:: 3.4.0
 
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
     Parameters
     ----------
     col : :class:`~pyspark.sql.Column` or str
@@ -11499,6 +11491,10 @@ def array_compact(col: "ColumnOrName") -> Column:
     :class:`~pyspark.sql.Column`
         an array by excluding the null values.
 
+    Notes
+    -----
+    Supports Spark Connect.
+
     Examples
     --------
     >>> df = spark.createDataFrame([([1, None, 2, 3],), ([4, 5, None, 4],)], ['data'])
@@ -11516,9 +11512,6 @@ def array_append(col: "ColumnOrName", value: Any) -> Column:
 
     .. versionadded:: 3.4.0
 
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
     Parameters
     ----------
     col : :class:`~pyspark.sql.Column` or str
@@ -11531,6 +11524,10 @@ def array_append(col: "ColumnOrName", value: Any) -> Column:
     Returns
     -------
     :class:`~pyspark.sql.Column`
         an array of values from first array along with the element.
 
+    Notes
+    -----
+    Supports Spark Connect.
+
     Examples
     --------
     >>> from pyspark.sql import Row
@@ -11634,9 +11631,6 @@ def inline(col: "ColumnOrName") -> Column:
 
     .. versionadded:: 3.4.0
 
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
     Parameters
     ----------
     col : :class:`~pyspark.sql.Column` or str
@@ -11651,6 +11645,10 @@ def inline(col: "ColumnOrName") -> Column:
     See Also
     --------
     :meth:`explode`
 
+    Notes
+    -----
+    Supports Spark Connect.
+
     Examples
     --------
     >>> from pyspark.sql import Row
@@ -11775,9 +11773,6 @@ def inline_outer(col: "ColumnOrName") -> Column:
 
     .. versionadded:: 3.4.0
 
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
     Parameters
     ----------
     col : :class:`~pyspark.sql.Column` or str
@@ -11793,6 +11788,10 @@ def inline_outer(col: "ColumnOrName") -> Column:
     :meth:`explode_outer`
     :meth:`inline`
 
+    Notes
+    -----
+    Supports Spark Connect.
+
     Examples
     --------
     >>> from pyspark.sql import Row
@@ -14472,8 +14471,9 @@ def unwrap_udt(col: "ColumnOrName") -> Column:
 
     .. versionadded:: 3.4.0
 
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
+    Notes
+    -----
+    Supports Spark Connect.
     """
     return _invoke_function("unwrap_udt", _to_java_column(col))
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 7a492d634cf..d3f3359acea 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -851,12 +851,13 @@ class SparkSession(SparkConversionMixin):
 
         .. versionadded:: 3.5.0
 
-        .. versionchanged:: 3.5.0
-            Supports Spark Connect.
-
         Returns
        -------
         :class:`UDTFRegistration`
+
+        Notes
+        -----
+        Supports Spark Connect.
         """
         from pyspark.sql.udtf import UDTFRegistration

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
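
For readers skimming the patch, the docstring pattern being normalized is easiest to see side by side. The sketch below is illustrative only and not part of the commit; `example_function` is a hypothetical name, while the directives are the standard Sphinx ones used throughout PySpark docstrings.

    # Before: `versionchanged` merely repeats the version already stated by
    # `versionadded`, so it carries no extra information.
    def example_function(col):
        """Compute a result from `col`.

        .. versionadded:: 3.4.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.
        """

    # After: the duplicated directive is dropped; where the remark is still
    # useful, it moves into a numpydoc-style "Notes" section instead.
    def example_function(col):
        """Compute a result from `col`.

        .. versionadded:: 3.4.0

        Notes
        -----
        Supports Spark Connect.
        """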