This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 685f536a5947 [MINOR][PYTHON][DOCS] Remove since decorator usages in
pyspark.sql
685f536a5947 is described below
commit 685f536a594700df32c22d62563ba3ec3dd26081
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Mon Nov 27 15:27:39 2023 +0900
[MINOR][PYTHON][DOCS] Remove since decorator usages in pyspark.sql
### What changes were proposed in this pull request?
This PR proposes to remove the `since` decorator from docstrings in
`pyspark.sql`. It doesn't play very well with numpydoc style, so it has
already been removed from almost all places in `pyspark.sql`. These are leftovers.
### Why are the changes needed?
`since` decorator does not play very well with numpydoc style.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Before/after are virtually the same. Checked with linter.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44029 from HyukjinKwon/minor-remove-since.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/conf.py | 5 +++--
python/pyspark/sql/context.py | 23 ++++++++++++++++-------
python/pyspark/sql/datasource.py | 16 ++++++++++------
python/pyspark/sql/readwriter.py | 34 ++++++++++++++++++++++------------
4 files changed, 51 insertions(+), 27 deletions(-)
diff --git a/python/pyspark/sql/conf.py b/python/pyspark/sql/conf.py
index b00f534eb48d..b2dda115f666 100644
--- a/python/pyspark/sql/conf.py
+++ b/python/pyspark/sql/conf.py
@@ -20,7 +20,7 @@ from typing import Any, Optional, Union
from py4j.java_gateway import JavaObject
-from pyspark import since, _NoValue
+from pyspark import _NoValue
from pyspark._globals import _NoValueType
@@ -123,10 +123,11 @@ class RuntimeConfig:
"expected %s '%s' to be a string (was '%s')" % (identifier,
obj, type(obj).__name__)
)
- @since(2.4)
def isModifiable(self, key: str) -> bool:
"""Indicates whether the configuration property with the given key
is modifiable in the current session.
+
+ .. versionadded:: 2.4.0
"""
return self._jconf.isModifiable(key)
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index b75567ee0710..efc9760edf8b 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -34,7 +34,7 @@ from typing import (
from py4j.java_gateway import JavaObject
-from pyspark import since, _NoValue
+from pyspark import _NoValue
from pyspark._globals import _NoValueType
from pyspark.sql.session import _monkey_patch_RDD, SparkSession
from pyspark.sql.dataframe import DataFrame
@@ -632,19 +632,28 @@ class SQLContext:
else:
return [name for name in self._ssql_ctx.tableNames(dbName)]
- @since(1.0)
def cacheTable(self, tableName: str) -> None:
- """Caches the specified table in-memory."""
+ """
+ Caches the specified table in-memory.
+
+ .. versionadded:: 1.0.0
+ """
self._ssql_ctx.cacheTable(tableName)
- @since(1.0)
def uncacheTable(self, tableName: str) -> None:
- """Removes the specified table from the in-memory cache."""
+ """
+ Removes the specified table from the in-memory cache.
+
+ .. versionadded:: 1.0.0
+ """
self._ssql_ctx.uncacheTable(tableName)
- @since(1.3)
def clearCache(self) -> None:
- """Removes all cached tables from the in-memory cache."""
+ """
+ Removes all cached tables from the in-memory cache.
+
+ .. versionadded:: 1.3.0
+ """
self._ssql_ctx.clearCache()
@property
diff --git a/python/pyspark/sql/datasource.py b/python/pyspark/sql/datasource.py
index b380e8b534eb..032e3f48a82e 100644
--- a/python/pyspark/sql/datasource.py
+++ b/python/pyspark/sql/datasource.py
@@ -17,7 +17,6 @@
from abc import ABC, abstractmethod
from typing import final, Any, Dict, Iterator, List, Tuple, Type, Union,
TYPE_CHECKING
-from pyspark import since
from pyspark.sql import Row
from pyspark.sql.types import StructType
@@ -29,7 +28,6 @@ if TYPE_CHECKING:
__all__ = ["DataSource", "DataSourceReader", "DataSourceWriter",
"DataSourceRegistration"]
-@since(4.0)
class DataSource(ABC):
"""
A base class for data sources.
@@ -42,6 +40,8 @@ class DataSource(ABC):
After implementing this interface, you can start to load your data source
using
``spark.read.format(...).load()`` and save data using
``df.write.format(...).save()``.
+
+ .. versionadded: 4.0.0
"""
@final
@@ -145,11 +145,12 @@ class DataSource(ABC):
raise NotImplementedError
-@since(4.0)
class DataSourceReader(ABC):
"""
A base class for data source readers. Data source readers are responsible
for
outputting data from a data source.
+
+ .. versionadded: 4.0.0
"""
def partitions(self) -> Iterator[Any]:
@@ -241,11 +242,12 @@ class DataSourceReader(ABC):
...
-@since(4.0)
class DataSourceWriter(ABC):
"""
A base class for data source writers. Data source writers are responsible
for saving
the data to the data source.
+
+ .. versionadded: 4.0.0
"""
@abstractmethod
@@ -305,21 +307,23 @@ class DataSourceWriter(ABC):
...
-@since(4.0)
class WriterCommitMessage:
"""
A commit message returned by the ``write`` method of ``DataSourceWriter``
and will be
sent back to the driver side as input parameter of ``commit`` or ``abort``
method.
+
+ .. versionadded: 4.0.0
"""
...
-@since(4.0)
class DataSourceRegistration:
"""
Wrapper for data source registration. This instance can be accessed by
:attr:`spark.dataSource`.
+
+ .. versionadded: 4.0.0
"""
def __init__(self, sparkSession: "SparkSession"):
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index b7e2c145f443..b61284247b0e 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -19,7 +19,7 @@ from typing import cast, overload, Dict, Iterable, List,
Optional, Tuple, TYPE_C
from py4j.java_gateway import JavaClass, JavaObject
-from pyspark import RDD, since
+from pyspark import RDD
from pyspark.sql.column import _to_seq, _to_java_column, Column
from pyspark.sql.types import StructType
from pyspark.sql import utils
@@ -2295,41 +2295,44 @@ class DataFrameWriterV2:
self._spark = df.sparkSession
self._jwriter = df._jdf.writeTo(table)
- @since(3.1)
def using(self, provider: str) -> "DataFrameWriterV2":
"""
Specifies a provider for the underlying output data source.
Spark's default catalog supports "parquet", "json", etc.
+
+ .. versionadded: 3.1.0
"""
self._jwriter.using(provider)
return self
- @since(3.1)
def option(self, key: str, value: "OptionalPrimitiveType") ->
"DataFrameWriterV2":
"""
Add a write option.
+
+ .. versionadded: 3.1.0
"""
self._jwriter.option(key, to_str(value))
return self
- @since(3.1)
def options(self, **options: "OptionalPrimitiveType") ->
"DataFrameWriterV2":
"""
Add write options.
+
+ .. versionadded: 3.1.0
"""
options = {k: to_str(v) for k, v in options.items()}
self._jwriter.options(options)
return self
- @since(3.1)
def tableProperty(self, property: str, value: str) -> "DataFrameWriterV2":
"""
Add table property.
+
+ .. versionadded: 3.1.0
"""
self._jwriter.tableProperty(property, value)
return self
- @since(3.1)
def partitionedBy(self, col: Column, *cols: Column) -> "DataFrameWriterV2":
"""
Partition the output table created by `create`, `createOrReplace`, or
`replace` using
@@ -2356,33 +2359,35 @@ class DataFrameWriterV2:
* :py:func:`pyspark.sql.functions.hours`
* :py:func:`pyspark.sql.functions.bucket`
+ .. versionadded: 3.1.0
"""
col = _to_java_column(col)
cols = _to_seq(self._spark._sc, [_to_java_column(c) for c in cols])
self._jwriter.partitionedBy(col, cols)
return self
- @since(3.1)
def create(self) -> None:
"""
Create a new table from the contents of the data frame.
The new table's schema, partition layout, properties, and other
configuration will be
based on the configuration set on this writer.
+
+ .. versionadded: 3.1.0
"""
self._jwriter.create()
- @since(3.1)
def replace(self) -> None:
"""
Replace an existing table with the contents of the data frame.
The existing table's schema, partition layout, properties, and other
configuration will be
replaced with the contents of the data frame and the configuration set
on this writer.
+
+ .. versionadded: 3.1.0
"""
self._jwriter.replace()
- @since(3.1)
def createOrReplace(self) -> None:
"""
Create a new table or replace an existing table with the contents of
the data frame.
@@ -2391,26 +2396,29 @@ class DataFrameWriterV2:
and other configuration will be based on the contents of the data frame
and the configuration set on this writer.
If the table exists, its configuration and data will be replaced.
+
+ .. versionadded: 3.1.0
"""
self._jwriter.createOrReplace()
- @since(3.1)
def append(self) -> None:
"""
Append the contents of the data frame to the output table.
+
+ .. versionadded: 3.1.0
"""
self._jwriter.append()
- @since(3.1)
def overwrite(self, condition: Column) -> None:
"""
Overwrite rows matching the given filter condition with the contents
of the data frame in
the output table.
+
+ .. versionadded: 3.1.0
"""
condition = _to_java_column(condition)
self._jwriter.overwrite(condition)
- @since(3.1)
def overwritePartitions(self) -> None:
"""
Overwrite all partition for which the data frame contains at least one
row with the contents
@@ -2418,6 +2426,8 @@ class DataFrameWriterV2:
This operation is equivalent to Hive's `INSERT OVERWRITE ...
PARTITION`, which replaces
partitions dynamically depending on the contents of the data frame.
+
+ .. versionadded: 3.1.0
"""
self._jwriter.overwritePartitions()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]