This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4bb3aaeb4c3 [SPARK-45028][PYTHON][DOCS] Refine docstring of `DataFrame.drop`
4bb3aaeb4c3 is described below
commit 4bb3aaeb4c3f13a723b6da30fe07c007e417b98c
Author: panbingkun <[email protected]>
AuthorDate: Fri Sep 1 09:18:22 2023 +0800
[SPARK-45028][PYTHON][DOCS] Refine docstring of `DataFrame.drop`
### What changes were proposed in this pull request?
This PR aims to refine the docstring of `DataFrame.drop`.
### Why are the changes needed?
To improve PySpark documentation.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Pass GA.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #42748 from panbingkun/SPARK-45028.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/dataframe.py | 45 +++++++++++++++++++++++------------------
1 file changed, 25 insertions(+), 20 deletions(-)
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 8417d445eea..42d85b82e9e 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -5513,7 +5513,8 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
...
def drop(self, *cols: "ColumnOrName") -> "DataFrame": # type: ignore[misc]
- """Returns a new :class:`DataFrame` without specified columns.
+ """
+ Returns a new :class:`DataFrame` without specified columns.
This is a no-op if the schema doesn't contain the given column name(s).
.. versionadded:: 1.4.0
@@ -5524,28 +5525,26 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
Parameters
----------
cols: str or :class:`Column`
- a name of the column, or the :class:`Column` to drop
+ A name of the column, or the :class:`Column` to be dropped.
Returns
-------
:class:`DataFrame`
- DataFrame without given columns.
+ A new :class:`DataFrame` without the specified columns.
Notes
-----
- When an input is a column name, it is treated literally without further interpretation.
- Otherwise, will try to match the equivalent expression.
- So that dropping column by its name `drop(colName)` has different semantic with directly
- dropping the column `drop(col(colName))`.
+ - When an input is a column name, it is treated literally without further interpretation.
+ Otherwise, it will try to match the equivalent expression.
+ So dropping a column by its name `drop(colName)` has a different semantic
+ with directly dropping the column `drop(col(colName))`.
Examples
--------
- >>> from pyspark.sql import Row
- >>> from pyspark.sql.functions import col, lit
+ Example 1: Drop a column by name.
+
>>> df = spark.createDataFrame(
... [(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"])
- >>> df2 = spark.createDataFrame([Row(height=80, name="Tom"), Row(height=85, name="Bob")])
-
>>> df.drop('age').show()
+-----+
| name|
@@ -5554,6 +5553,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
|Alice|
| Bob|
+-----+
+
+ Example 2: Drop a column by :class:`Column` object.
+
>>> df.drop(df.age).show()
+-----+
| name|
@@ -5563,9 +5565,10 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
| Bob|
+-----+
- Drop the column that joined both DataFrames on.
+ Example 3: Drop the column that joined both DataFrames on.
- >>> df.join(df2, df.name == df2.name, 'inner').drop('name').sort('age').show()
+ >>> df2 = spark.createDataFrame([(80, "Tom"), (85, "Bob")], ["height", "name"])
+ >>> df.join(df2, df.name == df2.name).drop('name').sort('age').show()
+---+------+
|age|height|
+---+------+
@@ -5586,7 +5589,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
| 16| Bob| 85| Bob|
+---+-----+------+----+
- Drop two column by the same name.
+ Example 4: Drop two column by the same name.
>>> df3.drop("name").show()
+---+------+
@@ -5600,14 +5603,18 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
| 16| 85|
+---+------+
- Can not drop col('name') due to ambiguous reference.
+ Example 5: Can not drop col('name') due to ambiguous reference.
- >>> df3.drop(col("name")).show()
+ >>> from pyspark.sql import functions as sf
+ >>> df3.drop(sf.col("name")).show()
Traceback (most recent call last):
...
pyspark.errors.exceptions.captured.AnalysisException: [AMBIGUOUS_REFERENCE] Reference...
- >>> df4 = df.withColumn("a.b.c", lit(1))
+ Example 6: Can not find a column matching the expression "a.b.c".
+
+ >>> from pyspark.sql import functions as sf
+ >>> df4 = df.withColumn("a.b.c", sf.lit(1))
>>> df4.show()
+---+-----+-----+
|age| name|a.b.c|
@@ -5626,9 +5633,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
| 16| Bob|
+---+-----+
- Can not find a column matching the expression "a.b.c".
-
- >>> df4.drop(col("a.b.c")).show()
+ >>> df4.drop(sf.col("a.b.c")).show()
+---+-----+-----+
|age| name|a.b.c|
+---+-----+-----+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]