This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new e07dc5aac80f [SPARK-48714][SPARK-48794][FOLLOW-UP][PYTHON][DOCS] Add
`mergeInto` to API reference
e07dc5aac80f is described below
commit e07dc5aac80f538c3317aea34b13af3ca80a205f
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Mon Jul 15 08:31:23 2024 +0900
[SPARK-48714][SPARK-48794][FOLLOW-UP][PYTHON][DOCS] Add `mergeInto` to API
reference
### What changes were proposed in this pull request?
Add `mergeInto` to API reference
### Why are the changes needed?
This feature was missing from the API reference documentation.
### Does this PR introduce _any_ user-facing change?
Yes, a documentation change.
### How was this patch tested?
CI
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #47329 from zhengruifeng/py_doc_merge_into.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/docs/source/reference/pyspark.sql/dataframe.rst | 1 +
python/docs/source/reference/pyspark.sql/io.rst | 5 +++++
python/pyspark/sql/__init__.py | 2 ++
python/pyspark/sql/merge.py | 3 +++
4 files changed, 11 insertions(+)
diff --git a/python/docs/source/reference/pyspark.sql/dataframe.rst b/python/docs/source/reference/pyspark.sql/dataframe.rst
index d0196baa7a05..a7349c98f580 100644
--- a/python/docs/source/reference/pyspark.sql/dataframe.rst
+++ b/python/docs/source/reference/pyspark.sql/dataframe.rst
@@ -131,6 +131,7 @@ DataFrame
DataFrame.write
DataFrame.writeStream
DataFrame.writeTo
+ DataFrame.mergeInto
DataFrame.pandas_api
DataFrameNaFunctions.drop
DataFrameNaFunctions.fill
diff --git a/python/docs/source/reference/pyspark.sql/io.rst b/python/docs/source/reference/pyspark.sql/io.rst
index e687ca0d27d8..0554e4bea89d 100644
--- a/python/docs/source/reference/pyspark.sql/io.rst
+++ b/python/docs/source/reference/pyspark.sql/io.rst
@@ -63,3 +63,8 @@ Input/Output
DataFrameWriterV2.append
DataFrameWriterV2.overwrite
DataFrameWriterV2.overwritePartitions
+ MergeIntoWriter.whenMatched
+ MergeIntoWriter.whenNotMatched
+ MergeIntoWriter.whenNotMatchedBySource
+ MergeIntoWriter.withSchemaEvolution
+ MergeIntoWriter.merge
diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py
index bc046da81d27..a0a6e8ef70c8 100644
--- a/python/pyspark/sql/__init__.py
+++ b/python/pyspark/sql/__init__.py
@@ -48,6 +48,7 @@ from pyspark.sql.dataframe import DataFrame,
DataFrameNaFunctions, DataFrameStat
from pyspark.sql.group import GroupedData
from pyspark.sql.observation import Observation
from pyspark.sql.readwriter import DataFrameReader, DataFrameWriter,
DataFrameWriterV2
+from pyspark.sql.merge import MergeIntoWriter
from pyspark.sql.window import Window, WindowSpec
from pyspark.sql.pandas.group_ops import PandasCogroupedOps
from pyspark.sql.utils import is_remote
@@ -73,6 +74,7 @@ __all__ = [
"DataFrameReader",
"DataFrameWriter",
"DataFrameWriterV2",
+ "MergeIntoWriter",
"PandasCogroupedOps",
"is_remote",
]
diff --git a/python/pyspark/sql/merge.py b/python/pyspark/sql/merge.py
index bff903cd4392..337eba25e5d7 100644
--- a/python/pyspark/sql/merge.py
+++ b/python/pyspark/sql/merge.py
@@ -61,6 +61,7 @@ class MergeIntoWriter:
) -> "MergeIntoWriter.WhenNotMatched":
"""
Initialize a `WhenNotMatched` action with a condition.
+
This `WhenNotMatched` action will be executed when a source row does
not match any target
row based on the merge condition and the specified `condition` is
satisfied.
@@ -76,9 +77,11 @@ class MergeIntoWriter:
) -> "MergeIntoWriter.WhenNotMatchedBySource":
"""
Initialize a `WhenNotMatchedBySource` action with a condition.
+
This `WhenNotMatchedBySource` action will be executed when a target
row does not match any
rows in the source table based on the merge condition and the
specified `condition`
is satisfied.
+
This `WhenNotMatchedBySource` can be followed by one of the following
merge actions:
- `updateAll`: Update all the not matched target table rows with
source dataset rows.
- `update(Dict)`: Update all the not matched target table rows while
changing only
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]