This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 58c24a5719b8 [SPARK-44762][CONNECT][CORE] Doc for SparkContext.addJobTag and Connect SparkSession.addTag
58c24a5719b8 is described below
commit 58c24a5719b8717ea37347c668c9df8a3714ae3c
Author: Juliusz Sompolski <[email protected]>
AuthorDate: Sat Sep 30 20:54:49 2023 +0900
[SPARK-44762][CONNECT][CORE] Doc for SparkContext.addJobTag and Connect SparkSession.addTag
### What changes were proposed in this pull request?
Add more documentation about using job tags, similar to how SparkContext.setJobGroup is documented.
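For context, a minimal, self-contained sketch of the cancellation flow the new docs describe, using the core `SparkContext` API covered by this patch; the tag name, timings, and `local[2]` master are illustrative, not part of the change:

```scala
import org.apache.spark.{SparkConf, SparkContext}

object JobTagSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("job-tag-demo").setMaster("local[2]"))

    // Tags are per-thread: jobs started by this worker thread carry "demo-tag".
    val worker = new Thread(() => {
      sc.addJobTag("demo-tag")
      try {
        sc.parallelize(1 to 10000, 2).map { i => Thread.sleep(10); i }.count()
      } catch {
        case e: Exception =>
          // count() fails with a cancellation error once the tag is cancelled.
          println(s"Job interrupted: ${e.getMessage}")
      }
    })
    worker.start()

    Thread.sleep(2000)
    // Any other thread can cancel every job carrying the tag.
    sc.cancelJobsWithTag("demo-tag")
    worker.join()
    sc.stop()
  }
}
```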
### Why are the changes needed?
Better doc.
### Does this PR introduce _any_ user-facing change?
Yes, better doc.
### How was this patch tested?
Doc only.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43182 from juliuszsompolski/SPARK-44762.
Authored-by: Juliusz Sompolski <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
 .../main/scala/org/apache/spark/sql/SparkSession.scala  | 16 ++++++++++++++++
 core/src/main/scala/org/apache/spark/SparkContext.scala | 16 ++++++++++++++++
 python/pyspark/context.py                               |  8 ++++++++
 python/pyspark/sql/session.py                           | 10 +++++++++-
4 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 42052e3f8e66..e60bda0a838e 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -675,6 +675,22 @@ class SparkSession private[sql] (
   /**
    * Add a tag to be assigned to all the operations started by this thread in this session.
    *
+   * Often, a unit of execution in an application consists of multiple Spark executions.
+   * Application programmers can use this method to group all those jobs together and give a group
+   * tag. The application can use `org.apache.spark.sql.SparkSession.interruptTag` to cancel all
+   * running executions with this tag. For example:
+   * {{{
+   *   // In the main thread:
+   *   spark.addTag("myjobs")
+   *   spark.range(10).map(i => { Thread.sleep(10); i }).collect()
+   *
+   *   // In a separate thread:
+   *   spark.interruptTag("myjobs")
+   * }}}
+   *
+   * There may be multiple tags present at the same time, so different parts of the application
+   * may use different tags to perform cancellation at different levels of granularity.
+   *
    * @param tag
    *   The tag to be added. Cannot contain ',' (comma) character or be an empty string.
    *
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index dd95b71d4d59..ced26144e07a 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -874,6 +874,22 @@ class SparkContext(config: SparkConf) extends Logging {
   /**
    * Add a tag to be assigned to all the jobs started by this thread.
    *
+   * Often, a unit of execution in an application consists of multiple Spark actions or jobs.
+   * Application programmers can use this method to group all those jobs together and give a
+   * group tag. The application can use `org.apache.spark.SparkContext.cancelJobsWithTag` to
+   * cancel all running executions with this tag. For example:
+   * {{{
+   * // In the main thread:
+   * sc.addJobTag("myjobs")
+   * sc.parallelize(1 to 10000, 2).map { i => Thread.sleep(10); i }.count()
+   *
+   * // In a separate thread:
+   * sc.cancelJobsWithTag("myjobs")
+   * }}}
+   *
+   * There may be multiple tags present at the same time, so different parts of the application
+   * may use different tags to perform cancellation at different levels of granularity.
+   *
    * @param tag The tag to be added. Cannot contain ',' (comma) character.
    *
    * @since 3.5.0
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index e4c8b2378721..82cd325c279b 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -2193,6 +2193,14 @@ class SparkContext:
"""
Add a tag to be assigned to all the jobs started by this thread.
+ Often, a unit of execution in an application consists of multiple
Spark actions or jobs.
+ Application programmers can use this method to group all those jobs
together and give a
+ group tag. The application can use
:meth:`SparkContext.cancelJobsWithTag` to cancel all
+ running executions with this tag.
+
+ There may be multiple tags present at the same time, so different
parts of application may
+ use different tags to perform cancellation at different levels of
granularity.
+
.. versionadded:: 3.5.0
Parameters
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index de2e8d0cda2a..5c07ca607b73 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -2010,11 +2010,19 @@ class SparkSession(SparkConversionMixin):
"""
Add a tag to be assigned to all the operations started by this thread
in this session.
+ Often, a unit of execution in an application consists of multiple
Spark executions.
+ Application programmers can use this method to group all those jobs
together and give a
+ group tag. The application can use :meth:`SparkSession.interruptTag`
to cancel all running
+ executions with this tag.
+
+ There may be multiple tags present at the same time, so different
parts of application may
+ use different tags to perform cancellation at different levels of
granularity.
+
.. versionadded:: 3.5.0
Parameters
----------
- tag : list of str
+ tag : str
The tag to be added. Cannot contain ',' (comma) character or be an
empty string.
"""
raise RuntimeError(