This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new b6d868c0 feat: Show user a more intuitive message when queries fall
back to Spark (#656)
b6d868c0 is described below
commit b6d868c050ce21fd16641063d7be917e63d66e6b
Author: Andy Grove <[email protected]>
AuthorDate: Fri Jul 12 14:39:58 2024 -0600
feat: Show user a more intuitive message when queries fall back to Spark
(#656)
* update rustfmt to reorder imports
* Enable fallback logging by default and use verbose format
* update config guide
* revert change
* revert change
* call generateVerboseExtendedInfo instead of generateExtendedInfo
* improve log message
* improve log message
* format
---
common/src/main/scala/org/apache/comet/CometConf.scala | 5 +++--
docs/source/user-guide/configs.md | 2 +-
native/{core => }/rustfmt.toml | 0
.../org/apache/comet/CometSparkSessionExtensions.scala | 18 +++++++-----------
4 files changed, 11 insertions(+), 14 deletions(-)
diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala
b/common/src/main/scala/org/apache/comet/CometConf.scala
index 787e8b4e..bcc8d25a 100644
--- a/common/src/main/scala/org/apache/comet/CometConf.scala
+++ b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -286,9 +286,10 @@ object CometConf extends ShimCometConf {
conf("spark.comet.explainFallback.enabled")
.doc(
"When this setting is enabled, Comet will provide logging explaining
the reason(s) " +
- "why a query stage cannot be executed natively.")
+ "why a query stage cannot be executed natively. Set this to false to
" +
+ "reduce the amount of logging.")
.booleanConf
- .createWithDefault(false)
+ .createWithDefault(true)
val COMET_BATCH_SIZE: ConfigEntry[Int] = conf("spark.comet.batchSize")
.doc("The columnar batch size, i.e., the maximum number of rows that a
batch can contain.")
diff --git a/docs/source/user-guide/configs.md
b/docs/source/user-guide/configs.md
index 28374907..501a2ba3 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -39,7 +39,7 @@ Comet provides the following configuration settings.
| spark.comet.exec.shuffle.enabled | Whether to enable Comet native shuffle.
By default, this config is false. Note that this requires setting
'spark.shuffle.manager' to
'org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager'.
'spark.shuffle.manager' must be set before starting the Spark application and
cannot be changed during the application. | false |
| spark.comet.exec.shuffle.mode | The mode of Comet shuffle. This config is
only effective if Comet shuffle is enabled. Available modes are 'native',
'jvm', and 'auto'. 'native' is for native shuffle which has best performance in
general. 'jvm' is for jvm-based columnar shuffle which has higher coverage than
native shuffle. 'auto' is for Comet to choose the best shuffle mode based on
the query plan. By default, this config is 'jvm'. | jvm |
| spark.comet.explain.verbose.enabled | When this setting is enabled, Comet
will provide a verbose tree representation of the extended information. | false
|
-| spark.comet.explainFallback.enabled | When this setting is enabled, Comet
will provide logging explaining the reason(s) why a query stage cannot be
executed natively. | false |
+| spark.comet.explainFallback.enabled | When this setting is enabled, Comet
will provide logging explaining the reason(s) why a query stage cannot be
executed natively. Set this to false to reduce the amount of logging. | true |
| spark.comet.memory.overhead.factor | Fraction of executor memory to be
allocated as additional non-heap memory per executor process for Comet. Default
value is 0.2. | 0.2 |
| spark.comet.memory.overhead.min | Minimum amount of additional memory to be
allocated per executor process for Comet, in MiB. | 402653184b |
| spark.comet.nativeLoadRequired | Whether to require Comet native library to
load successfully when Comet is enabled. If not, Comet will silently fall back
to Spark when it fails to load the native lib. Otherwise, an error will be
thrown and the Spark job will be aborted. | false |
diff --git a/native/core/rustfmt.toml b/native/rustfmt.toml
similarity index 100%
rename from native/core/rustfmt.toml
rename to native/rustfmt.toml
diff --git
a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala
b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala
index c30d6b0f..028ea063 100644
--- a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala
+++ b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala
@@ -850,17 +850,13 @@ class CometSparkSessionExtensions
// if the plan cannot be run fully natively then explain why (when
// appropriate config is enabled)
if (CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.get()) {
- new ExtendedExplainInfo().extensionInfo(newPlan) match {
- case reasons if reasons.size == 1 =>
- logWarning(
- "Comet cannot execute some parts of this plan natively " +
- s"because ${reasons.head}")
- case reasons if reasons.size > 1 =>
- logWarning(
- "Comet cannot execute some parts of this plan natively" +
- s" because:\n\t- ${reasons.mkString("\n\t- ")}")
- case _ =>
- // no reasons recorded
+ val info = new ExtendedExplainInfo()
+ if (info.extensionInfo(newPlan).nonEmpty) {
+ logWarning(
+ "Comet cannot execute some parts of this plan natively " +
+ s"(set ${CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key}=false "
+
+ "to disable this logging):\n" +
+ s"${info.generateVerboseExtendedInfo(newPlan)}")
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]