This is an automated email from the ASF dual-hosted git repository.
marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new dfbf912e64 [GLUTEN-8794] Support logging as many fallback reasons as
possible (#8798)
dfbf912e64 is described below
commit dfbf912e64cb23262793b9a13929f09329b1acc2
Author: Rong Ma <[email protected]>
AuthorDate: Tue Feb 25 19:09:26 2025 +0000
[GLUTEN-8794] Support logging as many fallback reasons as possible (#8798)
---
.../backendsapi/velox/VeloxValidatorApi.scala | 4 +-
.../gluten/backendsapi/SparkPlanExecApi.scala | 2 +-
.../BasicPhysicalOperatorTransformer.scala | 10 ++--
.../gluten/execution/WholeStageTransformer.scala | 58 ++++++++++++++--------
.../gluten/expression/ExpressionConverter.scala | 31 +++++++-----
.../apache/gluten/extension/ValidationResult.scala | 11 +++-
.../sql/execution/GlutenFallbackReporter.scala | 2 +-
.../spark/sql/gluten/GlutenFallbackSuite.scala | 2 +-
.../spark/sql/gluten/GlutenFallbackSuite.scala | 2 +-
.../spark/sql/gluten/GlutenFallbackSuite.scala | 2 +-
.../org/apache/gluten/config/GlutenConfig.scala | 8 +++
11 files changed, 87 insertions(+), 45 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
index 9b30013661..8b2f76eb04 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
@@ -49,8 +49,8 @@ class VeloxValidatorApi extends ValidatorApi {
}
ValidationResult.failed(
String.format(
- "Native validation failed: %n%s",
- info.fallbackInfo.asScala.reduce[String] { case (l, r) => l + "\n" + r
}))
+ "Native validation failed: %n |- %s",
+ info.fallbackInfo.asScala.reduce[String] { case (l, r) => l + "\n |-
" + r }))
}
private def isPrimitiveType(dataType: DataType): Boolean = {
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 54c63a563e..02803bbf2b 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -199,7 +199,7 @@ trait SparkPlanExecApi {
substraitExprName: String,
child: ExpressionTransformer,
original: TryEval): ExpressionTransformer = {
- throw new GlutenNotSupportException("try_eval is not supported")
+ throw new
GlutenNotSupportException(s"try_eval(${original.child.prettyName}) is not
supported")
}
def genArithmeticTransformer(
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
index fe4898dbaa..d7f729f4d2 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
@@ -183,10 +183,12 @@ abstract class ProjectExecTransformerBase(val list:
Seq[NamedExpression], val in
val substraitContext = new SubstraitContext
// Firstly, need to check if the Substrait plan for this operator can be
successfully generated.
val operatorId = substraitContext.nextOperatorId(this.nodeName)
- val relNode =
- getRelNode(substraitContext, list, child.output, operatorId, null,
validation = true)
- // Then, validate the generated plan in native engine.
- doNativeValidation(substraitContext, relNode)
+ failValidationWithException {
+ val relNode =
+ getRelNode(substraitContext, list, child.output, operatorId, null,
validation = true)
+ // Then, validate the generated plan in native engine.
+ doNativeValidation(substraitContext, relNode)
+ }()
}
override def isNullIntolerant(expr: Expression): Boolean = expr match {
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
index 72755e5920..fa99a418e9 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
@@ -66,6 +66,32 @@ trait ValidatablePlan extends GlutenPlan with LogLevelUtil {
protected lazy val enableNativeValidation = glutenConf.enableNativeValidation
+ protected lazy val validationFailFast = glutenConf.validationFailFast
+
+ // Wraps a validation function f that can also throw a
GlutenNotSupportException.
+ // Returns ValidationResult.failed if f throws a GlutenNotSupportException,
+ // otherwise returns the result of f.
+ protected def failValidationWithException(f: => ValidationResult)(
+ finallyBlock: => Unit = ()): ValidationResult = {
+ try {
+ f
+ } catch {
+ case e @ (_: GlutenNotSupportException | _:
UnsupportedOperationException) =>
+ if (!e.isInstanceOf[GlutenNotSupportException]) {
+ logDebug(s"Just a warning. This exception perhaps needs to be
fixed.", e)
+ }
+ val message = s"Validation failed with exception from: $nodeName,
reason: ${e.getMessage}"
+ if (glutenConf.printStackOnValidationFailure) {
+ logOnLevel(glutenConf.validationLogLevel, message, e)
+ }
+ ValidationResult.failed(message)
+ case t: Throwable =>
+ throw t
+ } finally {
+ finallyBlock
+ }
+ }
+
/**
* Validate whether this SparkPlan supports to be transformed into substrait
node in Native Code.
*/
@@ -79,29 +105,21 @@ trait ValidatablePlan extends GlutenPlan with LogLevelUtil
{
.getOrElse(ValidationResult.succeeded)
if (!schemaValidationResult.ok()) {
TestStats.addFallBackClassName(this.getClass.toString)
- return schemaValidationResult
+ if (validationFailFast) {
+ return schemaValidationResult
+ }
}
- try {
+ val validationResult = failValidationWithException {
TransformerState.enterValidation
- val res = doValidateInternal()
- if (!res.ok()) {
- TestStats.addFallBackClassName(this.getClass.toString)
- }
- res
- } catch {
- case e @ (_: GlutenNotSupportException | _:
UnsupportedOperationException) =>
- if (!e.isInstanceOf[GlutenNotSupportException]) {
- logDebug(s"Just a warning. This exception perhaps needs to be
fixed.", e)
- }
- // FIXME: Use a validation-specific method to catch validation failures
- TestStats.addFallBackClassName(this.getClass.toString)
- logValidationMessage(
- s"Validation failed with exception for plan: $nodeName, due to:
${e.getMessage}",
- e)
- ValidationResult.failed(e.getMessage)
- } finally {
+ doValidateInternal()
+ } {
TransformerState.finishValidation
}
+ if (!validationResult.ok()) {
+ TestStats.addFallBackClassName(this.getClass.toString)
+ }
+ if (validationFailFast) validationResult
+ else ValidationResult.merge(schemaValidationResult, validationResult)
}
protected def doValidateInternal(): ValidationResult =
ValidationResult.succeeded
@@ -140,7 +158,7 @@ trait TransformSupport extends ValidatablePlan {
// Since https://github.com/apache/incubator-gluten/pull/2185.
protected def doNativeValidation(context: SubstraitContext, node: RelNode):
ValidationResult = {
- if (node != null && glutenConf.enableNativeValidation) {
+ if (node != null && enableNativeValidation) {
val planNode = PlanBuilder.makePlan(context, Lists.newArrayList(node))
BackendsApiManager.getValidatorApiInstance
.doNativeValidateWithFailureReason(planNode)
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index e4aeb3ef78..af0db2f642 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -736,21 +736,26 @@ object ExpressionConverter extends SQLConfHelper with
Logging {
}
}
- private def getAndCheckSubstraitName(expr: Expression, expressionsMap:
Map[Class[_], String]) = {
+ private def getAndCheckSubstraitName(
+ expr: Expression,
+ expressionsMap: Map[Class[_], String]): String = {
TestStats.addExpressionClassName(expr.getClass.getName)
// Check whether Gluten supports this expression
- val substraitExprNameOpt = expressionsMap.get(expr.getClass)
- if (substraitExprNameOpt.isEmpty) {
- throw new GlutenNotSupportException(
- s"Not supported to map spark function name" +
- s" to substrait function name: $expr, class name:
${expr.getClass.getSimpleName}.")
- }
- val substraitExprName = substraitExprNameOpt.get
- // Check whether each backend supports this expression
- if
(!BackendsApiManager.getValidatorApiInstance.doExprValidate(substraitExprName,
expr)) {
- throw new GlutenNotSupportException(s"Not supported: $expr.")
- }
- substraitExprName
+ expressionsMap
+ .get(expr.getClass)
+ .flatMap {
+ name =>
+ if (!BackendsApiManager.getValidatorApiInstance.doExprValidate(name,
expr)) {
+ None
+ } else {
+ Some(name)
+ }
+ }
+ .getOrElse {
+ throw new GlutenNotSupportException(
+ s"Not supported to map spark function name" +
+ s" to substrait function name: $expr, class name:
${expr.getClass.getSimpleName}.")
+ }
}
private def bindGetStructField(
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/ValidationResult.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/ValidationResult.scala
index b49917c59f..28aee70441 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/ValidationResult.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/ValidationResult.scala
@@ -49,7 +49,16 @@ object ValidationResult {
}
def succeeded: ValidationResult = Succeeded
- def failed(reason: String): ValidationResult = Failed(reason)
+ def failed(reason: String, prefix: String = "\n - "): ValidationResult =
Failed(prefix + reason)
+ def merge(left: ValidationResult, right: ValidationResult): ValidationResult
=
+ (left.ok(), right.ok()) match {
+ case (_, true) =>
+ left
+ case (true, false) =>
+ right
+ case (false, false) =>
+ failed(left.reason() + right.reason(), prefix = "")
+ }
implicit class EncodeFallbackTagImplicits(result: ValidationResult) {
def tagOnFallback(plan: TreeNode[_]): Unit = {
diff --git
a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
index ce69c22810..49bb82a9a8 100644
---
a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
+++
b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
@@ -48,7 +48,7 @@ case class GlutenFallbackReporter(glutenConf: GlutenConfig,
spark: SparkSession)
val executionIdInfo =
Option(spark.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY))
.map(id => s"[QueryId=$id]")
.getOrElse("")
- logOnLevel(logLevel, s"Validation failed for plan:
$nodeName$executionIdInfo, due to: $reason.")
+ logOnLevel(logLevel, s"Validation failed for plan:
$nodeName$executionIdInfo, due to: $reason")
}
private def printFallbackReason(plan: SparkPlan): Unit = {
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
index 15fb9aced2..8dd964dd6e 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
@@ -51,7 +51,7 @@ class GlutenFallbackSuite extends GlutenSQLTestsTrait with
AdaptiveSparkPlanHelp
}
}
val msgRegex = """Validation failed for plan: Scan parquet
default\.t\[QueryId=[0-9]+\],""" +
- """ due to: \[FallbackByUserOptions\] Validation failed on node Scan
parquet default\.t\."""
+ """ due to: \[FallbackByUserOptions\] Validation failed on node Scan
parquet default\.t"""
assert(testAppender.loggingEvents.exists(_.getMessage.getFormattedMessage.matches(msgRegex)))
}
}
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
index 3fe46bcdec..9070f15d42 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
@@ -53,7 +53,7 @@ class GlutenFallbackSuite extends GlutenSQLTestsTrait with
AdaptiveSparkPlanHelp
val msgRegex =
"""Validation failed for plan: Scan parquet
spark_catalog.default\.t\[QueryId=[0-9]+\],""" +
""" due to: \[FallbackByUserOptions\] Validation failed on node Scan
parquet""" +
- """ spark_catalog\.default\.t\.""".stripMargin
+ """ spark_catalog\.default\.t""".stripMargin
assert(testAppender.loggingEvents.exists(_.getMessage.getFormattedMessage.matches(msgRegex)))
}
}
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
index 3fe46bcdec..9070f15d42 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
@@ -53,7 +53,7 @@ class GlutenFallbackSuite extends GlutenSQLTestsTrait with
AdaptiveSparkPlanHelp
val msgRegex =
"""Validation failed for plan: Scan parquet
spark_catalog.default\.t\[QueryId=[0-9]+\],""" +
""" due to: \[FallbackByUserOptions\] Validation failed on node Scan
parquet""" +
- """ spark_catalog\.default\.t\.""".stripMargin
+ """ spark_catalog\.default\.t""".stripMargin
assert(testAppender.loggingEvents.exists(_.getMessage.getFormattedMessage.matches(msgRegex)))
}
}
diff --git
a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
index 31d397d7ba..e82acda340 100644
--- a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
@@ -303,6 +303,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {
def printStackOnValidationFailure: Boolean =
getConf(VALIDATION_PRINT_FAILURE_STACK_)
+ def validationFailFast: Boolean = getConf(VALIDATION_FAIL_FAST)
+
def enableFallbackReport: Boolean = getConf(FALLBACK_REPORTER_ENABLED)
def debug: Boolean = getConf(DEBUG_ENABLED)
@@ -1310,6 +1312,12 @@ object GlutenConfig {
.booleanConf
.createWithDefault(false)
+ val VALIDATION_FAIL_FAST =
+ buildConf("spark.gluten.sql.validation.failFast")
+ .internal()
+ .booleanConf
+ .createWithDefault(true)
+
val SOFT_AFFINITY_LOG_LEVEL =
buildConf("spark.gluten.soft-affinity.logLevel")
.internal()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]