This is an automated email from the ASF dual-hosted git repository.

marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new dfbf912e64 [GLUTEN-8794] Support logging as many fallback reasons as 
possible (#8798)
dfbf912e64 is described below

commit dfbf912e64cb23262793b9a13929f09329b1acc2
Author: Rong Ma <[email protected]>
AuthorDate: Tue Feb 25 19:09:26 2025 +0000

    [GLUTEN-8794] Support logging as many fallback reasons as possible (#8798)
---
 .../backendsapi/velox/VeloxValidatorApi.scala      |  4 +-
 .../gluten/backendsapi/SparkPlanExecApi.scala      |  2 +-
 .../BasicPhysicalOperatorTransformer.scala         | 10 ++--
 .../gluten/execution/WholeStageTransformer.scala   | 58 ++++++++++++++--------
 .../gluten/expression/ExpressionConverter.scala    | 31 +++++++-----
 .../apache/gluten/extension/ValidationResult.scala | 11 +++-
 .../sql/execution/GlutenFallbackReporter.scala     |  2 +-
 .../spark/sql/gluten/GlutenFallbackSuite.scala     |  2 +-
 .../spark/sql/gluten/GlutenFallbackSuite.scala     |  2 +-
 .../spark/sql/gluten/GlutenFallbackSuite.scala     |  2 +-
 .../org/apache/gluten/config/GlutenConfig.scala    |  8 +++
 11 files changed, 87 insertions(+), 45 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
index 9b30013661..8b2f76eb04 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
@@ -49,8 +49,8 @@ class VeloxValidatorApi extends ValidatorApi {
     }
     ValidationResult.failed(
       String.format(
-        "Native validation failed: %n%s",
-        info.fallbackInfo.asScala.reduce[String] { case (l, r) => l + "\n" + r 
}))
+        "Native validation failed: %n   |- %s",
+        info.fallbackInfo.asScala.reduce[String] { case (l, r) => l + "\n   |- 
" + r }))
   }
 
   private def isPrimitiveType(dataType: DataType): Boolean = {
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 54c63a563e..02803bbf2b 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -199,7 +199,7 @@ trait SparkPlanExecApi {
       substraitExprName: String,
       child: ExpressionTransformer,
       original: TryEval): ExpressionTransformer = {
-    throw new GlutenNotSupportException("try_eval is not supported")
+    throw new 
GlutenNotSupportException(s"try_eval(${original.child.prettyName}) is not 
supported")
   }
 
   def genArithmeticTransformer(
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
index fe4898dbaa..d7f729f4d2 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
@@ -183,10 +183,12 @@ abstract class ProjectExecTransformerBase(val list: 
Seq[NamedExpression], val in
     val substraitContext = new SubstraitContext
     // Firstly, need to check if the Substrait plan for this operator can be 
successfully generated.
     val operatorId = substraitContext.nextOperatorId(this.nodeName)
-    val relNode =
-      getRelNode(substraitContext, list, child.output, operatorId, null, 
validation = true)
-    // Then, validate the generated plan in native engine.
-    doNativeValidation(substraitContext, relNode)
+    failValidationWithException {
+      val relNode =
+        getRelNode(substraitContext, list, child.output, operatorId, null, 
validation = true)
+      // Then, validate the generated plan in native engine.
+      doNativeValidation(substraitContext, relNode)
+    }()
   }
 
   override def isNullIntolerant(expr: Expression): Boolean = expr match {
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
index 72755e5920..fa99a418e9 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
@@ -66,6 +66,32 @@ trait ValidatablePlan extends GlutenPlan with LogLevelUtil {
 
   protected lazy val enableNativeValidation = glutenConf.enableNativeValidation
 
+  protected lazy val validationFailFast = glutenConf.validationFailFast
+
+  // Wraps a validation function f that can also throw a 
GlutenNotSupportException.
+  // Returns ValidationResult.failed if f throws a GlutenNotSupportException,
+  // otherwise returns the result of f.
+  protected def failValidationWithException(f: => ValidationResult)(
+      finallyBlock: => Unit = ()): ValidationResult = {
+    try {
+      f
+    } catch {
+      case e @ (_: GlutenNotSupportException | _: 
UnsupportedOperationException) =>
+        if (!e.isInstanceOf[GlutenNotSupportException]) {
+          logDebug(s"Just a warning. This exception perhaps needs to be 
fixed.", e)
+        }
+        val message = s"Validation failed with exception from: $nodeName, 
reason: ${e.getMessage}"
+        if (glutenConf.printStackOnValidationFailure) {
+          logOnLevel(glutenConf.validationLogLevel, message, e)
+        }
+        ValidationResult.failed(message)
+      case t: Throwable =>
+        throw t
+    } finally {
+      finallyBlock
+    }
+  }
+
   /**
    * Validate whether this SparkPlan supports to be transformed into substrait 
node in Native Code.
    */
@@ -79,29 +105,21 @@ trait ValidatablePlan extends GlutenPlan with LogLevelUtil 
{
       .getOrElse(ValidationResult.succeeded)
     if (!schemaValidationResult.ok()) {
       TestStats.addFallBackClassName(this.getClass.toString)
-      return schemaValidationResult
+      if (validationFailFast) {
+        return schemaValidationResult
+      }
     }
-    try {
+    val validationResult = failValidationWithException {
       TransformerState.enterValidation
-      val res = doValidateInternal()
-      if (!res.ok()) {
-        TestStats.addFallBackClassName(this.getClass.toString)
-      }
-      res
-    } catch {
-      case e @ (_: GlutenNotSupportException | _: 
UnsupportedOperationException) =>
-        if (!e.isInstanceOf[GlutenNotSupportException]) {
-          logDebug(s"Just a warning. This exception perhaps needs to be 
fixed.", e)
-        }
-        // FIXME: Use a validation-specific method to catch validation failures
-        TestStats.addFallBackClassName(this.getClass.toString)
-        logValidationMessage(
-          s"Validation failed with exception for plan: $nodeName, due to: 
${e.getMessage}",
-          e)
-        ValidationResult.failed(e.getMessage)
-    } finally {
+      doValidateInternal()
+    } {
       TransformerState.finishValidation
     }
+    if (!validationResult.ok()) {
+      TestStats.addFallBackClassName(this.getClass.toString)
+    }
+    if (validationFailFast) validationResult
+    else ValidationResult.merge(schemaValidationResult, validationResult)
   }
 
   protected def doValidateInternal(): ValidationResult = 
ValidationResult.succeeded
@@ -140,7 +158,7 @@ trait TransformSupport extends ValidatablePlan {
 
   // Since https://github.com/apache/incubator-gluten/pull/2185.
   protected def doNativeValidation(context: SubstraitContext, node: RelNode): 
ValidationResult = {
-    if (node != null && glutenConf.enableNativeValidation) {
+    if (node != null && enableNativeValidation) {
       val planNode = PlanBuilder.makePlan(context, Lists.newArrayList(node))
       BackendsApiManager.getValidatorApiInstance
         .doNativeValidateWithFailureReason(planNode)
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index e4aeb3ef78..af0db2f642 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -736,21 +736,26 @@ object ExpressionConverter extends SQLConfHelper with 
Logging {
     }
   }
 
-  private def getAndCheckSubstraitName(expr: Expression, expressionsMap: 
Map[Class[_], String]) = {
+  private def getAndCheckSubstraitName(
+      expr: Expression,
+      expressionsMap: Map[Class[_], String]): String = {
     TestStats.addExpressionClassName(expr.getClass.getName)
     // Check whether Gluten supports this expression
-    val substraitExprNameOpt = expressionsMap.get(expr.getClass)
-    if (substraitExprNameOpt.isEmpty) {
-      throw new GlutenNotSupportException(
-        s"Not supported to map spark function name" +
-          s" to substrait function name: $expr, class name: 
${expr.getClass.getSimpleName}.")
-    }
-    val substraitExprName = substraitExprNameOpt.get
-    // Check whether each backend supports this expression
-    if 
(!BackendsApiManager.getValidatorApiInstance.doExprValidate(substraitExprName, 
expr)) {
-      throw new GlutenNotSupportException(s"Not supported: $expr.")
-    }
-    substraitExprName
+    expressionsMap
+      .get(expr.getClass)
+      .flatMap {
+        name =>
+          if (!BackendsApiManager.getValidatorApiInstance.doExprValidate(name, 
expr)) {
+            None
+          } else {
+            Some(name)
+          }
+      }
+      .getOrElse {
+        throw new GlutenNotSupportException(
+          s"Not supported to map spark function name" +
+            s" to substrait function name: $expr, class name: 
${expr.getClass.getSimpleName}.")
+      }
   }
 
   private def bindGetStructField(
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/ValidationResult.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/ValidationResult.scala
index b49917c59f..28aee70441 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/ValidationResult.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/ValidationResult.scala
@@ -49,7 +49,16 @@ object ValidationResult {
   }
 
   def succeeded: ValidationResult = Succeeded
-  def failed(reason: String): ValidationResult = Failed(reason)
+  def failed(reason: String, prefix: String = "\n - "): ValidationResult = 
Failed(prefix + reason)
+  def merge(left: ValidationResult, right: ValidationResult): ValidationResult 
=
+    (left.ok(), right.ok()) match {
+      case (_, true) =>
+        left
+      case (true, false) =>
+        right
+      case (false, false) =>
+        failed(left.reason() + right.reason(), prefix = "")
+    }
 
   implicit class EncodeFallbackTagImplicits(result: ValidationResult) {
     def tagOnFallback(plan: TreeNode[_]): Unit = {
diff --git 
a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
 
b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
index ce69c22810..49bb82a9a8 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
@@ -48,7 +48,7 @@ case class GlutenFallbackReporter(glutenConf: GlutenConfig, 
spark: SparkSession)
     val executionIdInfo = 
Option(spark.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY))
       .map(id => s"[QueryId=$id]")
       .getOrElse("")
-    logOnLevel(logLevel, s"Validation failed for plan: 
$nodeName$executionIdInfo, due to: $reason.")
+    logOnLevel(logLevel, s"Validation failed for plan: 
$nodeName$executionIdInfo, due to: $reason")
   }
 
   private def printFallbackReason(plan: SparkPlan): Unit = {
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
index 15fb9aced2..8dd964dd6e 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
@@ -51,7 +51,7 @@ class GlutenFallbackSuite extends GlutenSQLTestsTrait with 
AdaptiveSparkPlanHelp
         }
       }
       val msgRegex = """Validation failed for plan: Scan parquet 
default\.t\[QueryId=[0-9]+\],""" +
-        """ due to: \[FallbackByUserOptions\] Validation failed on node Scan 
parquet default\.t\."""
+        """ due to: \[FallbackByUserOptions\] Validation failed on node Scan 
parquet default\.t"""
       
assert(testAppender.loggingEvents.exists(_.getMessage.getFormattedMessage.matches(msgRegex)))
     }
   }
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
index 3fe46bcdec..9070f15d42 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
@@ -53,7 +53,7 @@ class GlutenFallbackSuite extends GlutenSQLTestsTrait with 
AdaptiveSparkPlanHelp
       val msgRegex =
         """Validation failed for plan: Scan parquet 
spark_catalog.default\.t\[QueryId=[0-9]+\],""" +
           """ due to: \[FallbackByUserOptions\] Validation failed on node Scan 
parquet""" +
-          """ spark_catalog\.default\.t\.""".stripMargin
+          """ spark_catalog\.default\.t""".stripMargin
       
assert(testAppender.loggingEvents.exists(_.getMessage.getFormattedMessage.matches(msgRegex)))
     }
   }
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
index 3fe46bcdec..9070f15d42 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala
@@ -53,7 +53,7 @@ class GlutenFallbackSuite extends GlutenSQLTestsTrait with 
AdaptiveSparkPlanHelp
       val msgRegex =
         """Validation failed for plan: Scan parquet 
spark_catalog.default\.t\[QueryId=[0-9]+\],""" +
           """ due to: \[FallbackByUserOptions\] Validation failed on node Scan 
parquet""" +
-          """ spark_catalog\.default\.t\.""".stripMargin
+          """ spark_catalog\.default\.t""".stripMargin
       
assert(testAppender.loggingEvents.exists(_.getMessage.getFormattedMessage.matches(msgRegex)))
     }
   }
diff --git 
a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala 
b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
index 31d397d7ba..e82acda340 100644
--- a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
@@ -303,6 +303,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {
   def printStackOnValidationFailure: Boolean =
     getConf(VALIDATION_PRINT_FAILURE_STACK_)
 
+  def validationFailFast: Boolean = getConf(VALIDATION_FAIL_FAST)
+
   def enableFallbackReport: Boolean = getConf(FALLBACK_REPORTER_ENABLED)
 
   def debug: Boolean = getConf(DEBUG_ENABLED)
@@ -1310,6 +1312,12 @@ object GlutenConfig {
       .booleanConf
       .createWithDefault(false)
 
+  val VALIDATION_FAIL_FAST =
+    buildConf("spark.gluten.sql.validation.failFast")
+      .internal()
+      .booleanConf
+      .createWithDefault(true)
+
   val SOFT_AFFINITY_LOG_LEVEL =
     buildConf("spark.gluten.soft-affinity.logLevel")
       .internal()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to