This is an automated email from the ASF dual-hosted git repository.

felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 017fa4bd52 [Tools] Improve error handling and data sanitization in UnsupportedOperators report (#11601)
017fa4bd52 is described below

commit 017fa4bd52d58a5d9a756bac83cedfe34147a3c0
Author: litao <[email protected]>
AuthorDate: Thu Feb 12 11:25:32 2026 +0800

    [Tools] Improve error handling and data sanitization in UnsupportedOperators report (#11601)
    
    Co-authored-by: tom03.li <[email protected]>
---
 .../org/apache/gluten/qt/writer/ImpactReport.scala |  3 ++-
 .../qt/writer/OperatorImpactReportWriter.scala     | 29 ++++++++++++++++++----
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/ImpactReport.scala b/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/ImpactReport.scala
index 41c1b0977e..b85a6e0da8 100644
--- a/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/ImpactReport.scala
+++ b/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/ImpactReport.scala
@@ -21,7 +21,8 @@ import org.apache.gluten.qt.support.ResultVisitor.UnsupportedImpact
 case class ImpactReport(operatorName: String, impact: UnsupportedImpact) extends Report {
   private val cumulativeCpuMs = impact.getCumulativeCpuDuration.toMillis
   private val count = impact.getCount
+  private val sanitizedOperatorName = operatorName.replaceAll("[\r\n\t]+", " ").trim
 
   override def toTSVLine: String =
-    Seq(s"$operatorName", s"$cumulativeCpuMs", s"$count").mkString("\t")
+    Seq(s"$sanitizedOperatorName", s"$cumulativeCpuMs", s"$count").mkString("\t")
 }
diff --git a/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/OperatorImpactReportWriter.scala b/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/OperatorImpactReportWriter.scala
index b5675dbe97..e02366f585 100644
--- a/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/OperatorImpactReportWriter.scala
+++ b/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/OperatorImpactReportWriter.scala
@@ -30,12 +30,31 @@ case class OperatorImpactReportWriter(conf: QualificationToolConfiguration)
 
   override def doPostProcess(lines: Iterator[String]): Iterator[String] = {
     val sumMap = mutable.Map[String, (Long, Long)]()
+    var lineNumber = 0
     for (line <- lines) {
-      val Array(col1, col2, col3) = line.split("\t").map(_.trim)
-      val cpu = col2.toLong
-      val count = col3.toLong
-      val current = sumMap.getOrElse(col1, (0L, 0L))
-      sumMap(col1) = (current._1 + cpu, current._2 + count)
+      lineNumber += 1
+      try {
+        val columns = line.split("\t").map(_.trim)
+        if (columns.length != 3) {
+          System.err.println(
+            s"[WARN] Line $lineNumber: Expected 3 columns, got ${columns.length}. Skipping. Content: $line")
+        } else {
+          val Array(col1, col2, col3) = columns
+          try {
+            val cpu = col2.toLong
+            val count = col3.toLong
+            val current = sumMap.getOrElse(col1, (0L, 0L))
+            sumMap(col1) = (current._1 + cpu, current._2 + count)
+          } catch {
+            case e: NumberFormatException =>
+              System.err.println(
+                s"[WARN] Line $lineNumber: Invalid number format (cpu='$col2', count='$col3'). Skipping. Content: $line")
+          }
+        }
+      } catch {
+        case e: Exception =>
+          System.err.println(s"[ERROR] Line $lineNumber: ${e.getMessage}. Skipping. Content: $line")
+      }
     }
     sumMap.toSeq
       .sortBy(_._2)(Ordering.Tuple2(Ordering.Long.reverse, Ordering.Long.reverse))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to