This is an automated email from the ASF dual-hosted git repository.
felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 017fa4bd52 [Tools] Improve error handling and data sanitization in
UnsupportedOperators report (#11601)
017fa4bd52 is described below
commit 017fa4bd52d58a5d9a756bac83cedfe34147a3c0
Author: litao <[email protected]>
AuthorDate: Thu Feb 12 11:25:32 2026 +0800
[Tools] Improve error handling and data sanitization in
UnsupportedOperators report (#11601)
Co-authored-by: tom03.li <[email protected]>
---
.../org/apache/gluten/qt/writer/ImpactReport.scala | 3 ++-
.../qt/writer/OperatorImpactReportWriter.scala | 29 ++++++++++++++++++----
2 files changed, 26 insertions(+), 6 deletions(-)
diff --git
a/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/ImpactReport.scala
b/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/ImpactReport.scala
index 41c1b0977e..b85a6e0da8 100644
---
a/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/ImpactReport.scala
+++
b/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/ImpactReport.scala
@@ -21,7 +21,8 @@ import org.apache.gluten.qt.support.ResultVisitor.UnsupportedImpact
case class ImpactReport(operatorName: String, impact: UnsupportedImpact)
extends Report {
private val cumulativeCpuMs = impact.getCumulativeCpuDuration.toMillis
private val count = impact.getCount
+ private val sanitizedOperatorName = operatorName.replaceAll("[\r\n\t]+", " ").trim
override def toTSVLine: String =
- Seq(s"$operatorName", s"$cumulativeCpuMs", s"$count").mkString("\t")
+ Seq(s"$sanitizedOperatorName", s"$cumulativeCpuMs", s"$count").mkString("\t")
}
diff --git
a/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/OperatorImpactReportWriter.scala
b/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/OperatorImpactReportWriter.scala
index b5675dbe97..e02366f585 100644
---
a/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/OperatorImpactReportWriter.scala
+++
b/tools/qualification-tool/src/main/scala/org/apache/gluten/qt/writer/OperatorImpactReportWriter.scala
@@ -30,12 +30,31 @@ case class OperatorImpactReportWriter(conf: QualificationToolConfiguration)
override def doPostProcess(lines: Iterator[String]): Iterator[String] = {
val sumMap = mutable.Map[String, (Long, Long)]()
+ var lineNumber = 0
for (line <- lines) {
- val Array(col1, col2, col3) = line.split("\t").map(_.trim)
- val cpu = col2.toLong
- val count = col3.toLong
- val current = sumMap.getOrElse(col1, (0L, 0L))
- sumMap(col1) = (current._1 + cpu, current._2 + count)
+ lineNumber += 1
+ try {
+ val columns = line.split("\t").map(_.trim)
+ if (columns.length != 3) {
+ System.err.println(
+ s"[WARN] Line $lineNumber: Expected 3 columns, got ${columns.length}. Skipping. Content: $line")
+ } else {
+ val Array(col1, col2, col3) = columns
+ try {
+ val cpu = col2.toLong
+ val count = col3.toLong
+ val current = sumMap.getOrElse(col1, (0L, 0L))
+ sumMap(col1) = (current._1 + cpu, current._2 + count)
+ } catch {
+ case e: NumberFormatException =>
+ System.err.println(
+ s"[WARN] Line $lineNumber: Invalid number format (cpu='$col2', count='$col3'). Skipping. Content: $line")
+ }
+ }
+ } catch {
+ case e: Exception =>
+ System.err.println(s"[ERROR] Line $lineNumber: ${e.getMessage}. Skipping. Content: $line")
+ }
}
sumMap.toSeq
.sortBy(_._2)(Ordering.Tuple2(Ordering.Long.reverse,
Ordering.Long.reverse))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]