This is an automated email from the ASF dual-hosted git repository.

mingliang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new e3682fdafb [GLUTEN-8010][CORE] Don't generate native metrics if 
transformer don't generate relNode (#8011)
e3682fdafb is described below

commit e3682fdafb505af87ad1c1605908c5ec30463a9e
Author: Mingliang Zhu <[email protected]>
AuthorDate: Mon Nov 25 16:22:40 2024 +0800

    [GLUTEN-8010][CORE] Don't generate native metrics if transformer don't 
generate relNode (#8011)
---
 .../scala/org/apache/gluten/metrics/MetricsUtil.scala     |  3 +++
 .../execution/BasicPhysicalOperatorTransformer.scala      | 15 ++++++++++-----
 .../apache/gluten/execution/ExpandExecTransformer.scala   | 12 ++++++++----
 .../org/apache/gluten/execution/SortExecTransformer.scala | 12 ++++++++----
 .../apache/gluten/execution/WholeStageTransformer.scala   |  3 +++
 .../apache/gluten/execution/WindowExecTransformer.scala   | 12 ++++++++----
 .../org/apache/gluten/substrait/SubstraitContext.scala    | 13 -------------
 7 files changed, 40 insertions(+), 30 deletions(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala
index e1e0f7c11a..7d81467e97 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala
@@ -38,6 +38,9 @@ object MetricsUtil extends Logging {
           j.metricsUpdater(),
           // must put the buildPlan first
           Seq(treeifyMetricsUpdaters(j.buildPlan), 
treeifyMetricsUpdaters(j.streamedPlan)))
+      case t: TransformSupport if t.metricsUpdater() == MetricsUpdater.None =>
+        assert(t.children.size == 1, "MetricsUpdater.None can only be used on 
unary operator")
+        treeifyMetricsUpdaters(t.children.head)
       case t: TransformSupport =>
         MetricsUpdaterTree(t.metricsUpdater(), 
t.children.map(treeifyMetricsUpdaters))
       case _ =>
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
index dbe667ebb2..2830ef404c 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala
@@ -61,8 +61,13 @@ abstract class FilterExecTransformerBase(val cond: 
Expression, val input: SparkP
     case _ => false
   }
 
-  override def metricsUpdater(): MetricsUpdater =
+  override def isNoop: Boolean = getRemainingCondition == null
+
+  override def metricsUpdater(): MetricsUpdater = if (isNoop) {
+    MetricsUpdater.None
+  } else {
     
BackendsApiManager.getMetricsApiInstance.genFilterTransformerMetricsUpdater(metrics)
+  }
 
   def getRelNode(
       context: SubstraitContext,
@@ -149,15 +154,15 @@ abstract class FilterExecTransformerBase(val cond: 
Expression, val input: SparkP
 
   override protected def doTransform(context: SubstraitContext): 
TransformContext = {
     val childCtx = child.asInstanceOf[TransformSupport].transform(context)
-    val remainingCondition = getRemainingCondition
-    val operatorId = context.nextOperatorId(this.nodeName)
-    if (remainingCondition == null) {
+    if (isNoop) {
       // The computing for this filter is not needed.
-      context.registerEmptyRelToOperator(operatorId)
       // Since some columns' nullability will be removed after this filter, we 
need to update the
       // outputAttributes of child context.
       return TransformContext(output, childCtx.root)
     }
+
+    val operatorId = context.nextOperatorId(this.nodeName)
+    val remainingCondition = getRemainingCondition
     val currRel = getRelNode(
       context,
       remainingCondition,
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/ExpandExecTransformer.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/ExpandExecTransformer.scala
index b600175b28..c6936daaff 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/ExpandExecTransformer.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/ExpandExecTransformer.scala
@@ -48,8 +48,13 @@ case class ExpandExecTransformer(
     AttributeSet.fromAttributeSets(projections.flatten.map(_.references))
   }
 
-  override def metricsUpdater(): MetricsUpdater =
+  override def isNoop: Boolean = projections == null || projections.isEmpty
+
+  override def metricsUpdater(): MetricsUpdater = if (isNoop) {
+    MetricsUpdater.None
+  } else {
     
BackendsApiManager.getMetricsApiInstance.genExpandTransformerMetricsUpdater(metrics)
+  }
 
   // The GroupExpressions can output data with arbitrary partitioning, so set 
it
   // as UNKNOWN partitioning
@@ -112,13 +117,12 @@ case class ExpandExecTransformer(
 
   override protected def doTransform(context: SubstraitContext): 
TransformContext = {
     val childCtx = child.asInstanceOf[TransformSupport].transform(context)
-    val operatorId = context.nextOperatorId(this.nodeName)
-    if (projections == null || projections.isEmpty) {
+    if (isNoop) {
       // The computing for this Expand is not needed.
-      context.registerEmptyRelToOperator(operatorId)
       return childCtx
     }
 
+    val operatorId = context.nextOperatorId(this.nodeName)
     val currRel =
       getRelNode(context, projections, child.output, operatorId, 
childCtx.root, validation = false)
     assert(currRel != null, "Expand Rel should be valid")
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortExecTransformer.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortExecTransformer.scala
index c62a30b846..6f9564e6d5 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortExecTransformer.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortExecTransformer.scala
@@ -44,8 +44,13 @@ case class SortExecTransformer(
   @transient override lazy val metrics =
     
BackendsApiManager.getMetricsApiInstance.genSortTransformerMetrics(sparkContext)
 
-  override def metricsUpdater(): MetricsUpdater =
+  override def isNoop: Boolean = sortOrder == null || sortOrder.isEmpty
+
+  override def metricsUpdater(): MetricsUpdater = if (isNoop) {
+    MetricsUpdater.None
+  } else {
     
BackendsApiManager.getMetricsApiInstance.genSortTransformerMetricsUpdater(metrics)
+  }
 
   override def output: Seq[Attribute] = child.output
 
@@ -103,13 +108,12 @@ case class SortExecTransformer(
 
   override protected def doTransform(context: SubstraitContext): 
TransformContext = {
     val childCtx = child.asInstanceOf[TransformSupport].transform(context)
-    val operatorId = context.nextOperatorId(this.nodeName)
-    if (sortOrder == null || sortOrder.isEmpty) {
+    if (isNoop) {
       // The computing for this project is not needed.
-      context.registerEmptyRelToOperator(operatorId)
       return childCtx
     }
 
+    val operatorId = context.nextOperatorId(this.nodeName)
     val currRel =
       getRelNode(context, sortOrder, child.output, operatorId, childCtx.root, 
validation = false)
     assert(currRel != null, "Sort Rel should be valid")
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
index e8a42883a5..6414b67a80 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala
@@ -98,6 +98,9 @@ trait TransformSupport extends GlutenPlan {
         Seq(plan.executeColumnar())
     }
   }
+
+  // When true, it will not generate relNode, nor will it generate native 
metrics.
+  def isNoop: Boolean = false
 }
 
 trait LeafTransformSupport extends TransformSupport with LeafExecNode {
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WindowExecTransformer.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WindowExecTransformer.scala
index 7b9e2865f8..28d7809924 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WindowExecTransformer.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WindowExecTransformer.scala
@@ -51,8 +51,13 @@ case class WindowExecTransformer(
   @transient override lazy val metrics =
     
BackendsApiManager.getMetricsApiInstance.genWindowTransformerMetrics(sparkContext)
 
-  override def metricsUpdater(): MetricsUpdater =
+  override def isNoop: Boolean = windowExpression == null || 
windowExpression.isEmpty
+
+  override def metricsUpdater(): MetricsUpdater = if (isNoop) {
+    MetricsUpdater.None
+  } else {
     
BackendsApiManager.getMetricsApiInstance.genWindowTransformerMetricsUpdater(metrics)
+  }
 
   override def output: Seq[Attribute] = child.output ++ 
windowExpression.map(_.toAttribute)
 
@@ -177,13 +182,12 @@ case class WindowExecTransformer(
 
   override protected def doTransform(context: SubstraitContext): 
TransformContext = {
     val childCtx = child.asInstanceOf[TransformSupport].transform(context)
-    val operatorId = context.nextOperatorId(this.nodeName)
-    if (windowExpression == null || windowExpression.isEmpty) {
+    if (isNoop) {
       // The computing for this operator is not needed.
-      context.registerEmptyRelToOperator(operatorId)
       return childCtx
     }
 
+    val operatorId = context.nextOperatorId(this.nodeName)
     val currRel =
       getWindowRel(context, child.output, operatorId, childCtx.root, 
validation = false)
     assert(currRel != null, "Window Rel should be valid")
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/substrait/SubstraitContext.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/substrait/SubstraitContext.scala
index 79148d9f30..1ceb2d4155 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/substrait/SubstraitContext.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/substrait/SubstraitContext.scala
@@ -112,19 +112,6 @@ class SubstraitContext extends Serializable {
     id
   }
 
-  /**
-   * Register empty rel list to certain operator id. Used when the computing 
of a Spark transformer
-   * is omitted.
-   * @param operatorId
-   *   operator id
-   */
-  def registerEmptyRelToOperator(operatorId: JLong): Unit = {
-    if (!operatorToRelsMap.containsKey(operatorId)) {
-      val rels = new JArrayList[JLong]()
-      operatorToRelsMap.put(operatorId, rels)
-    }
-  }
-
   /**
    * Return the registered map.
    * @return


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to