This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 28a0a7bbf9 [GLUTEN-10818][VL] Implement KnownSizeEstimation for buildSideRelation (#10817)
28a0a7bbf9 is described below

commit 28a0a7bbf9d7289fc73cbc0c91f7125581e1e1fa
Author: Terry Wang <[email protected]>
AuthorDate: Mon Oct 13 15:21:52 2025 +0800

    [GLUTEN-10818][VL] Implement KnownSizeEstimation for buildSideRelation (#10817)
---
 .../spark/sql/execution/ColumnarBuildSideRelation.scala     | 11 ++++++++++-
 .../execution/unsafe/UnsafeColumnarBuildSideRelation.scala  | 13 +++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
index 59a9cb2b00..d542fd92b9 100644
--- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
+++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
@@ -33,6 +33,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.utils.SparkArrowUtil
 import org.apache.spark.sql.vectorized.ColumnarBatch
 import org.apache.spark.task.TaskResources
+import org.apache.spark.util.KnownSizeEstimation
 
 import org.apache.arrow.c.ArrowSchema
 
@@ -61,7 +62,8 @@ case class ColumnarBuildSideRelation(
     output: Seq[Attribute],
     batches: Array[Array[Byte]],
     safeBroadcastMode: SafeBroadcastMode)
-  extends BuildSideRelation {
+  extends BuildSideRelation
+  with KnownSizeEstimation {
 
   // Rebuild the real BroadcastMode on demand; never serialize it.
   @transient override lazy val mode: BroadcastMode =
@@ -236,4 +238,11 @@ case class ColumnarBuildSideRelation(
     }
     iterator.toArray
   }
+  /** Estimated size: summed byte length of every array in `batches`, or 0 if it is null. */
+  override def estimatedSize: Long =
+    if (batches == null) {
+      0L
+    } else {
+      batches.foldLeft(0L)((bytes, batch) => bytes + batch.length)
+    }
 }
diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/unsafe/UnsafeColumnarBuildSideRelation.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/unsafe/UnsafeColumnarBuildSideRelation.scala
index 80e92a1537..308834657a 100644
--- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/unsafe/UnsafeColumnarBuildSideRelation.scala
+++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/unsafe/UnsafeColumnarBuildSideRelation.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.utils.SparkArrowUtil
 import org.apache.spark.sql.vectorized.ColumnarBatch
 import org.apache.spark.task.TaskResources
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{KnownSizeEstimation, Utils}
 
 import com.esotericsoftware.kryo.{Kryo, KryoSerializable}
 import com.esotericsoftware.kryo.io.{Input, Output}
@@ -102,7 +102,8 @@ case class UnsafeColumnarBuildSideRelation(
   extends BuildSideRelation
   with Externalizable
   with Logging
-  with KryoSerializable {
+  with KryoSerializable
+  with KnownSizeEstimation {
 
   // Rebuild the real BroadcastMode on demand; never serialize it.
   @transient override lazy val mode: BroadcastMode =
@@ -366,4 +367,12 @@ case class UnsafeColumnarBuildSideRelation(
     }
     iterator.toArray
   }
+
+  /** Estimated size: `batches.totalBytes`, or 0 when `batches` is null. */
+  override def estimatedSize: Long =
+    if (batches == null) {
+      0L
+    } else {
+      batches.totalBytes
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to