This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 28a0a7bbf9 [GLUTEN-10818][VL] Implement KnownSizeEstimation for
buildSideRelation (#10817)
28a0a7bbf9 is described below
commit 28a0a7bbf9d7289fc73cbc0c91f7125581e1e1fa
Author: Terry Wang <[email protected]>
AuthorDate: Mon Oct 13 15:21:52 2025 +0800
[GLUTEN-10818][VL] Implement KnownSizeEstimation for buildSideRelation
(#10817)
---
.../spark/sql/execution/ColumnarBuildSideRelation.scala | 11 ++++++++++-
.../execution/unsafe/UnsafeColumnarBuildSideRelation.scala | 13 +++++++++++--
2 files changed, 21 insertions(+), 3 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
index 59a9cb2b00..d542fd92b9 100644
---
a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
+++
b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
@@ -33,6 +33,7 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.utils.SparkArrowUtil
import org.apache.spark.sql.vectorized.ColumnarBatch
import org.apache.spark.task.TaskResources
+import org.apache.spark.util.KnownSizeEstimation
import org.apache.arrow.c.ArrowSchema
@@ -61,7 +62,8 @@ case class ColumnarBuildSideRelation(
output: Seq[Attribute],
batches: Array[Array[Byte]],
safeBroadcastMode: SafeBroadcastMode)
- extends BuildSideRelation {
+ extends BuildSideRelation
+ with KnownSizeEstimation {
// Rebuild the real BroadcastMode on demand; never serialize it.
@transient override lazy val mode: BroadcastMode =
@@ -236,4 +238,11 @@ case class ColumnarBuildSideRelation(
}
iterator.toArray
}
+ override def estimatedSize: Long = { // Total length in bytes of all byte arrays held in `batches`.
+ if (batches != null) {
+ batches.map(_.length.toLong).sum // Widen each Int length to Long before summing.
+ } else {
+ 0L // `batches` is null: report zero bytes.
+ }
+ }
}
diff --git
a/backends-velox/src/main/scala/org/apache/spark/sql/execution/unsafe/UnsafeColumnarBuildSideRelation.scala
b/backends-velox/src/main/scala/org/apache/spark/sql/execution/unsafe/UnsafeColumnarBuildSideRelation.scala
index 80e92a1537..308834657a 100644
---
a/backends-velox/src/main/scala/org/apache/spark/sql/execution/unsafe/UnsafeColumnarBuildSideRelation.scala
+++
b/backends-velox/src/main/scala/org/apache/spark/sql/execution/unsafe/UnsafeColumnarBuildSideRelation.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.utils.SparkArrowUtil
import org.apache.spark.sql.vectorized.ColumnarBatch
import org.apache.spark.task.TaskResources
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{KnownSizeEstimation, Utils}
import com.esotericsoftware.kryo.{Kryo, KryoSerializable}
import com.esotericsoftware.kryo.io.{Input, Output}
@@ -102,7 +102,8 @@ case class UnsafeColumnarBuildSideRelation(
extends BuildSideRelation
with Externalizable
with Logging
- with KryoSerializable {
+ with KryoSerializable
+ with KnownSizeEstimation {
// Rebuild the real BroadcastMode on demand; never serialize it.
@transient override lazy val mode: BroadcastMode =
@@ -366,4 +367,12 @@ case class UnsafeColumnarBuildSideRelation(
}
iterator.toArray
}
+
+ override def estimatedSize: Long = { // Size estimate delegated to `batches.totalBytes`.
+ if (batches != null) {
+ batches.totalBytes // NOTE(review): presumably the byte total kept by the batches holder; confirm.
+ } else {
+ 0L // `batches` is null: report zero bytes.
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]