This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 5f577b46e [VL] Fix row to column batch size (#6342)
5f577b46e is described below
commit 5f577b46ebc892a277b540e6b7b5d01fe50a1eb2
Author: Jin Chengcheng <[email protected]>
AuthorDate: Fri Jul 5 17:30:05 2024 +0800
[VL] Fix row to column batch size (#6342)
Fix row to column batch size to pick the right config
---
.../org/apache/gluten/execution/RowToVeloxColumnarExec.scala | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
index 7bcf56f7e..29478fe9d 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
@@ -16,6 +16,7 @@
*/
package org.apache.gluten.execution
+import org.apache.gluten.GlutenConfig
import org.apache.gluten.backendsapi.BackendsApiManager
import org.apache.gluten.columnarbatch.ColumnarBatches
import org.apache.gluten.exception.GlutenException
@@ -56,9 +57,7 @@ case class RowToVeloxColumnarExec(child: SparkPlan) extends RowToColumnarExecBas
val numInputRows = longMetric("numInputRows")
val numOutputBatches = longMetric("numOutputBatches")
val convertTime = longMetric("convertTime")
- // Instead of creating a new config we are reusing columnBatchSize. In the future if we do
- // combine with some of the Arrow conversion tools we will need to unify some of the configs.
- val numRows = conf.columnBatchSize
+ val numRows = GlutenConfig.getConf.maxBatchSize
// This avoids calling `schema` in the RDD closure, so that we don't need to include the entire
// plan (this) in the closure.
val localSchema = schema
@@ -78,9 +77,7 @@ case class RowToVeloxColumnarExec(child: SparkPlan) extends RowToColumnarExecBas
val numInputRows = longMetric("numInputRows")
val numOutputBatches = longMetric("numOutputBatches")
val convertTime = longMetric("convertTime")
- // Instead of creating a new config we are reusing columnBatchSize. In the future if we do
- // combine with some of the Arrow conversion tools we will need to unify some of the configs.
- val numRows = conf.columnBatchSize
+ val numRows = GlutenConfig.getConf.maxBatchSize
val mode = BroadcastUtils.getBroadcastMode(outputPartitioning)
val relation = child.executeBroadcast()
BroadcastUtils.sparkToVeloxUnsafe(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]