This is an automated email from the ASF dual-hosted git repository.

coderfender pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new d7147dbb16 bug: no column projection should still persist row count (#4444)
d7147dbb16 is described below

commit d7147dbb16c024d5cd3c9b6569c06e198395b244
Author: Bhargava Vadlamani <[email protected]>
AuthorDate: Thu May 28 11:39:59 2026 -0500

    bug: no column projection should still persist row count (#4444)
---
 .github/workflows/pr_build_linux.yml               |  1 +
 .github/workflows/pr_build_macos.yml               |  1 +
 .../org/apache/spark/sql/comet/util/Utils.scala    |  9 ++++
 .../apache/spark/sql/comet/util/UtilsSuite.scala   | 54 ++++++++++++++++++++++
 4 files changed, 65 insertions(+)

diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml
index f7d6c1a73d..0e4988e368 100644
--- a/.github/workflows/pr_build_linux.yml
+++ b/.github/workflows/pr_build_linux.yml
@@ -383,6 +383,7 @@ jobs:
               org.apache.spark.sql.comet.CometDppFallbackRepro3949Suite
               org.apache.spark.sql.comet.CometShuffleFallbackStickinessSuite
               org.apache.spark.sql.comet.CometDecimalArithmeticViewSuite
+              org.apache.spark.sql.comet.util.UtilsSuite
               org.apache.comet.objectstore.NativeConfigSuite
               org.apache.spark.sql.CometToPrettyStringSuite
               org.apache.spark.sql.CometCollationSuite
diff --git a/.github/workflows/pr_build_macos.yml b/.github/workflows/pr_build_macos.yml
index 7af77ca2c9..5101f5290c 100644
--- a/.github/workflows/pr_build_macos.yml
+++ b/.github/workflows/pr_build_macos.yml
@@ -223,6 +223,7 @@ jobs:
               org.apache.spark.sql.comet.CometDppFallbackRepro3949Suite
               org.apache.spark.sql.comet.CometShuffleFallbackStickinessSuite
               org.apache.spark.sql.comet.CometDecimalArithmeticViewSuite
+              org.apache.spark.sql.comet.util.UtilsSuite
               org.apache.comet.objectstore.NativeConfigSuite
               org.apache.spark.sql.CometToPrettyStringSuite
               org.apache.spark.sql.CometCollationSuite
diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala b/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala
index 783367c054..0343983e11 100644
--- a/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala
@@ -224,6 +224,10 @@ object Utils extends CometTypeShim with Logging {
 
       val (fieldVectors, batchProviderOpt) = getBatchFieldVectors(batch)
       val root = new VectorSchemaRoot(fieldVectors.asJava)
+      if (fieldVectors.isEmpty) {
+        // VSR cannot infer rowCount without field vectors
+        root.setRowCount(batch.numRows())
+      }
       val provider = batchProviderOpt.getOrElse(dictionaryProvider)
 
       val writer = new ArrowStreamWriter(root, provider, Channels.newChannel(out))
@@ -336,6 +340,11 @@ object Utils extends CometTypeShim with Logging {
           return (Array.empty, 0L, 0L)
         }
 
+        if (targetRoot.getSchema.getFields.isEmpty) {
+          // VSRAppender does not update rowCount with no columns
+          targetRoot.setRowCount(totalRows.toInt)
+        }
+
         assert(
           targetRoot.getRowCount.toLong == totalRows,
           s"Row count mismatch after coalesce: ${targetRoot.getRowCount} != $totalRows")
diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/util/UtilsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/comet/util/UtilsSuite.scala
new file mode 100644
index 0000000000..a79b862793
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/comet/util/UtilsSuite.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.comet.util
+
+import org.apache.spark.sql.CometTestBase
+import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector}
+
+class UtilsSuite extends CometTestBase {
+
+  test("serializeBatches preserves row count for a zero-column batch") {
+    val numRows = 5
+    val batch = new ColumnarBatch(Array.empty[ColumnVector], numRows)
+
+    val (rowCount, buf) = Utils.serializeBatches(Iterator(batch)).next()
+    assert(rowCount == numRows)
+
+    val decoded = Utils.decodeBatches(buf, "test").toSeq
+    assert(decoded.map(_.numRows()).sum == numRows)
+  }
+
+  test("coalesceBroadcastBatches preserves row count across zero-column inputs") {
+    val numRows = 5
+    val numBatches = 3
+    val batches =
+      (0 until numBatches).map(_ => new ColumnarBatch(Array.empty[ColumnVector], numRows))
+
+    val bufs = Utils.serializeBatches(batches.iterator).map(_._2).toSeq.iterator
+    val (coalesced, batchCount, totalRows) = Utils.coalesceBroadcastBatches(bufs)
+
+    val expected = numRows.toLong * numBatches
+    assert(batchCount == numBatches)
+    assert(totalRows == expected)
+
+    val decoded = coalesced.iterator.flatMap(b => Utils.decodeBatches(b, "test")).toSeq
+    assert(decoded.map(_.numRows()).sum == expected)
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to