This is an automated email from the ASF dual-hosted git repository.

voonhous pushed a commit to tag rfc-105-pre-cleanup
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit f251d1e019ce22c6f1538631152800e7471f0733
Author: voon <[email protected]>
AuthorDate: Mon May 25 19:32:53 2026 +0800

    fix(trino): drop stale partition-column injection for count(*) projections
    
    For base-file-only splits with an empty projection (count(*)), the page
    source provider was injecting a synthetic partitionColumnHandle() so the
    loop in HudiBaseFileOnlyPageSource had something to materialize. The page
    emitted to Trino then carried a single VariableWidthBlock holding the
    partition path. Trino 482's OutputValidatingSourceOperator now validates
    that the page's channel count matches the operator's declared output types
    and rejects the mismatched page with "Invalid number of channels; got 1
    expected 0", failing count(*) queries with partition predicates.
    
    The underlying Parquet page source already handles empty projections
    correctly, returning pages with the right positionCount and zero blocks.
    This commit removes the injection in getHiveColumns and the now-bogus
    non-empty assertion, and forwards the zero-block physical page directly
    from HudiBaseFileOnlyPageSource when the output projection is empty.
---
 .../trino/plugin/hudi/HudiBaseFileOnlyPageSource.java  |  5 +++++
 .../io/trino/plugin/hudi/HudiPageSourceProvider.java   | 18 +++---------------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java
index a8955408ebf4..b1799bfa703f 100644
--- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java
+++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java
@@ -98,6 +98,11 @@ public class HudiBaseFileOnlyPageSource
             return physicalSourcePage;
         }
 
+        if (allOutputColumns.isEmpty()) {
+            // count(*) projects no columns; forward the physical page which already carries the correct positionCount with zero blocks.
+            return physicalSourcePage;
+        }
+
         Block[] outputBlocks = new Block[allOutputColumns.size()];
         for (int i = 0; i < allOutputColumns.size(); i++) {
             HiveColumnHandle outputColumn = allOutputColumns.get(i);
diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java
index bae76b737a31..0b87592b74e0 100644
--- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java
+++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java
@@ -55,7 +55,6 @@ import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.schema.HoodieSchema;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.read.HoodieFileGroupReader;
-import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.storage.StoragePath;
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.io.MessageColumnIO;
@@ -65,7 +64,6 @@ import org.joda.time.DateTimeZone;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
@@ -79,7 +77,6 @@ import static io.trino.parquet.ParquetTypeUtils.getColumnIO;
 import static io.trino.parquet.ParquetTypeUtils.getDescriptors;
 import static io.trino.parquet.predicate.PredicateUtils.buildPredicate;
 import static io.trino.parquet.predicate.PredicateUtils.getFilteredRowGroups;
-import static io.trino.plugin.hive.HiveColumnHandle.partitionColumnHandle;
 import static io.trino.plugin.hive.parquet.ParquetPageSourceFactory.ParquetReaderProvider;
 import static io.trino.plugin.hive.parquet.ParquetPageSourceFactory.createDataSource;
 import static io.trino.plugin.hive.parquet.ParquetPageSourceFactory.createParquetPageSource;
@@ -203,8 +200,6 @@ public class HudiPageSourceProvider
 
         // Avoid avro serialization if split/filegroup only contains base files
         if (isBaseFileOnly) {
-            ValidationUtils.checkArgument(!hiveColumnHandles.isEmpty(),
-                    "Column handles should always be present for providing Hudi data page source on a base file");
             return new HudiBaseFileOnlyPageSource(
                     dataPageSource,
                     hiveColumnHandles,
@@ -422,15 +417,8 @@ public class HudiPageSourceProvider
     private static List<HiveColumnHandle> getHiveColumns(List<ColumnHandle> columns,
                                                          boolean isBaseFileOnly)
     {
-        if (!isBaseFileOnly || !columns.isEmpty()) {
-            return columns.stream()
-                    .map(HiveColumnHandle.class::cast)
-                    .toList();
-        }
-
-        // The `columns` list containing the requested columns to read could be empty
-        // when count(*) is in the statement; to make sure the page source works properly,
-        // the synthesized partition column is added in this case.
-        // the synthesized partition column is added in this case.
-        return Collections.singletonList(partitionColumnHandle());
+        return columns.stream()
+                .map(HiveColumnHandle.class::cast)
+                .toList();
     }
 }

Reply via email to