This is an automated email from the ASF dual-hosted git repository. voonhous pushed a commit to tag rfc-105-pre-cleanup in repository https://gitbox.apache.org/repos/asf/hudi.git
commit f251d1e019ce22c6f1538631152800e7471f0733 Author: voon <[email protected]> AuthorDate: Mon May 25 19:32:53 2026 +0800 fix(trino): drop stale partition-column injection for count(*) projections For base-file-only splits with an empty projection (count(*)), the page source provider was injecting a synthetic partitionColumnHandle() so the loop in HudiBaseFileOnlyPageSource had something to materialize. The page emitted to Trino then carried a single VariableWidthBlock holding the partition path. Trino 482's OutputValidatingSourceOperator now validates that the page's channel count matches the operator's declared output types and rejects the mismatched page with "Invalid number of channels; got 1 expected 0", failing count(*) queries with partition predicates. The underlying Parquet page source already handles empty projections correctly, returning pages with the right positionCount and zero blocks. Removes the injection in getHiveColumns and the now-bogus non-empty assertion, and forwards the zero-block physical page directly from HudiBaseFileOnlyPageSource when the output projection is empty. --- .../trino/plugin/hudi/HudiBaseFileOnlyPageSource.java | 5 +++++ .../io/trino/plugin/hudi/HudiPageSourceProvider.java | 18 +++--------------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java index a8955408ebf4..b1799bfa703f 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiBaseFileOnlyPageSource.java @@ -98,6 +98,11 @@ public class HudiBaseFileOnlyPageSource return physicalSourcePage; } + if (allOutputColumns.isEmpty()) { + // count(*) projects no columns; forward the physical page which already carries the correct positionCount with zero blocks. + return physicalSourcePage; + } + Block[] outputBlocks = new Block[allOutputColumns.size()]; for (int i = 0; i < allOutputColumns.size(); i++) { HiveColumnHandle outputColumn = allOutputColumns.get(i); diff --git a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java index bae76b737a31..0b87592b74e0 100644 --- a/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java +++ b/hudi-trino-plugin/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java @@ -55,7 +55,6 @@ import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.schema.HoodieSchema; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.read.HoodieFileGroupReader; -import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.storage.StoragePath; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.io.MessageColumnIO; @@ -65,7 +64,6 @@ import org.joda.time.DateTimeZone; import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Locale; @@ -79,7 +77,6 @@ import static io.trino.parquet.ParquetTypeUtils.getColumnIO; import static io.trino.parquet.ParquetTypeUtils.getDescriptors; import static io.trino.parquet.predicate.PredicateUtils.buildPredicate; import static io.trino.parquet.predicate.PredicateUtils.getFilteredRowGroups; -import static io.trino.plugin.hive.HiveColumnHandle.partitionColumnHandle; import static io.trino.plugin.hive.parquet.ParquetPageSourceFactory.ParquetReaderProvider; import static io.trino.plugin.hive.parquet.ParquetPageSourceFactory.createDataSource; import static io.trino.plugin.hive.parquet.ParquetPageSourceFactory.createParquetPageSource; @@ -203,8 +200,6 @@ public class HudiPageSourceProvider // Avoid avro serialization if split/filegroup only contains base files if (isBaseFileOnly) { - ValidationUtils.checkArgument(!hiveColumnHandles.isEmpty(), - "Column handles should always be present for providing Hudi data page source on a base file"); return new HudiBaseFileOnlyPageSource( dataPageSource, hiveColumnHandles, @@ -422,15 +417,8 @@ public class HudiPageSourceProvider private static List<HiveColumnHandle> getHiveColumns(List<ColumnHandle> columns, boolean isBaseFileOnly) { - if (!isBaseFileOnly || !columns.isEmpty()) { - return columns.stream() - .map(HiveColumnHandle.class::cast) - .toList(); - } - - // The `columns` list containing the requested columns to read could be empty - // when count(*) is in the statement; to make sure the page source works properly, - // the synthesized partition column is added in this case. - return Collections.singletonList(partitionColumnHandle()); + return columns.stream() + .map(HiveColumnHandle.class::cast) + .toList(); } }
