This is an automated email from the ASF dual-hosted git repository. ayushsaxena pushed a commit to branch HIVE-26885 in repository https://gitbox.apache.org/repos/asf/hive.git
commit a042e1480385570ad5dfbbd42c6bcb341b89f671 Author: Ayush Saxena <[email protected]> AuthorDate: Thu Dec 22 22:54:40 2022 +0530 HIVE-26885: Iceberg: Parquet Vectorized V2 reads fails with NPE. --- .../iceberg/mr/hive/vector/HiveBatchIterator.java | 2 +- .../mr/hive/vector/TestHiveIcebergVectorization.java | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java index 22a42f2953e..3b543b22aca 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java @@ -65,7 +65,7 @@ public final class HiveBatchIterator implements CloseableIterator<HiveBatchConte batch.size = 0; } - if (recordReader instanceof RowPositionAwareVectorizedRecordReader) { + if (batch.size != 0 && recordReader instanceof RowPositionAwareVectorizedRecordReader) { rowOffset = ((RowPositionAwareVectorizedRecordReader) recordReader).getRowNumber(); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java index c35fa22568c..18059d41838 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java @@ -19,6 +19,7 @@ package org.apache.iceberg.mr.hive.vector; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -136,8 +137,20 @@ public class TestHiveIcebergVectorization extends HiveIcebergStorageHandlerWithE /** * Tests HiveDeleteFilter implementation correctly filtering rows from VRBs. */ + @Test + public void testHiveDeleteFilterWithEmptyBatches() { + Map<String, String> props = Maps.newHashMap(); + props.put("parquet.block.size", "8192"); + props.put("parquet.page.row.count.limit", "20"); + testVectorizedReadWithDeleteFilter(props); + } + @Test public void testHiveDeleteFilter() { + testVectorizedReadWithDeleteFilter(Collections.emptyMap()); + } + + private void testVectorizedReadWithDeleteFilter(Map<String, String> props) { // The Avro "vectorized" case should actually serve as compareTo scenario to non-vectorized reading, because // there's no vectorization for Avro and it falls back to the non-vectorized implementation Assume.assumeTrue(isVectorized && testTableType == TestTables.TestTableType.HIVE_CATALOG); @@ -158,8 +171,8 @@ public class TestHiveIcebergVectorization extends HiveIcebergStorageHandlerWithE for (int i = 0; i < records.size(); ++i) { records.get(i).setField("customer_id", (long) i); } - testTables.createTable(shell, "vectordelete", schema, - PartitionSpec.unpartitioned(), fileFormat, records, 2); + + testTables.createTable(shell, "vectordelete", schema, PartitionSpec.unpartitioned(), fileFormat, records, 2, props); // Delete every odd row until 6000 shell.executeStatement("DELETE FROM vectordelete WHERE customer_id % 2 = 1 and customer_id < 6000");
