[hive] 01/01: HIVE-26885: Iceberg: Parquet Vectorized V2 reads fails with NPE.

ayushsaxena Thu, 22 Dec 2022 19:56:11 -0800

This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch HIVE-26885
in repository https://gitbox.apache.org/repos/asf/hive.git


commit a042e1480385570ad5dfbbd42c6bcb341b89f671
Author: Ayush Saxena <[email protected]>
AuthorDate: Thu Dec 22 22:54:40 2022 +0530

    HIVE-26885: Iceberg: Parquet Vectorized V2 reads fails with NPE.
---
 .../iceberg/mr/hive/vector/HiveBatchIterator.java       |  2 +-
 .../mr/hive/vector/TestHiveIcebergVectorization.java    | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java
index 22a42f2953e..3b543b22aca 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java
@@ -65,7 +65,7 @@ public final class HiveBatchIterator implements CloseableIterator<HiveBatchConte
           batch.size = 0;
         }
 
-        if (recordReader instanceof RowPositionAwareVectorizedRecordReader) {
+        if (batch.size != 0 && recordReader instanceof RowPositionAwareVectorizedRecordReader) {
           rowOffset = ((RowPositionAwareVectorizedRecordReader) recordReader).getRowNumber();
         }
 
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java
index c35fa22568c..18059d41838 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java
@@ -19,6 +19,7 @@
 
 package org.apache.iceberg.mr.hive.vector;
 
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -136,8 +137,20 @@ public class TestHiveIcebergVectorization extends HiveIcebergStorageHandlerWithE
   /**
    * Tests HiveDeleteFilter implementation correctly filtering rows from VRBs.
    */
+  @Test
+  public void testHiveDeleteFilterWithEmptyBatches() {
+    Map<String, String> props = Maps.newHashMap();
+    props.put("parquet.block.size", "8192");
+    props.put("parquet.page.row.count.limit", "20");
+    testVectorizedReadWithDeleteFilter(props);
+  }
+
   @Test
   public void testHiveDeleteFilter() {
+    testVectorizedReadWithDeleteFilter(Collections.emptyMap());
+  }
+
+  private void testVectorizedReadWithDeleteFilter(Map<String, String> props) {
     // The Avro "vectorized" case should actually serve as compareTo scenario to non-vectorized reading, because
     // there's no vectorization for Avro and it falls back to the non-vectorized implementation
     Assume.assumeTrue(isVectorized && testTableType == TestTables.TestTableType.HIVE_CATALOG);
@@ -158,8 +171,8 @@ public class TestHiveIcebergVectorization extends HiveIcebergStorageHandlerWithE
     for (int i = 0; i < records.size(); ++i) {
       records.get(i).setField("customer_id", (long) i);
     }
-    testTables.createTable(shell, "vectordelete", schema,
-        PartitionSpec.unpartitioned(), fileFormat, records, 2);
+
+    testTables.createTable(shell, "vectordelete", schema, PartitionSpec.unpartitioned(), fileFormat, records, 2, props);
 
     // Delete every odd row until 6000
     shell.executeStatement("DELETE FROM vectordelete WHERE customer_id % 2 = 1 and customer_id < 6000");

[hive] 01/01: HIVE-26885: Iceberg: Parquet Vectorized V2 reads fails with NPE.

Reply via email to