This is an automated email from the ASF dual-hosted git repository.

klcopp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 681de97697 HIVE-26146: Handle missing hive.acid.key.index in the 
fixacidkeyindex utility (Alessandro Solimando, reviewed by Aman Sinha and Karen 
Coppage)
681de97697 is described below

commit 681de97697d2eb53924b5452abce3cca1790aa38
Author: Alessandro Solimando <[email protected]>
AuthorDate: Tue Apr 19 17:12:17 2022 +0200

    HIVE-26146: Handle missing hive.acid.key.index in the fixacidkeyindex 
utility (Alessandro Solimando, reviewed by Aman Sinha and Karen Coppage)
    
    Closes #3216.
---
 .../hadoop/hive/ql/io/orc/FixAcidKeyIndex.java     |  8 +++-
 .../hadoop/hive/ql/io/orc/OrcOutputFormat.java     |  2 +-
 .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java    | 10 ++--
 .../hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java | 53 ++++++++++++++++------
 .../hive/ql/io/orc/TestInputOutputFormat.java      |  2 +-
 5 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FixAcidKeyIndex.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FixAcidKeyIndex.java
index 145ee5f246..43723997e7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FixAcidKeyIndex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FixAcidKeyIndex.java
@@ -157,6 +157,11 @@ public class FixAcidKeyIndex {
         RecordReader rr = reader.rows()) {
       List<StripeInformation> stripes = reader.getStripes();
       RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader);
+
+      if (keyIndex == null) {
+        result.isValid = false;
+      }
+
       StructObjectInspector soi = (StructObjectInspector) 
reader.getObjectInspector();
       // 
struct<operation:int,originalTransaction:bigint,bucket:int,rowId:bigint,currentTransaction:bigint
       List<? extends StructField> structFields = soi.getAllStructFieldRefs();
@@ -180,7 +185,8 @@ public class FixAcidKeyIndex {
         RecordIdentifier recordIdentifier = new 
RecordIdentifier(lastTransaction, lastBucket, lastRowId);
         result.recordIdentifiers.add(recordIdentifier);
 
-        if (stripes.size() != keyIndex.length || keyIndex[i] == null || 
recordIdentifier.compareTo(keyIndex[i]) != 0) {
+        if (result.isValid && (stripes.size() != keyIndex.length || 
keyIndex[i] == null
+            || recordIdentifier.compareTo(keyIndex[i]) != 0)) {
           result.isValid = false;
         }
       }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
index e05954a91c..7ec77166e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
@@ -305,7 +305,7 @@ public class OrcOutputFormat extends 
FileOutputFormat<NullWritable, OrcSerdeRow>
       }
     }
     final OrcRecordUpdater.KeyIndexBuilder watcher =
-        new OrcRecordUpdater.KeyIndexBuilder("compactor");
+        new OrcRecordUpdater.KeyIndexBuilder();
     opts.inspector(options.getInspector())
         .callback(watcher);
     final Writer writer = OrcFile.createWriter(filename, opts);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 5abae74dd1..54617fa0bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -136,7 +136,7 @@ public class OrcRecordUpdater implements RecordUpdater {
   private long deleteCount = 0;
   // used only for insert events, this is the number of rows held in memory 
before flush() is invoked
   private long bufferedRows = 0;
-  private final KeyIndexBuilder indexBuilder = new KeyIndexBuilder("insert");
+  private final KeyIndexBuilder indexBuilder = new KeyIndexBuilder();
   private KeyIndexBuilder deleteEventIndexBuilder;
   private StructField recIdField = null; // field to look for the record 
identifier in
   private StructField rowIdField = null; // field inside recId to look for row 
id in
@@ -474,7 +474,7 @@ public class OrcRecordUpdater implements RecordUpdater {
       // Initialize a deleteEventWriter if not yet done. (Lazy initialization)
       if (deleteEventWriter == null) {
         // Initialize an indexBuilder for deleteEvents. (HIVE-17284)
-        deleteEventIndexBuilder = new KeyIndexBuilder("delete");
+        deleteEventIndexBuilder = new KeyIndexBuilder();
         this.deleteEventWriter = OrcFile.createWriter(deleteEventPath,
             deleteWriterOptions.callback(deleteEventIndexBuilder));
         AcidUtils.OrcAcidVersion.setAcidVersionInDataFile(deleteEventWriter);
@@ -690,7 +690,6 @@ public class OrcRecordUpdater implements RecordUpdater {
   }
 
   static class KeyIndexBuilder implements OrcFile.WriterCallback {
-    private final String builderName;
     StringBuilder lastKey = new StringBuilder();//list of last keys for each 
stripe
     long lastTransaction;
     int lastBucket;
@@ -706,11 +705,8 @@ public class OrcRecordUpdater implements RecordUpdater {
      *
      *  This is used to decide if we need to make preStripeWrite() call here.
      */
-    private long numKeysCurrentStripe = 0;
+    protected long numKeysCurrentStripe = 0;
 
-    KeyIndexBuilder(String name) {
-      this.builderName = name;
-    }
     @Override
     public void preStripeWrite(OrcFile.WriterContext context
     ) throws IOException {
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
index 192d8b47a1..07e16931b6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -32,6 +33,7 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.orc.OrcFile.WriterContext;
+import org.apache.orc.impl.OrcAcidUtils;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
@@ -63,9 +65,6 @@ public class TestFixAcidKeyIndex {
   static abstract class TestKeyIndexBuilder
       extends OrcRecordUpdater.KeyIndexBuilder
       implements OrcFile.WriterCallback {
-    public TestKeyIndexBuilder(String name) {
-      super(name);
-    }
 
     // Will be called before closing the ORC file to stop writing any 
additional information
     // to the acid key index.
@@ -241,6 +240,21 @@ public class TestFixAcidKeyIndex {
     fixInvalidIndex(testFilePath);
   }
 
+  @Test
+  public void testMissingKeyIndex() throws Exception {
+    // Try single stripe
+    createTestAcidFile(testFilePath, 100, new MissingKeyIndexBuilder());
+    checkInvalidKeyIndex(testFilePath);
+    // Try fixing, this should result in new fixed file.
+    fixInvalidIndex(testFilePath);
+
+    // Multiple stripes
+    createTestAcidFile(testFilePath, 12000, new MissingKeyIndexBuilder());
+    checkInvalidKeyIndex(testFilePath);
+    // Try fixing, this should result in new fixed file.
+    fixInvalidIndex(testFilePath);
+  }
+
   @Test
   public void testNonAcidOrcFile() throws Exception {
     // Copy data/files/alltypesorc to workDir
@@ -259,13 +273,30 @@ public class TestFixAcidKeyIndex {
   }
 
   /**
-   * Version of KeyIndexBuilder that generates a valid key index
+   * Version of KeyIndexBuilder that does not generate any key index
    */
-  static class GoodKeyIndexBuilder extends TestKeyIndexBuilder {
+  static class MissingKeyIndexBuilder extends TestKeyIndexBuilder {
+
+    @Override
+    public void stopWritingKeyIndex() {
+      // Do nothing - no key index is ever written, so there is nothing to stop.
+    }
 
-    GoodKeyIndexBuilder() {
-      super("GoodKeyIndexBuilder");
+    @Override
+    public void preFooterWrite(OrcFile.WriterContext context) throws 
IOException {
+      if(numKeysCurrentStripe > 0) {
+        preStripeWrite(context);
+      }
+      context.getWriter().addUserMetadata(
+          OrcAcidUtils.ACID_STATS, 
StandardCharsets.UTF_8.encode(acidStats.serialize()));
+      // here we don't generate the "hive.acid.key.index" metadata entry
     }
+  }
+
+  /**
+   * Version of KeyIndexBuilder that generates a valid key index
+   */
+  static class GoodKeyIndexBuilder extends TestKeyIndexBuilder {
 
     @Override
     public void stopWritingKeyIndex() {
@@ -281,10 +312,6 @@ public class TestFixAcidKeyIndex {
 
     boolean writeAcidIndexInfo = true;
 
-    BadKeyIndexBuilder() {
-      super("BadKeyIndexBuilder");
-    }
-
     public void stopWritingKeyIndex() {
       LOG.info("*** Stop writing index!");
       writeAcidIndexInfo = false;
@@ -307,10 +334,6 @@ public class TestFixAcidKeyIndex {
    */
   static class FaultyKeyIndexBuilder extends TestKeyIndexBuilder {
 
-    public FaultyKeyIndexBuilder() {
-      super("FaultyKeyIndexBuilder");
-    }
-
     @Override
     public void preStripeWrite(WriterContext context) throws IOException {
       this.lastRowId = lastRowId - 5;
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 6220bbf9cc..c98f7f408e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -4113,7 +4113,7 @@ public class TestInputOutputFormat {
             "currentTransaction:bigint," +
             "row:struct<a:int,b:struct<c:int>,d:string>>");
 
-    OrcRecordUpdater.KeyIndexBuilder indexBuilder = new 
OrcRecordUpdater.KeyIndexBuilder("test");
+    OrcRecordUpdater.KeyIndexBuilder indexBuilder = new 
OrcRecordUpdater.KeyIndexBuilder();
     OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
         .fileSystem(fs)
         .setSchema(fileSchema)

Reply via email to