This is an automated email from the ASF dual-hosted git repository.
klcopp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 681de97697 HIVE-26146: Handle missing hive.acid.key.index in the
fixacidkeyindex utility (Alessandro Solimando, reviewed by Aman Sinha and Karen
Coppage)
681de97697 is described below
commit 681de97697d2eb53924b5452abce3cca1790aa38
Author: Alessandro Solimando <[email protected]>
AuthorDate: Tue Apr 19 17:12:17 2022 +0200
HIVE-26146: Handle missing hive.acid.key.index in the fixacidkeyindex
utility (Alessandro Solimando, reviewed by Aman Sinha and Karen Coppage)
Closes #3216.
---
.../hadoop/hive/ql/io/orc/FixAcidKeyIndex.java | 8 +++-
.../hadoop/hive/ql/io/orc/OrcOutputFormat.java | 2 +-
.../hadoop/hive/ql/io/orc/OrcRecordUpdater.java | 10 ++--
.../hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java | 53 ++++++++++++++++------
.../hive/ql/io/orc/TestInputOutputFormat.java | 2 +-
5 files changed, 50 insertions(+), 25 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FixAcidKeyIndex.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FixAcidKeyIndex.java
index 145ee5f246..43723997e7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FixAcidKeyIndex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FixAcidKeyIndex.java
@@ -157,6 +157,11 @@ public class FixAcidKeyIndex {
RecordReader rr = reader.rows()) {
List<StripeInformation> stripes = reader.getStripes();
RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader);
+
+ if (keyIndex == null) {
+ result.isValid = false;
+ }
+
StructObjectInspector soi = (StructObjectInspector)
reader.getObjectInspector();
//
struct<operation:int,originalTransaction:bigint,bucket:int,rowId:bigint,currentTransaction:bigint
List<? extends StructField> structFields = soi.getAllStructFieldRefs();
@@ -180,7 +185,8 @@ public class FixAcidKeyIndex {
RecordIdentifier recordIdentifier = new
RecordIdentifier(lastTransaction, lastBucket, lastRowId);
result.recordIdentifiers.add(recordIdentifier);
- if (stripes.size() != keyIndex.length || keyIndex[i] == null ||
recordIdentifier.compareTo(keyIndex[i]) != 0) {
+ if (result.isValid && (stripes.size() != keyIndex.length ||
keyIndex[i] == null
+ || recordIdentifier.compareTo(keyIndex[i]) != 0)) {
result.isValid = false;
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
index e05954a91c..7ec77166e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
@@ -305,7 +305,7 @@ public class OrcOutputFormat extends
FileOutputFormat<NullWritable, OrcSerdeRow>
}
}
final OrcRecordUpdater.KeyIndexBuilder watcher =
- new OrcRecordUpdater.KeyIndexBuilder("compactor");
+ new OrcRecordUpdater.KeyIndexBuilder();
opts.inspector(options.getInspector())
.callback(watcher);
final Writer writer = OrcFile.createWriter(filename, opts);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 5abae74dd1..54617fa0bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -136,7 +136,7 @@ public class OrcRecordUpdater implements RecordUpdater {
private long deleteCount = 0;
// used only for insert events, this is the number of rows held in memory
before flush() is invoked
private long bufferedRows = 0;
- private final KeyIndexBuilder indexBuilder = new KeyIndexBuilder("insert");
+ private final KeyIndexBuilder indexBuilder = new KeyIndexBuilder();
private KeyIndexBuilder deleteEventIndexBuilder;
private StructField recIdField = null; // field to look for the record
identifier in
private StructField rowIdField = null; // field inside recId to look for row
id in
@@ -474,7 +474,7 @@ public class OrcRecordUpdater implements RecordUpdater {
// Initialize a deleteEventWriter if not yet done. (Lazy initialization)
if (deleteEventWriter == null) {
// Initialize an indexBuilder for deleteEvents. (HIVE-17284)
- deleteEventIndexBuilder = new KeyIndexBuilder("delete");
+ deleteEventIndexBuilder = new KeyIndexBuilder();
this.deleteEventWriter = OrcFile.createWriter(deleteEventPath,
deleteWriterOptions.callback(deleteEventIndexBuilder));
AcidUtils.OrcAcidVersion.setAcidVersionInDataFile(deleteEventWriter);
@@ -690,7 +690,6 @@ public class OrcRecordUpdater implements RecordUpdater {
}
static class KeyIndexBuilder implements OrcFile.WriterCallback {
- private final String builderName;
StringBuilder lastKey = new StringBuilder();//list of last keys for each
stripe
long lastTransaction;
int lastBucket;
@@ -706,11 +705,8 @@ public class OrcRecordUpdater implements RecordUpdater {
*
* This is used to decide if we need to make preStripeWrite() call here.
*/
- private long numKeysCurrentStripe = 0;
+ protected long numKeysCurrentStripe = 0;
- KeyIndexBuilder(String name) {
- this.builderName = name;
- }
@Override
public void preStripeWrite(OrcFile.WriterContext context
) throws IOException {
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
index 192d8b47a1..07e16931b6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
@@ -21,6 +21,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
@@ -32,6 +33,7 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.orc.OrcFile.WriterContext;
+import org.apache.orc.impl.OrcAcidUtils;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
@@ -63,9 +65,6 @@ public class TestFixAcidKeyIndex {
static abstract class TestKeyIndexBuilder
extends OrcRecordUpdater.KeyIndexBuilder
implements OrcFile.WriterCallback {
- public TestKeyIndexBuilder(String name) {
- super(name);
- }
// Will be called before closing the ORC file to stop writing any
additional information
// to the acid key index.
@@ -241,6 +240,21 @@ public class TestFixAcidKeyIndex {
fixInvalidIndex(testFilePath);
}
+ @Test
+ public void testMissingKeyIndex() throws Exception {
+ // Try single stripe
+ createTestAcidFile(testFilePath, 100, new MissingKeyIndexBuilder());
+ checkInvalidKeyIndex(testFilePath);
+ // Try fixing, this should result in new fixed file.
+ fixInvalidIndex(testFilePath);
+
+ // Multiple stripes
+ createTestAcidFile(testFilePath, 12000, new MissingKeyIndexBuilder());
+ checkInvalidKeyIndex(testFilePath);
+ // Try fixing, this should result in new fixed file.
+ fixInvalidIndex(testFilePath);
+ }
+
@Test
public void testNonAcidOrcFile() throws Exception {
// Copy data/files/alltypesorc to workDir
@@ -259,13 +273,30 @@ public class TestFixAcidKeyIndex {
}
/**
- * Version of KeyIndexBuilder that generates a valid key index
+ * Version of KeyIndexBuilder that does not generate any key index
*/
- static class GoodKeyIndexBuilder extends TestKeyIndexBuilder {
+ static class MissingKeyIndexBuilder extends TestKeyIndexBuilder {
+
+ @Override
+ public void stopWritingKeyIndex() {
+ // Do nothing - this should generate proper index.
+ }
- GoodKeyIndexBuilder() {
- super("GoodKeyIndexBuilder");
+ @Override
+ public void preFooterWrite(OrcFile.WriterContext context) throws
IOException {
+ if(numKeysCurrentStripe > 0) {
+ preStripeWrite(context);
+ }
+ context.getWriter().addUserMetadata(
+ OrcAcidUtils.ACID_STATS,
StandardCharsets.UTF_8.encode(acidStats.serialize()));
+ // here we don't generate the "hive.acid.key.index" metadata entry
}
+ }
+
+ /**
+ * Version of KeyIndexBuilder that generates a valid key index
+ */
+ static class GoodKeyIndexBuilder extends TestKeyIndexBuilder {
@Override
public void stopWritingKeyIndex() {
@@ -281,10 +312,6 @@ public class TestFixAcidKeyIndex {
boolean writeAcidIndexInfo = true;
- BadKeyIndexBuilder() {
- super("BadKeyIndexBuilder");
- }
-
public void stopWritingKeyIndex() {
LOG.info("*** Stop writing index!");
writeAcidIndexInfo = false;
@@ -307,10 +334,6 @@ public class TestFixAcidKeyIndex {
*/
static class FaultyKeyIndexBuilder extends TestKeyIndexBuilder {
- public FaultyKeyIndexBuilder() {
- super("FaultyKeyIndexBuilder");
- }
-
@Override
public void preStripeWrite(WriterContext context) throws IOException {
this.lastRowId = lastRowId - 5;
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 6220bbf9cc..c98f7f408e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -4113,7 +4113,7 @@ public class TestInputOutputFormat {
"currentTransaction:bigint," +
"row:struct<a:int,b:struct<c:int>,d:string>>");
- OrcRecordUpdater.KeyIndexBuilder indexBuilder = new
OrcRecordUpdater.KeyIndexBuilder("test");
+ OrcRecordUpdater.KeyIndexBuilder indexBuilder = new
OrcRecordUpdater.KeyIndexBuilder();
OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
.fileSystem(fs)
.setSchema(fileSchema)