This is an automated email from the ASF dual-hosted git repository.
prasanthj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 1368bd0 HIVE-21222: ACID: When there are no delete deltas skip finding min max keys (Prasanth Jayachandran reviewed by Eugene Koifman)
1368bd0 is described below
commit 1368bd07b5dbc3747390c051e8512a2b41217933
Author: Prasanth Jayachandran <[email protected]>
AuthorDate: Tue Feb 12 00:22:21 2019 -0800
HIVE-21222: ACID: When there are no delete deltas skip finding min max keys
(Prasanth Jayachandran reviewed by Eugene Koifman)
---
.../ql/io/orc/VectorizedOrcAcidRowBatchReader.java | 3 ++-
.../hive/ql/io/orc/TestInputOutputFormat.java | 4 +--
.../orc/TestVectorizedOrcAcidRowBatchReader.java | 31 ++++++++++++----------
3 files changed, 21 insertions(+), 17 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
index 6d1ca722..2349cda 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
@@ -390,7 +390,8 @@ public class VectorizedOrcAcidRowBatchReader
private OrcRawRecordMerger.KeyInterval findMinMaxKeys(
OrcSplit orcSplit, Configuration conf,
Reader.Options deleteEventReaderOptions) throws IOException {
- if(!HiveConf.getBoolVar(conf, ConfVars.FILTER_DELETE_EVENTS)) {
+ final boolean noDeleteDeltas =
getDeleteDeltaDirsFromSplit(orcSplit).length == 0;
+ if(!HiveConf.getBoolVar(conf, ConfVars.FILTER_DELETE_EVENTS) ||
noDeleteDeltas) {
LOG.debug("findMinMaxKeys() " + ConfVars.FILTER_DELETE_EVENTS +
"=false");
return new OrcRawRecordMerger.KeyInterval(null, null);
}
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 50ebbfa..5c13d45 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -802,8 +802,8 @@ public class TestInputOutputFormat {
int readsAfter = fs.statistics.getReadOps();
System.out.println("STATS TRACE END - " + testCaseName.getMethodName());
int delta = readsAfter - readsBefore;
- //HIVE-16812 adds 1 read of the footer of each file
- assertEquals(16, delta);
+ //HIVE-16812 adds 1 read of the footer of each file (only if delete delta exists)
+ assertEquals(8, delta);
} finally {
MockFileSystem.clearGlobalFiles();
}
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java
index 3382288..a8f18d1 100644
---
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java
+++
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.io.AcidInputFormat;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.BucketCodec;
@@ -60,6 +61,8 @@ import org.junit.Test;
import static org.junit.Assert.*;
+import com.google.common.collect.Lists;
+
/**
* This class tests the VectorizedOrcAcidRowBatchReader by creating an actual split and a set
* of delete delta files. The split is on an insert delta and there are multiple delete deltas
@@ -549,7 +552,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
OrcSplit split = new OrcSplit(acidFilePath, null,
stripe.getOffset() + 50,
stripe.getLength() - 100,
- new String[] {"localhost"}, null, false, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, false, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, null);
validateKeyInterval(split, new RecordIdentifier(1, 1, 1),
@@ -560,7 +563,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(acidFilePath, null,
stripe.getOffset() + 50,
stripe.getLength() - 100,
- new String[] {"localhost"}, null, false, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, false, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, null);
validateKeyInterval(split, new RecordIdentifier(1, 1, 1),
@@ -572,7 +575,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(acidFilePath, null,
stripe.getOffset(),
stripe.getLength() - 50,
- new String[] {"localhost"}, null, false, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, false, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, null);
// The key interval for the 1st stripe
@@ -588,7 +591,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(acidFilePath, null,
stripe.getOffset(),
stripe.getLength() + 50,
- new String[] {"localhost"}, null, false, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, false, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, null);
// The key interval for the last 2 stripes
@@ -601,7 +604,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(acidFilePath, null,
stripe.getOffset() - 50,
stripe.getLength() + 50,
- new String[] {"localhost"}, null, false, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, false, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, null);
// The key interval for the last stripe
@@ -612,7 +615,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(acidFilePath, null,
stripes.get(0).getOffset() + 50,
reader.getContentLength() - 50,
- new String[] {"localhost"}, null, false, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, false, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, null);
// The key interval for the last 2 stripes
@@ -623,7 +626,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(acidFilePath, null,
stripes.get(0).getOffset(),
reader.getContentLength(),
- new String[] {"localhost"}, null, false, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, false, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, null);
// The key interval for all 3 stripes
@@ -874,7 +877,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
OrcSplit split = new OrcSplit(originalFilePath, null,
stripe.getOffset() + 50,
stripe.getLength() - 100,
- new String[] {"localhost"}, null, true, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, true, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, syntheticProps);
validateKeyInterval(split, new RecordIdentifier(0, bucketProperty, 2),
@@ -885,7 +888,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(originalFilePath, null,
stripe.getOffset() + 50,
stripe.getLength() - 100,
- new String[] {"localhost"}, null, true, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, true, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, syntheticProps);
validateKeyInterval(split, new RecordIdentifier(0, bucketProperty, 3),
@@ -897,7 +900,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(originalFilePath, null,
stripe.getOffset(),
stripe.getLength() - 50,
- new String[] {"localhost"}, null, true, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, true, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, syntheticProps);
// The key interval for the 1st stripe
@@ -909,7 +912,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(originalFilePath, null,
stripe.getOffset(),
stripe.getLength() + 50,
- new String[] {"localhost"}, null, true, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, true, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, syntheticProps);
// The key interval for the last 2 stripes
@@ -922,7 +925,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(originalFilePath, null,
stripe.getOffset() - 50,
stripe.getLength() + 50,
- new String[] {"localhost"}, null, true, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, true, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, syntheticProps);
// The key interval for the last stripe
@@ -933,7 +936,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(originalFilePath, null,
stripes.get(0).getOffset() + 50,
reader.getContentLength() - 50,
- new String[] {"localhost"}, null, true, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, true, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, syntheticProps);
// The key interval for the last 2 stripes
@@ -944,7 +947,7 @@ public class TestVectorizedOrcAcidRowBatchReader {
split = new OrcSplit(originalFilePath, null,
stripes.get(0).getOffset(),
reader.getContentLength(),
- new String[] {"localhost"}, null, true, true, new ArrayList<>(),
+ new String[] {"localhost"}, null, true, true, Lists.newArrayList(new
AcidInputFormat.DeltaMetaData()),
fileLength, fileLength, root, syntheticProps);
// The key interval for all 3 stripes