HIVE-17705: HIVE-17562 is returning incorrect results (Prasanth Jayachandran reviewed by Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c6e60d1e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c6e60d1e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c6e60d1e Branch: refs/heads/branch-2.2 Commit: c6e60d1e30c5ce6e0d87c913ef32eb7f41d76f97 Parents: b71a88f Author: Prasanth Jayachandran <prasan...@apache.org> Authored: Thu Oct 5 10:33:21 2017 -0700 Committer: Prasanth Jayachandran <prasan...@apache.org> Committed: Thu Oct 5 10:38:06 2017 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/io/AcidInputFormat.java | 5 +++++ .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 9 ++------- .../org/apache/hadoop/hive/ql/io/orc/OrcSplit.java | 2 +- .../hive/ql/io/orc/TestInputOutputFormat.java | 16 ++++++++-------- 4 files changed, 16 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/c6e60d1e/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java index 7c7074d..c0ffaf7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java @@ -152,6 +152,11 @@ public interface AcidInputFormat<KEY extends WritableComparable, VALUE> stmtIds.add(in.readInt()); } } + + @Override + public String toString() { + return "{ minTxnId: " + minTxnId + " maxTxnId: " + maxTxnId + " stmtIds: " + stmtIds + " }"; + } } /** * Options for controlling the record readers. http://git-wip-us.apache.org/repos/asf/hive/blob/c6e60d1e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 44b5011..d25b3fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1123,7 +1123,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, private OrcFile.WriterVersion writerVersion; private long projColsUncompressedSize; private List<OrcSplit> deltaSplits; - private final SplitInfo splitInfo; private final ByteBuffer ppdResult; private final UserGroupInformation ugi; private final boolean allowSyntheticFileIds; @@ -1146,7 +1145,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, this.hasBase = splitInfo.hasBase; this.projColsUncompressedSize = -1; this.deltaSplits = splitInfo.getSplits(); - this.splitInfo = splitInfo; this.allowSyntheticFileIds = allowSyntheticFileIds; this.ppdResult = splitInfo.ppdResult; } @@ -1360,12 +1358,9 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, // 2) delete all rows // 3) major compaction // 4) insert some rows - // In such cases, consider base files without any stripes as uncovered delta + // In such cases, consider entire base delta file as an orc split (similar to what BI strategy does) if (stripes == null || stripes.isEmpty()) { - AcidOutputFormat.Options options = AcidUtils.parseBaseOrDeltaBucketFilename(file.getPath(), context.conf); - int bucket = options.getBucket(); - splitInfo.covered[bucket] = false; - deltaSplits = splitInfo.getSplits(); + splits.add(createSplit(0, file.getLen(), orcTail)); } else { // if we didn't have predicate pushdown, read everything if (includeStripe == null) { http://git-wip-us.apache.org/repos/asf/hive/blob/c6e60d1e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java index f3acbb1..0a1adb1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java @@ -225,6 +225,6 @@ public class OrcSplit extends FileSplit implements ColumnarSplit, LlapAwareSplit public String toString() { return "OrcSplit [" + getPath() + ", start=" + getStart() + ", length=" + getLength() + ", isOriginal=" + isOriginal + ", fileLength=" + fileLen + ", hasFooter=" + hasFooter + - ", hasBase=" + hasBase + ", deltas=" + (deltas == null ? 0 : deltas.size()) + "]"; + ", hasBase=" + hasBase + ", deltas=" + deltas + "]"; } } http://git-wip-us.apache.org/repos/asf/hive/blob/c6e60d1e/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 27bd934..b33934a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -2935,7 +2935,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=false")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3007,7 +3007,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=true")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for non-vector reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3080,7 +3080,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=false")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3155,7 +3155,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=true")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for vector reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3228,7 +3228,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=false")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3303,7 +3303,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=true")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for ACID reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3379,7 +3379,7 @@ public class TestInputOutputFormat { // NOTE: don't be surprised if deltas value is different // in older release deltas=2 as min and max transaction are added separately to delta list. // in newer release since both of them are put together deltas=1 - assertTrue(split.toString().contains("deltas=1")); + assertTrue(split.toString().contains("deltas=[{ minTxnId: 1 maxTxnId: 2 stmtIds: [] }]]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3457,7 +3457,7 @@ public class TestInputOutputFormat { // NOTE: don't be surprised if deltas value is different // in older release deltas=2 as min and max transaction are added separately to delta list. // in newer release since both of them are put together deltas=1 - assertTrue(split.toString().contains("deltas=1")); + assertTrue(split.toString().contains("deltas=[{ minTxnId: 1 maxTxnId: 2 stmtIds: [] }]]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for ACID reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter());