HIVE-17705: HIVE-17562 is returning incorrect results (Prasanth Jayachandran reviewed by Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0c56cf69 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0c56cf69 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0c56cf69 Branch: refs/heads/branch-2.3 Commit: 0c56cf6968c613274b9f46c966fd785df16d746a Parents: 4bf2b37 Author: Prasanth Jayachandran <prasan...@apache.org> Authored: Thu Oct 5 10:33:21 2017 -0700 Committer: Prasanth Jayachandran <prasan...@apache.org> Committed: Thu Oct 5 10:51:40 2017 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/io/AcidInputFormat.java | 5 +++++ .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 9 ++------- .../org/apache/hadoop/hive/ql/io/orc/OrcSplit.java | 2 +- .../hive/ql/io/orc/TestInputOutputFormat.java | 16 ++++++++-------- 4 files changed, 16 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0c56cf69/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java index 7c7074d..c0ffaf7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java @@ -152,6 +152,11 @@ public interface AcidInputFormat<KEY extends WritableComparable, VALUE> stmtIds.add(in.readInt()); } } + + @Override + public String toString() { + return "{ minTxnId: " + minTxnId + " maxTxnId: " + maxTxnId + " stmtIds: " + stmtIds + " }"; + } } /** * Options for controlling the record readers. http://git-wip-us.apache.org/repos/asf/hive/blob/0c56cf69/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 6be38a8..e1aea1f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1219,7 +1219,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, private OrcFile.WriterVersion writerVersion; private long projColsUncompressedSize; private List<OrcSplit> deltaSplits; - private final SplitInfo splitInfo; private final ByteBuffer ppdResult; private final UserGroupInformation ugi; private final boolean allowSyntheticFileIds; @@ -1242,7 +1241,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, this.hasBase = splitInfo.hasBase; this.projColsUncompressedSize = -1; this.deltaSplits = splitInfo.getSplits(); - this.splitInfo = splitInfo; this.allowSyntheticFileIds = allowSyntheticFileIds; this.ppdResult = splitInfo.ppdResult; } @@ -1456,12 +1454,9 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, // 2) delete all rows // 3) major compaction // 4) insert some rows - // In such cases, consider base files without any stripes as uncovered delta + // In such cases, consider entire base delta file as an orc split (similar to what BI strategy does) if (stripes == null || stripes.isEmpty()) { - AcidOutputFormat.Options options = AcidUtils.parseBaseOrDeltaBucketFilename(file.getPath(), context.conf); - int bucket = options.getBucket(); - splitInfo.covered[bucket] = false; - deltaSplits = splitInfo.getSplits(); + splits.add(createSplit(0, file.getLen(), orcTail)); } else { // if we didn't have predicate pushdown, read everything if (includeStripe == null) { http://git-wip-us.apache.org/repos/asf/hive/blob/0c56cf69/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java index d61b24b..998cbc0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java @@ -225,6 +225,6 @@ public class OrcSplit extends FileSplit implements ColumnarSplit, LlapAwareSplit public String toString() { return "OrcSplit [" + getPath() + ", start=" + getStart() + ", length=" + getLength() + ", isOriginal=" + isOriginal + ", fileLength=" + fileLen + ", hasFooter=" + hasFooter + - ", hasBase=" + hasBase + ", deltas=" + (deltas == null ? 0 : deltas.size()) + "]"; + ", hasBase=" + hasBase + ", deltas=" + deltas + "]"; } } http://git-wip-us.apache.org/repos/asf/hive/blob/0c56cf69/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index b003eb8..8e364e8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -3087,7 +3087,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=false")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3159,7 +3159,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=true")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for non-vector reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3232,7 +3232,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=false")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3307,7 +3307,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=true")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for vector reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3380,7 +3380,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=false")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3455,7 +3455,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=true")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for ACID reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3531,7 +3531,7 @@ public class TestInputOutputFormat { // NOTE: don't be surprised if deltas value is different // in older release deltas=2 as min and max transaction are added separately to delta list. // in newer release since both of them are put together deltas=1 - assertTrue(split.toString().contains("deltas=1")); + assertTrue(split.toString().contains("deltas=[{ minTxnId: 1 maxTxnId: 2 stmtIds: [] }]]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3609,7 +3609,7 @@ public class TestInputOutputFormat { // NOTE: don't be surprised if deltas value is different // in older release deltas=2 as min and max transaction are added separately to delta list. // in newer release since both of them are put together deltas=1 - assertTrue(split.toString().contains("deltas=1")); + assertTrue(split.toString().contains("deltas=[{ minTxnId: 1 maxTxnId: 2 stmtIds: [] }]]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for ACID reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter());