Repository: hive
Updated Branches:
  refs/heads/branch-2 3f25911a6 -> a121b6d56
HIVE-17705: HIVE-17562 is returning incorrect results (Prasanth Jayachandran reviewed by Eugene Koifman)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a121b6d5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a121b6d5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a121b6d5

Branch: refs/heads/branch-2
Commit: a121b6d569b38f7424a2a32efae200970dbcc263
Parents: 3f25911
Author: Prasanth Jayachandran <[email protected]>
Authored: Thu Oct 5 10:33:21 2017 -0700
Committer: Prasanth Jayachandran <[email protected]>
Committed: Thu Oct 5 10:33:48 2017 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/io/AcidInputFormat.java | 5 +++++
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 9 ++-------
 .../org/apache/hadoop/hive/ql/io/orc/OrcSplit.java | 2 +-
 .../hive/ql/io/orc/TestInputOutputFormat.java | 16 ++++++++--------
 4 files changed, 16 insertions(+), 16 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/a121b6d5/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
index 25177ef..db8d0e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
@@ -155,6 +155,11 @@ public interface AcidInputFormat<KEY extends WritableComparable, VALUE>
         stmtIds.add(in.readInt());
       }
     }
+
+    @Override
+    public String toString() {
+      return "{ minTxnId: " + minTxnId + " maxTxnId: " + maxTxnId + " stmtIds: " + stmtIds + " }";
+    }
   }
   /**
    * Options for controlling the record readers.
http://git-wip-us.apache.org/repos/asf/hive/blob/a121b6d5/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 0813033..bfd1aec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1227,7 +1227,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, private OrcFile.WriterVersion writerVersion; private long projColsUncompressedSize; private List<OrcSplit> deltaSplits; - private final SplitInfo splitInfo; private final ByteBuffer ppdResult; private final UserGroupInformation ugi; private final boolean allowSyntheticFileIds; @@ -1250,7 +1249,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, this.hasBase = splitInfo.hasBase; this.projColsUncompressedSize = -1; this.deltaSplits = splitInfo.getSplits(); - this.splitInfo = splitInfo; this.allowSyntheticFileIds = allowSyntheticFileIds; this.ppdResult = splitInfo.ppdResult; } @@ -1464,12 +1462,9 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, // 2) delete all rows // 3) major compaction // 4) insert some rows - // In such cases, consider base files without any stripes as uncovered delta + // In such cases, consider entire base delta file as an orc split (similar to what BI strategy does) if (stripes == null || stripes.isEmpty()) { - AcidOutputFormat.Options options = AcidUtils.parseBaseOrDeltaBucketFilename(file.getPath(), context.conf); - int bucket = options.getBucket(); - splitInfo.covered[bucket] = false; - deltaSplits = splitInfo.getSplits(); + splits.add(createSplit(0, file.getLen(), orcTail)); } else { // if we didn't have predicate pushdown, read everything if (includeStripe == null) { 
http://git-wip-us.apache.org/repos/asf/hive/blob/a121b6d5/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java index d61b24b..998cbc0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java @@ -225,6 +225,6 @@ public class OrcSplit extends FileSplit implements ColumnarSplit, LlapAwareSplit public String toString() { return "OrcSplit [" + getPath() + ", start=" + getStart() + ", length=" + getLength() + ", isOriginal=" + isOriginal + ", fileLength=" + fileLen + ", hasFooter=" + hasFooter + - ", hasBase=" + hasBase + ", deltas=" + (deltas == null ? 0 : deltas.size()) + "]"; + ", hasBase=" + hasBase + ", deltas=" + deltas + "]"; } } http://git-wip-us.apache.org/repos/asf/hive/blob/a121b6d5/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index a14ff5d..d5d0d27 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -3087,7 +3087,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=false")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3159,7 +3159,7 
@@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=true")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for non-vector reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3232,7 +3232,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=false")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3307,7 +3307,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=true")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for vector reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3380,7 +3380,7 @@ public class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=false")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3457,7 +3457,7 @@ public 
class TestInputOutputFormat { assertTrue(split.toString().contains("start=3")); assertTrue(split.toString().contains("hasFooter=true")); assertTrue(split.toString().contains("hasBase=true")); - assertTrue(split.toString().contains("deltas=0")); + assertTrue(split.toString().contains("deltas=[]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for ACID reader, hasFooter is expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3535,7 +3535,7 @@ public class TestInputOutputFormat { // NOTE: don't be surprised if deltas value is different // in older release deltas=2 as min and max transaction are added separately to delta list. // in newer release since both of them are put together deltas=1 - assertTrue(split.toString().contains("deltas=1")); + assertTrue(split.toString().contains("deltas=[{ minTxnId: 1 maxTxnId: 2 stmtIds: [] }]]")); if (split instanceof OrcSplit) { assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter()); @@ -3614,7 +3614,7 @@ public class TestInputOutputFormat { // NOTE: don't be surprised if deltas value is different // in older release deltas=2 as min and max transaction are added separately to delta list. // in newer release since both of them are put together deltas=1 - assertTrue(split.toString().contains("deltas=1")); + assertTrue(split.toString().contains("deltas=[{ minTxnId: 1 maxTxnId: 2 stmtIds: [] }]]")); if (split instanceof OrcSplit) { assertTrue("Footer serialize test for ACID reader, hasFooter is not expected in" + " orc splits.", ((OrcSplit) split).hasFooter());
