Repository: hive Updated Branches: refs/heads/branch-2.1 113cfb03e -> 0829581e4 refs/heads/master 164889838 -> 75e8be81a
HIVE-14377 : LLAP IO: issue with how estimate cache removes unneeded buffers (Sergey Shelukhin, reviewed by Prasanth J) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/75e8be81 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/75e8be81 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/75e8be81 Branch: refs/heads/master Commit: 75e8be81a8645746baff209b2fc0f997587bd3ce Parents: 1648898 Author: Sergey Shelukhin <[email protected]> Authored: Mon Aug 1 12:01:14 2016 -0700 Committer: Sergey Shelukhin <[email protected]> Committed: Mon Aug 1 12:34:01 2016 -0700 ---------------------------------------------------------------------- .../hive/llap/cache/LowLevelCacheImpl.java | 2 +- .../llap/io/encoded/OrcEncodedDataReader.java | 5 +++ .../llap/io/metadata/OrcFileEstimateErrors.java | 6 +++- .../ql/io/orc/encoded/EncodedReaderImpl.java | 16 +++------ .../hive/ql/io/orc/encoded/IncompleteCb.java | 37 ++++++++++++++++++++ 5 files changed, 53 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/75e8be81/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java index 038c3ed..8bc675d 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java @@ -171,7 +171,7 @@ public class LowLevelCacheImpl implements LowLevelCache, BufferUsageManager, Lla metrics.incrCacheHitBytes(Math.min(requestedLength, currentCached.getLength())); } if (currentNotCached != null) { - assert !currentNotCached.hasData(); + assert !currentNotCached.hasData(); // Assumes no ranges passed to cache to read have data. if (gotAllData != null) { gotAllData.value = false; } http://git-wip-us.apache.org/repos/asf/hive/blob/75e8be81/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java index 4af9dc2..0d212ec 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java @@ -837,6 +837,11 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void> long baseOffset, DiskRangeListFactory factory, BooleanRef gotAllData) { DiskRangeList result = (lowLevelCache == null) ? range : lowLevelCache.getFileData(fileKey, range, baseOffset, factory, counters, gotAllData); + if (LlapIoImpl.ORC_LOGGER.isTraceEnabled()) { + LlapIoImpl.ORC_LOGGER.trace("Disk ranges after data cache (file " + fileKey + + ", base offset " + baseOffset + "): " + + RecordReaderUtils.stringifyDiskRanges(range.next)); + } if (gotAllData.value) return result; return (metadataCache == null) ? range : metadataCache.getIncompleteCbs(fileKey, range, baseOffset, factory, gotAllData); http://git-wip-us.apache.org/repos/asf/hive/blob/75e8be81/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java index ad88b98..20af0d0 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimato import org.apache.hadoop.hive.llap.cache.EvictionDispatcher; import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer; import org.apache.hadoop.hive.ql.io.SyntheticFileId; +import org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb; public class OrcFileEstimateErrors extends LlapCacheableBuffer { private final Object fileKey; @@ -67,6 +68,7 @@ public class OrcFileEstimateErrors extends LlapCacheableBuffer { prev = new MutateHelper(ranges); } DiskRangeList current = ranges; + gotAllData.value = true; // Assume by default that we would find everything. while (current != null) { // We assume ranges in "ranges" are non-overlapping; thus, we will save next in advance. DiskRangeList check = current; @@ -77,7 +79,9 @@ public class OrcFileEstimateErrors extends LlapCacheableBuffer { gotAllData.value = false; continue; } - check.removeSelf(); + // We could just remove here and handle the missing tail during read, but that can be + // dangerous; let's explicitly add an incomplete CB. + check.replaceSelfWith(new IncompleteCb(check.getOffset(), check.getEnd())); } return prev.next; } http://git-wip-us.apache.org/repos/asf/hive/blob/75e8be81/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java index dad35e3..bcb54d6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java @@ -710,6 +710,11 @@ class EncodedReaderImpl implements EncodedReader { ponderReleaseInitialRefcount(unlockUntilCOffset, streamOffset, cc); lastUncompressed = cc; next = current.next; + if (next != null && (endCOffset >= 0 && currentOffset < endCOffset) + && next.getOffset() >= endCOffset) { + throw new IOException("Expected data at " + currentOffset + " (reading until " + + endCOffset + "), but the next buffer starts at " + next.getOffset()); + } } else if (current instanceof IncompleteCb) { // 2b. This is a known incomplete CB caused by ORC CB end boundaries being estimates. if (isTracingEnabled) { @@ -1123,17 +1128,6 @@ class EncodedReaderImpl implements EncodedReader { return ranges; } - private static class IncompleteCb extends DiskRangeList { - public IncompleteCb(long offset, long end) { - super(offset, end); - } - - @Override - public String toString() { - return "incomplete CB start: " + offset + " end: " + end; - } - } - /** * Reads one compression block from the source; handles compression blocks read from * multiple ranges (usually, that would only happen with zcr). http://git-wip-us.apache.org/repos/asf/hive/blob/75e8be81/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java new file mode 100644 index 0000000..e3671ab --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.orc.encoded; + +import org.apache.hadoop.hive.common.io.DiskRangeList; + +public class IncompleteCb extends DiskRangeList { + public IncompleteCb(long offset, long end) { + super(offset, end); + } + + @Override + public String toString() { + return "incomplete CB start: " + offset + " end: " + end; + } + + @Override + public boolean hasData() { + return true; // Should not be treated like it needs data. + } +} \ No newline at end of file
