Repository: asterixdb Updated Branches: refs/heads/master a22ca7bf8 -> c04046c11
[ASTERIXDB-2133] Fix unncessary binary search in GroupFrameAccessor - user model changes: no - storage format changes: no - interface changes: no Details: - GroupFrameAccessor holds a list of frames from a run during the merge step of merge sort. However, everytime we access a tuple, it performs binary search to get the physical tuple index. This patch fixes this by remembering the last accessed frame. It is expected that tuples are accessed sequentially (since it's the merge step), which greatly reduces binary searches Change-Id: I4a1b19ad47f6b1dda4bd5c417932e4c9ba36a714 Reviewed-on: https://asterix-gerrit.ics.uci.edu/2079 Sonar-Qube: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Contrib: Jenkins <[email protected]> Integration-Tests: Jenkins <[email protected]> Reviewed-by: Ian Maxon <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/c04046c1 Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/c04046c1 Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/c04046c1 Branch: refs/heads/master Commit: c04046c11be062c4320ba7e0bbd5258ffb600ad3 Parents: a22ca7b Author: luochen01 <[email protected]> Authored: Fri Oct 27 09:14:19 2017 -0700 Committer: Luo Chen <[email protected]> Committed: Tue Oct 31 21:11:25 2017 -0700 ---------------------------------------------------------------------- .../std/sort/util/GroupFrameAccessor.java | 29 ++++++++++++-------- 1 file changed, 18 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c04046c1/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java index cac50e7..bf61435 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java @@ -57,10 +57,13 @@ public class GroupFrameAccessor implements IFrameTupleAccessor { private final RecordDescriptor recordDescriptor; private final int minFrameSize; private final FrameTupleAccessor frameTupleAccessor; - private int lastTupleIndex; private int lastFrameId; + // the start tuple index of the last accessed frame (inclusive) + private int lastFrameStart; + // the end tuple index of the last accessed frame (exclusive) + private int lastFrameEnd; private ByteBuffer buffer; - private List<InnerFrameInfo> innerFrameInfos; + private final List<InnerFrameInfo> innerFrameInfos; public GroupFrameAccessor(int minFrameSize, RecordDescriptor recordDescriptor) { this.minFrameSize = minFrameSize; @@ -127,8 +130,9 @@ public class GroupFrameAccessor implements IFrameTupleAccessor { @Override public void reset(ByteBuffer buffer) { this.buffer = buffer; - this.lastTupleIndex = -1; this.lastFrameId = -1; + this.lastFrameStart = -1; + this.lastFrameEnd = -1; parseGroupedBuffer(0, buffer.limit()); } @@ -153,12 +157,13 @@ public class GroupFrameAccessor implements IFrameTupleAccessor { private int resetSubTupleAccessor(int tupleIndex) { assert tupleIndex < getTupleCount(); - if (innerFrameInfos.size() == 1) { - return tupleIndex; - } - if (tupleIndex == lastTupleIndex) { - return lastFrameId > 0 ? lastTupleIndex - innerFrameInfos.get(lastFrameId - 1).tupleCount : lastTupleIndex; + if (tupleIndex >= lastFrameStart && tupleIndex < lastFrameEnd) { + // a special optimization path + // since GroupFrameAccessor is used by merge, it is expected that tuples are accessed sequentially + // thus, if tuple still fit into the last frame, we do not need to perform binary search + return tupleIndex - lastFrameStart; } + // we perform binary search to get the frame Id int subFrameId = Collections.binarySearch(innerFrameInfos, tupleIndex); if (subFrameId >= 0) { subFrameId++; @@ -166,9 +171,11 @@ public class GroupFrameAccessor implements IFrameTupleAccessor { subFrameId = -subFrameId - 1; } frameTupleAccessor.reset(buffer, innerFrameInfos.get(subFrameId).start, innerFrameInfos.get(subFrameId).length); - lastTupleIndex = tupleIndex; lastFrameId = subFrameId; - return lastFrameId > 0 ? lastTupleIndex - innerFrameInfos.get(lastFrameId - 1).tupleCount : lastTupleIndex; + lastFrameStart = lastFrameId > 0 ? innerFrameInfos.get(lastFrameId - 1).tupleCount : 0; + lastFrameEnd = innerFrameInfos.get(lastFrameId).tupleCount; + + return tupleIndex - lastFrameStart; } -} +} \ No newline at end of file
