This is an automated email from the ASF dual-hosted git repository.
dlych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new bcabce2 [NO ISSUE] Setting No. of partitions in HHJ
new f34f28c Merge branch 'gerrit/cheshire-cat'
bcabce2 is described below
commit bcabce2eceac5326cc817b2f38917474b344c7a0
Author: Shiva <[email protected]>
AuthorDate: Thu Jul 29 10:33:20 2021 -0700
[NO ISSUE] Setting No. of partitions in HHJ
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Sets the default number of partitions for HHJ to 20
- Sets the minimum number of partitions for HHJ to 20
- Sets the size of build input to -1 so the minimum
number of partitions is used
Change-Id: Icfbe12c486ccecaefe44f806cc47c8257acbdebf
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12544
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Reviewed-by: Dmitry Lychagin <[email protected]>
---
.../rewriter/base/PhysicalOptimizationConfig.java | 3 +--
.../OptimizedHybridHashJoinOperatorDescriptor.java | 21 +++++++++++----------
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
index bfff925..ec605b8 100644
---
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
+++
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
@@ -86,8 +86,7 @@ public class PhysicalOptimizationConfig {
}
public int getMaxFramesForJoinLeftInput() {
- int frameSize = getFrameSize();
- return getInt(MAX_FRAMES_FOR_JOIN_LEFT_INPUT, (int) (140L * 1024 * MB
/ frameSize));
+ return getInt(MAX_FRAMES_FOR_JOIN_LEFT_INPUT, -1);
}
public void setMaxFramesForJoinLeftInput(int frameLimit) {
diff --git
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
index bb79981..dcccd61 100644
---
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
+++
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
@@ -199,20 +199,21 @@ public class OptimizedHybridHashJoinOperatorDescriptor
extends AbstractOperatorD
//memorySize is the memory for join (we have already excluded the 2
buffers for in/out)
private static int getNumberOfPartitions(int memorySize, int buildSize,
double factor, int nPartitions)
throws HyracksDataException {
- int numberOfPartitions = 0;
if (memorySize <= 2) {
throw new HyracksDataException("Not enough memory is available for
Hybrid Hash Join.");
}
- if (memorySize > buildSize * factor) {
- // We will switch to in-Mem HJ eventually: create two big
partitions.
- // We set 2 (not 1) to avoid a corner case where the only
partition may be spilled to the disk.
- // This may happen since this formula doesn't consider the hash
table size. If this is the case,
- // we will do a nested loop join after some iterations. But, this
is not effective.
- return 2;
+ int minimumNumberOfPartitions = Math.min(20, memorySize);
+ if (buildSize < 0 || memorySize > buildSize * factor) {
+
+ return minimumNumberOfPartitions;
}
- numberOfPartitions = (int) (Math.ceil((buildSize * factor /
nPartitions - memorySize) / (memorySize - 1)));
- numberOfPartitions = Math.max(2, numberOfPartitions);
- if (numberOfPartitions > memorySize) {
+ // Two frames are already excluded from the memorySize for taking the
input and output into account. That
+ // makes the denominator in the following formula to be different than
the denominator in original Hybrid Hash
+ // Join which is memorySize - 1. This formula gives the total number
of partitions, the spilled partitions
+ // and the memory-resident partition ( + 1 in formula is for taking
the memory-resident partition into account).
+ int numberOfPartitions = (int) (Math.ceil((buildSize * factor /
nPartitions - memorySize) / (memorySize))) + 1;
+ numberOfPartitions = Math.max(minimumNumberOfPartitions,
numberOfPartitions);
+ if (numberOfPartitions > memorySize) { // Considers applying Grace
Hash Join instead of Hybrid Hash Join.
numberOfPartitions = (int) Math.ceil(Math.sqrt(buildSize * factor
/ nPartitions));
return Math.max(2, Math.min(numberOfPartitions, memorySize));
}