This is an automated email from the ASF dual-hosted git repository.

dlych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new bcabce2  [NO ISSUE] Setting No. of partitions in HHJ
     new f34f28c  Merge branch 'gerrit/cheshire-cat'
bcabce2 is described below

commit bcabce2eceac5326cc817b2f38917474b344c7a0
Author: Shiva <[email protected]>
AuthorDate: Thu Jul 29 10:33:20 2021 -0700

    [NO ISSUE] Setting No. of partitions in HHJ
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - Sets the default number of partitions for HHJ to 20
    - Sets the minimum number of partitions for HHJ to 20
    - Sets the size of build input to -1 so the minimum
      number of partitions is used
    
    Change-Id: Icfbe12c486ccecaefe44f806cc47c8257acbdebf
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12544
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Dmitry Lychagin <[email protected]>
---
 .../rewriter/base/PhysicalOptimizationConfig.java   |  3 +--
 .../OptimizedHybridHashJoinOperatorDescriptor.java  | 21 +++++++++++----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
index bfff925..ec605b8 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
@@ -86,8 +86,7 @@ public class PhysicalOptimizationConfig {
     }
 
     public int getMaxFramesForJoinLeftInput() {
-        int frameSize = getFrameSize();
-        return getInt(MAX_FRAMES_FOR_JOIN_LEFT_INPUT, (int) (140L * 1024 * MB 
/ frameSize));
+        return getInt(MAX_FRAMES_FOR_JOIN_LEFT_INPUT, -1);
     }
 
     public void setMaxFramesForJoinLeftInput(int frameLimit) {
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
index bb79981..dcccd61 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
@@ -199,20 +199,21 @@ public class OptimizedHybridHashJoinOperatorDescriptor extends AbstractOperatorD
     //memorySize is the memory for join (we have already excluded the 2 
buffers for in/out)
     private static int getNumberOfPartitions(int memorySize, int buildSize, 
double factor, int nPartitions)
             throws HyracksDataException {
-        int numberOfPartitions = 0;
         if (memorySize <= 2) {
             throw new HyracksDataException("Not enough memory is available for 
Hybrid Hash Join.");
         }
-        if (memorySize > buildSize * factor) {
-            // We will switch to in-Mem HJ eventually: create two big 
partitions.
-            // We set 2 (not 1) to avoid a corner case where the only 
partition may be spilled to the disk.
-            // This may happen since this formula doesn't consider the hash 
table size. If this is the case,
-            // we will do a nested loop join after some iterations. But, this 
is not effective.
-            return 2;
+        int minimumNumberOfPartitions = Math.min(20, memorySize);
+        if (buildSize < 0 || memorySize > buildSize * factor) {
+
+            return minimumNumberOfPartitions;
         }
-        numberOfPartitions = (int) (Math.ceil((buildSize * factor / 
nPartitions - memorySize) / (memorySize - 1)));
-        numberOfPartitions = Math.max(2, numberOfPartitions);
-        if (numberOfPartitions > memorySize) {
+        // Two frames are already excluded from the memorySize for taking the input and output into account. That
+        // makes the denominator in the following formula different from the denominator in the original Hybrid Hash
+        // Join, which is memorySize - 1. This formula gives the total number of partitions: the spilled partitions
+        // and the memory-resident partition (the + 1 in the formula accounts for the memory-resident partition).
+        int numberOfPartitions = (int) (Math.ceil((buildSize * factor / 
nPartitions - memorySize) / (memorySize))) + 1;
+        numberOfPartitions = Math.max(minimumNumberOfPartitions, 
numberOfPartitions);
+        if (numberOfPartitions > memorySize) { // Considers applying Grace 
Hash Join instead of Hybrid Hash Join.
             numberOfPartitions = (int) Math.ceil(Math.sqrt(buildSize * factor 
/ nPartitions));
             return Math.max(2, Math.min(numberOfPartitions, memorySize));
         }

Reply via email to