Repository: hive
Updated Branches:
  refs/heads/master 8c4b99a4e -> 3726ce590


HIVE-13730 : Avoid double spilling the same partition when memory threshold is set very low (Wei Zheng, reviewed by Vikram Dixit K)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3726ce59
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3726ce59
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3726ce59

Branch: refs/heads/master
Commit: 3726ce590f9dcb0e679ed6faaafa1211e9f881d3
Parents: 8c4b99a
Author: Wei Zheng <w...@apache.org>
Authored: Wed May 18 09:51:31 2016 -0700
Committer: Wei Zheng <w...@apache.org>
Committed: Wed May 18 09:51:31 2016 -0700

----------------------------------------------------------------------
 .../persistence/HybridHashTableContainer.java   | 22 +++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3726ce59/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index 5552dfb..bb35bae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -385,6 +385,11 @@ public class HybridHashTableContainer
         memoryUsed += hashPartitions[i].hashMap.memorySize();
       }
     }
+
+    if (writeBufferSize * (numPartitions - numPartitionsSpilledOnCreation) > memoryThreshold) {
+      LOG.error("There is not enough memory to allocate " +
+          (numPartitions - numPartitionsSpilledOnCreation) + " hash partitions.");
+    }
     assert numPartitionsSpilledOnCreation != numPartitions : "All partitions are directly spilled!" +
         " It is not supported now.";
     LOG.info("Number of partitions created: " + numPartitions);
@@ -558,7 +563,7 @@ public class HybridHashTableContainer
    * @return the biggest partition number
    */
   private int biggestPartition() {
-    int res = 0;
+    int res = -1;
     int maxSize = 0;
 
     // If a partition has been spilled to disk, its size will be 0, i.e. it won't be picked
@@ -574,6 +579,17 @@ public class HybridHashTableContainer
         res = i;
       }
     }
+
+    // It can happen that although there're some partitions in memory, but their sizes are all 0.
+    // In that case we just pick one and spill.
+    if (res == -1) {
+      for (int i = 0; i < hashPartitions.length; i++) {
+        if (!isOnDisk(i)) {
+          return i;
+        }
+      }
+    }
+
     return res;
   }
 
@@ -585,6 +601,10 @@ public class HybridHashTableContainer
   public long spillPartition(int partitionId) throws IOException {
     HashPartition partition = hashPartitions[partitionId];
     int inMemRowCount = partition.hashMap.getNumValues();
+    if (inMemRowCount == 0) {
+      LOG.warn("Trying to spill an empty hash partition! It may be due to " +
+          "hive.auto.convert.join.noconditionaltask.size being set too low.");
+    }
 
     File file = FileUtils.createLocalDirsTempFile(
         spillLocalDirs, "partition-" + partitionId + "-", null, false);

Reply via email to