Repository: hive
Updated Branches:
  refs/heads/master 8c4b99a4e -> 3726ce590

HIVE-13730 : Avoid double spilling the same partition when memory threshold is set very low (Wei Zheng, reviewed by Vikram Dixit K)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3726ce59
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3726ce59
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3726ce59

Branch: refs/heads/master
Commit: 3726ce590f9dcb0e679ed6faaafa1211e9f881d3
Parents: 8c4b99a
Author: Wei Zheng <w...@apache.org>
Authored: Wed May 18 09:51:31 2016 -0700
Committer: Wei Zheng <w...@apache.org>
Committed: Wed May 18 09:51:31 2016 -0700

----------------------------------------------------------------------
 .../persistence/HybridHashTableContainer.java | 22 +++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/3726ce59/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index 5552dfb..bb35bae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -385,6 +385,11 @@ public class HybridHashTableContainer
         memoryUsed += hashPartitions[i].hashMap.memorySize();
       }
     }
+
+    if (writeBufferSize * (numPartitions - numPartitionsSpilledOnCreation) > memoryThreshold) {
+      LOG.error("There is not enough memory to allocate " +
+          (numPartitions - numPartitionsSpilledOnCreation) + " hash partitions.");
+    }
     assert numPartitionsSpilledOnCreation != numPartitions : "All partitions are directly spilled!"
         + " It is not supported now.";
     LOG.info("Number of partitions created: " + numPartitions);
@@ -558,7 +563,7 @@ public class HybridHashTableContainer
    * @return the biggest partition number
    */
   private int biggestPartition() {
-    int res = 0;
+    int res = -1;
     int maxSize = 0;
 
     // If a partition has been spilled to disk, its size will be 0, i.e. it won't be picked
@@ -574,6 +579,17 @@ public class HybridHashTableContainer
         res = i;
       }
     }
+
+    // It can happen that although there're some partitions in memory, but their sizes are all 0.
+    // In that case we just pick one and spill.
+    if (res == -1) {
+      for (int i = 0; i < hashPartitions.length; i++) {
+        if (!isOnDisk(i)) {
+          return i;
+        }
+      }
+    }
+
     return res;
   }
 
@@ -585,6 +601,10 @@ public class HybridHashTableContainer
   public long spillPartition(int partitionId) throws IOException {
     HashPartition partition = hashPartitions[partitionId];
     int inMemRowCount = partition.hashMap.getNumValues();
+    if (inMemRowCount == 0) {
+      LOG.warn("Trying to spill an empty hash partition! It may be due to " +
+          "hive.auto.convert.join.noconditionaltask.size being set too low.");
+    }
 
     File file = FileUtils.createLocalDirsTempFile(
         spillLocalDirs, "partition-" + partitionId + "-", null, false);