Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/761#discussion_r103335045 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/xsort/managed/ExternalSortBatch.java --- @@ -1231,52 +1308,44 @@ private boolean consolidateBatches() { * This method spills only half the accumulated batches * minimizing unnecessary disk writes. The exact count must lie between * the minimum and maximum spill counts. - */ + */ private void spillFromMemory() { // Determine the number of batches to spill to create a spill file // of the desired size. The actual file size might be a bit larger // or smaller than the target, which is expected. - long estSize = 0; int spillCount = 0; + long spillSize = 0; for (InputBatch batch : bufferedBatches) { - estSize += batch.getDataSize(); - if (estSize > spillFileSize) { - break; } + long batchSize = batch.getDataSize(); + spillSize += batchSize; spillCount++; + if (spillSize + batchSize / 2 > spillFileSize) { + break; } } - // Should not happen, but just to be sure... + // Must always spill at least 2, even if this creates an over-size + // spill file. - if (spillCount == 0) { - return; } + spillCount = Math.max(spillCount, 2); // Do the actual spill. - logger.trace("Starting spill from memory. Memory = {}, Buffered batch count = {}, Spill batch count = {}", - allocator.getAllocatedMemory(), bufferedBatches.size(), spillCount); mergeAndSpill(bufferedBatches, spillCount); } private void mergeAndSpill(LinkedList<? extends BatchGroup> source, int count) { - if (count == 0) { - return; } spilledRuns.add(doMergeAndSpill(source, count)); } private BatchGroup.SpilledRun doMergeAndSpill(LinkedList<? extends BatchGroup> batchGroups, int spillCount) { List<BatchGroup> batchesToSpill = Lists.newArrayList(); spillCount = Math.min(batchGroups.size(), spillCount); assert spillCount > 0 : "Spill count to mergeAndSpill must not be zero"; - long spillSize = 0; for (int i = 0; i < spillCount; i++) { - @SuppressWarnings("resource") - BatchGroup batch = batchGroups.pollFirst(); - assert batch != null : "Encountered a null batch during merge and spill operation"; - batchesToSpill.add(batch); - spillSize += batch.getDataSize(); + batchesToSpill.add(batchGroups.pollFirst()); --- End diff -- This won't happen for first-generation spills due to the check in `isSpillNeeded`. But, it could happen when spilling for merges, so I fixed the code.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---