Ben-Zvi commented on a change in pull request #1408: DRILL-6453: Resolve
deadlock when reading from build and probe sides simultaneously in HashJoin
URL: https://github.com/apache/drill/pull/1408#discussion_r208706673
##########
File path:
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/HashJoinMemoryCalculatorImpl.java
##########
@@ -420,31 +357,32 @@ public long getMaxReservedMemory() {
private void calculateMemoryUsage()
{
// Adjust based on number of records
- maxBuildBatchSize = computeMaxBatchSizeNoHash(buildBatchSize,
buildNumRecords,
- maxBatchNumRecordsBuild, fragmentationFactor, safetyFactor);
- maxProbeBatchSize = computeMaxBatchSizeNoHash(probeBatchSize,
probeNumRecords,
- maxBatchNumRecordsProbe, fragmentationFactor, safetyFactor);
-
- // Safety factor can be multiplied at the end since these batches are
coming from exchange operators, so no excess value vector doubling
- partitionBuildBatchSize = computeMaxBatchSize(buildBatchSize,
- buildNumRecords,
- recordsPerPartitionBatchBuild,
- fragmentationFactor,
- safetyFactor,
- reserveHash);
+ maxBuildBatchSize =
buildSizePredictor.predictBatchSize(maxBatchNumRecordsBuild, false);
- // Safety factor can be multiplied at the end since these batches are
coming from exchange operators, so no excess value vector doubling
- partitionProbeBatchSize = computeMaxBatchSize(
- probeBatchSize,
- probeNumRecords,
- recordsPerPartitionBatchProbe,
- fragmentationFactor,
- safetyFactor,
- reserveHash);
+ if (probeSizePredictor.hasData()) {
+ // We have probe data and we can compute the max incoming size.
+ maxProbeBatchSize =
probeSizePredictor.predictBatchSize(maxBatchNumRecordsProbe, false);
+ } else {
+ // We don't have probe data
+ if (probeEmpty) {
+ // We know the probe has no data, so we don't need to reserve any
space for the incoming probe
+ maxProbeBatchSize = 0;
+ } else {
+ // The probe side may have data, so assume it is the max incoming
batch size. This assumption
+ // can fail in some cases since the batch sizing project is
incomplete.
+ maxProbeBatchSize = maxIncomingBatchSize;
+ }
+ }
+
+ partitionBuildBatchSize =
buildSizePredictor.predictBatchSize(recordsPerPartitionBatchBuild, reserveHash);
+
+ if (probeSizePredictor.hasData()) {
+ partitionProbeBatchSize =
probeSizePredictor.predictBatchSize(recordsPerPartitionBatchProbe, reserveHash);
+ }
maxOutputBatchSize = (long) ((double)outputBatchSize *
fragmentationFactor * safetyFactor);
- long probeReservedMemory;
+ long probeReservedMemory = -1;
Review comment:
Why "-1" ? Looks like the default (zero) would work as well
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services