xintongsong commented on a change in pull request #9760: [FLINK-13982][runtime] Implement memory calculation logics URL: https://github.com/apache/flink/pull/9760#discussion_r329342755
########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/TaskExecutorResourceUtils.java ########## @@ -0,0 +1,333 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.clusterframework; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.tuple.Tuple6; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.MemorySize; +import org.apache.flink.configuration.TaskManagerOptions; +import org.apache.flink.util.ConfigurationException; + +import static org.apache.flink.util.Preconditions.checkArgument; + +/** + * Utility class for TaskExecutor memory configurations. 
+ */ +public class TaskExecutorResourceUtils { + + private TaskExecutorResourceUtils() {} + + // ------------------------------------------------------------------------ + // Memory Configuration Calculations + // ------------------------------------------------------------------------ + + public static TaskExecutorResourceSpec resourceSpecFromConfig(final Configuration config) throws ConfigurationException { + if (isTaskHeapMemorySizeExplicitlyConfigured(config) && isManagedMemorySizeExplicitlyConfigured(config)) { + // both task heap memory and managed memory is configured, use these to derive total flink memory + return deriveResourceSpecWithExplicitTaskAndManagedMemory(config); + } else if (isTotalFlinkMemorySizeExplicitlyConfigured(config)) { + // total flink memory is configured, but not task heap and managed memory, derive from total flink memory + return deriveResourceSpecWithTotalFlinkMemory(config); + } else if (isTotalProcessMemorySizeExplicitlyConfigured(config)) { + return deriveResourceSpecWithTotalProcessMemory(config); + } else { + throw new ConfigurationException("Either Task Heap Memory size and Managed Memory size, or Total Flink" + + " Memory size, or Total Process Memory size need to be configured explicitly."); + } + } + + private static TaskExecutorResourceSpec deriveResourceSpecWithExplicitTaskAndManagedMemory(final Configuration config) { + // derive total flink internal memory sizes from explicitly configure task heap memory size and managed memory size + + final MemorySize frameworkHeapMemorySize = getFrameworkHeapMemorySize(config); + final MemorySize taskHeapMemorySize = getTaskHeapMemorySize(config); + final MemorySize taskOffHeapMemorySize = getTaskOffHeapMemorySize(config); + + final MemorySize managedMemorySize = getManagedMemorySize(config); + final Tuple2<MemorySize, MemorySize> managedMemorySizeTuple2 = deriveOnHeapAndOffHeapManagedMemorySizeFromManagedMemorySize(config, managedMemorySize); + final MemorySize 
onHeapManagedMemorySize = managedMemorySizeTuple2.f0; + final MemorySize offHeapManagedMemorySize = managedMemorySizeTuple2.f1; + + final MemorySize shuffleMemorySize = deriveShuffleMemoryWithInverseFraction(config, + frameworkHeapMemorySize.add(taskHeapMemorySize).add(taskOffHeapMemorySize).add(managedMemorySize)); + + // derive total flink external memory sizes from derived total flink memory size + + final MemorySize totalFlinkMemorySize = frameworkHeapMemorySize + .add(taskHeapMemorySize) + .add(taskOffHeapMemorySize) + .add(shuffleMemorySize) + .add(managedMemorySize); + + final Tuple2<MemorySize, MemorySize> totalFlinkExternalMemorySizeTuple2 = deriveTotalFlinkExternalMemorySizes(config, totalFlinkMemorySize); + + return new TaskExecutorResourceSpec( + frameworkHeapMemorySize, + taskHeapMemorySize, + taskOffHeapMemorySize, + shuffleMemorySize, + onHeapManagedMemorySize, + offHeapManagedMemorySize, + totalFlinkExternalMemorySizeTuple2.f0, // jvmMetaspaceSize + totalFlinkExternalMemorySizeTuple2.f1); // jvmOverheadSize + } + + private static TaskExecutorResourceSpec deriveResourceSpecWithTotalFlinkMemory(final Configuration config) { + // derive total flink internal memory sizes from explicitly configured total flink memory size + + final MemorySize totalFlinkMemorySize = getTotalFlinkMemorySize(config); + final Tuple6<MemorySize, MemorySize, MemorySize, MemorySize, MemorySize, MemorySize> totalFLinkInternalMemorySizeTuple6 = + deriveTotalFlinkInternalMemorySizes(config, totalFlinkMemorySize); + + // derive total flink external memory sizes from explicitly configured total flink memory size + final Tuple2<MemorySize, MemorySize> totalFlinkExternalMemorySizeTuple2 = + deriveTotalFlinkExternalMemorySizes(config, totalFlinkMemorySize); + + return new TaskExecutorResourceSpec( + totalFLinkInternalMemorySizeTuple6.f0, // frameworkHeapMemorySize + totalFLinkInternalMemorySizeTuple6.f1, // taskHeapMemorySize + totalFLinkInternalMemorySizeTuple6.f2, // 
taskOffHeapMemorySize + totalFLinkInternalMemorySizeTuple6.f3, // shuffleMemorySize + totalFLinkInternalMemorySizeTuple6.f4, // onHeapManagedMemorySize + totalFLinkInternalMemorySizeTuple6.f5, // offHeapManagedMemorySize + totalFlinkExternalMemorySizeTuple2.f0, // jvmMetaspaceSize + totalFlinkExternalMemorySizeTuple2.f1); // jvmOverheadSize + } + + private static TaskExecutorResourceSpec deriveResourceSpecWithTotalProcessMemory(final Configuration config) { + // derive total flink external memory sizes from explicitly configured total process memory size + + final MemorySize totalProcessMemorySize = getTotalProcessMemorySize(config); + final MemorySize jvmMetaspaceSize = getJvmMetaspaceSize(config); + final MemorySize jvmOverheadSize = deriveJvmOverheadWithFraction(config, totalProcessMemorySize); + + final MemorySize totalFlinkExternalMemorySize = jvmMetaspaceSize.add(jvmOverheadSize); + checkArgument(totalFlinkExternalMemorySize.getBytes() < totalProcessMemorySize.getBytes(), + "Sum of configured JVM Metaspace (" + jvmMetaspaceSize.toString() + + ") and JVM Overhead (" + jvmOverheadSize.toString() + + ") exceed configured Total Process memory (" + totalProcessMemorySize.toString() + ")."); + final MemorySize totalFlinkMemorySize = totalProcessMemorySize.subtract(totalFlinkExternalMemorySize); + + // derive total flink internal memory sizes from derived total flink memory size + final Tuple6<MemorySize, MemorySize, MemorySize, MemorySize, MemorySize, MemorySize> totalFLinkInternalMemorySizeTuple6 = + deriveTotalFlinkInternalMemorySizes(config, totalFlinkMemorySize); + + return new TaskExecutorResourceSpec( + totalFLinkInternalMemorySizeTuple6.f0, // frameworkHeapMemorySize + totalFLinkInternalMemorySizeTuple6.f1, // taskHeapMemorySize + totalFLinkInternalMemorySizeTuple6.f2, // taskOffHeapMemorySize + totalFLinkInternalMemorySizeTuple6.f3, // shuffleMemorySize + totalFLinkInternalMemorySizeTuple6.f4, // onHeapManagedMemorySize + 
totalFLinkInternalMemorySizeTuple6.f5, // offHeapManagedMemorySize + jvmMetaspaceSize, + jvmOverheadSize); + } + + private static Tuple2<MemorySize, MemorySize> deriveTotalFlinkExternalMemorySizes( + final Configuration config, final MemorySize totalFlinkMemorySize) { + final MemorySize jvmMetaspaceSize = getJvmMetaspaceSize(config); + final MemorySize jvmOverheadSize = deriveJvmOverheadWithInverseFraction(config, + totalFlinkMemorySize.add(jvmMetaspaceSize)); + return new Tuple2<>(jvmMetaspaceSize, jvmOverheadSize); + } + + private static Tuple6<MemorySize, MemorySize, MemorySize, MemorySize, MemorySize, MemorySize> deriveTotalFlinkInternalMemorySizes( + final Configuration config, final MemorySize totalFlinkMemorySize) { + final MemorySize frameworkHeapMemorySize = getFrameworkHeapMemorySize(config); + final MemorySize taskOffHeapMemorySize = getTaskOffHeapMemorySize(config); + + final MemorySize shuffleMemorySize = deriveShuffleMemoryWithFraction(config, totalFlinkMemorySize); + final MemorySize managedMemorySize = deriveManagedMemoryAbsoluteOrWithFraction(config, totalFlinkMemorySize); + + final MemorySize totalFlinkExceptTaskHeapMemorySize = + frameworkHeapMemorySize.add(taskOffHeapMemorySize).add(shuffleMemorySize).add(managedMemorySize); + checkArgument(totalFlinkExceptTaskHeapMemorySize.getBytes() < totalFlinkMemorySize.getBytes(), + "Sum of configured Framework Heap Memory (" + frameworkHeapMemorySize.toString() + + "), Task Off-Heap Memory (" + taskOffHeapMemorySize.toString() + + "), Shuffle Memory (" + shuffleMemorySize.toString() + + ") and Managed Memory (" + managedMemorySize.toString() + + ") exceed configured Total Flink Memory (" + totalFlinkMemorySize.toString() + ")."); + final MemorySize taskHeapMemorySize = totalFlinkMemorySize.subtract(totalFlinkExceptTaskHeapMemorySize); Review comment: Currently in this PR, `taskmanager.memory.task.heap.size` will be ignored in such cases. 
I also noticed your concern during the implementation: the explicitly configured task heap memory size does not take effect merely because the managed memory size is not explicitly configured at the same time, which is not very intuitive for users. However, if we want to respect the user's explicit configurations as much as possible, then the question becomes which implicit configuration should be overridden. E.g., if the user explicitly configured the total flink memory size and the task heap memory size, and the explicitly configured task heap memory size is larger than the one derived from the total flink memory size, then which memory pool should we shrink to guarantee the explicitly configured task heap memory size? Leaving aside task off-heap memory and framework heap memory (which both have default absolute values), should we prefer shrinking managed memory or shuffle memory? And do we respect the implicit min sizes of managed/shuffle memory? What if shrinking either managed or shuffle memory alone cannot free enough memory for the explicitly configured task heap memory (w.r.t. the min sizes), so that we need to shrink both of them? How much should we shrink each of them? It seems to me that the calculation logic can become complicated if we try to respect all explicit user configurations, which may also make it harder for users to understand and harder to maintain. Thus, before we decide how to deal with this tradeoff, I implemented this PR strictly following the FLIP-49 design doc. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
