This is an automated email from the ASF dual-hosted git repository.
jhung pushed a change to branch YARN-8200.branch3
in repository https://gitbox.apache.org/repos/asf/hadoop.git.
from a0291a0 YARN-9175. Null resources check in ResourceInfo for branch-3.0
new 2a8a3a5 YARN-7033. Add support for NM Recovery of assigned resources
(e.g. GPU's, NUMA, FPGA's) to container. (Devaraj K and Wangda Tan)
new cf67a3b YARN-6620. Add support in NodeManager to isolate GPU devices
by using CGroups. Contributed by Wangda Tan.
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../yarn/api/records/ResourceInformation.java | 10 +
.../apache/hadoop/yarn/conf/YarnConfiguration.java | 33 ++
.../hadoop/yarn/util/resource/ResourceUtils.java | 51 +-
.../src/main/resources/yarn-default.xml | 39 ++
.../yarn/util/resource/TestResourceUtils.java | 17 +
.../yarn/server/nodemanager/ContainerExecutor.java | 3 +-
.../hadoop/yarn/server/nodemanager/Context.java | 3 +
.../nodemanager/DefaultContainerExecutor.java | 2 +-
.../server/nodemanager/LinuxContainerExecutor.java | 10 +-
.../yarn/server/nodemanager/NodeManager.java | 92 ++--
.../server/nodemanager/NodeStatusUpdaterImpl.java | 38 +-
.../containermanager/container/Container.java | 7 +
.../containermanager/container/ContainerImpl.java | 13 +
.../container/ResourceMappings.java | 124 +++++
.../linux/privileged/PrivilegedOperation.java | 1 +
.../linux/resources/ResourceHandlerChain.java | 4 +-
.../linux/resources/ResourceHandlerModule.java | 42 +-
.../linux/resources/gpu/GpuResourceAllocator.java | 242 +++++++++
.../resources/gpu/GpuResourceHandlerImpl.java | 153 ++++++
.../resourceplugin/NodeResourceUpdaterPlugin.java | 52 ++
.../resourceplugin/ResourcePlugin.java | 83 ++++
.../resourceplugin/ResourcePluginManager.java | 106 ++++
.../resourceplugin/gpu/GpuDiscoverer.java | 254 ++++++++++
.../gpu/GpuNodeResourceUpdateHandler.java | 66 +++
.../resourceplugin/gpu/GpuResourcePlugin.java | 61 +++
.../recovery/NMLeveldbStateStoreService.java | 42 ++
.../recovery/NMNullStateStoreService.java | 7 +
.../nodemanager/recovery/NMStateStoreService.java | 23 +
.../webapp/dao/gpu/GpuDeviceInformation.java | 72 +++
.../webapp/dao/gpu/GpuDeviceInformationParser.java | 87 ++++
.../webapp/dao/gpu/PerGpuDeviceInformation.java | 165 +++++++
.../webapp/dao/gpu/PerGpuMemoryUsage.java | 58 +++
.../webapp/dao/gpu/PerGpuTemperature.java | 80 +++
.../webapp/dao/gpu/PerGpuUtilizations.java | 50 ++
.../server/nodemanager/NodeManagerTestBase.java | 164 ++++++
.../nodemanager/TestDefaultContainerExecutor.java | 4 +-
.../nodemanager/TestLinuxContainerExecutor.java | 2 +-
.../TestLinuxContainerExecutorWithMocks.java | 2 +-
.../yarn/server/nodemanager/TestNodeManager.java | 2 +-
.../server/nodemanager/TestNodeStatusUpdater.java | 100 +---
.../nodemanager/amrmproxy/BaseAMRMProxyTest.java | 46 +-
.../TestContainerManagerRecovery.java | 161 ++++--
.../linux/resources/TestResourceHandlerModule.java | 8 +-
.../resources/gpu/TestGpuResourceHandler.java | 382 ++++++++++++++
.../TestContainersMonitorResourceChange.java | 2 +-
.../resourceplugin/TestResourcePluginManager.java | 261 ++++++++++
.../resourceplugin/gpu/TestGpuDiscoverer.java | 123 +++++
.../recovery/NMMemoryStateStoreService.java | 14 +
.../recovery/TestNMLeveldbStateStoreService.java | 121 +++--
.../server/nodemanager/webapp/MockContainer.java | 6 +
.../dao/gpu/TestGpuDeviceInformationParser.java | 50 ++
.../test/resources/nvidia-smi-sample-xml-output | 547 +++++++++++++++++++++
52 files changed, 3796 insertions(+), 289 deletions(-)
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ResourceMappings.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceAllocator.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/NodeResourceUpdaterPlugin.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePlugin.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformation.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/GpuDeviceInformationParser.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuDeviceInformation.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuMemoryUsage.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuTemperature.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/PerGpuUtilizations.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerTestBase.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/TestResourcePluginManager.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/gpu/TestGpuDeviceInformationParser.java
create mode 100644
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/nvidia-smi-sample-xml-output
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org