[
https://issues.apache.org/jira/browse/YARN-11196?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17584657#comment-17584657
]
ASF GitHub Bot commented on YARN-11196:
---------------------------------------
PrabhuJoseph commented on code in PR #4742:
URL: https://github.com/apache/hadoop/pull/4742#discussion_r954562550
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java:
##########
@@ -736,4 +755,197 @@ public void testPickDirectory() throws Exception {
// new FsPermission(ApplicationLocalizer.LOGDIR_PERM), true);
// }
+ @Before
+ public void setUp() throws IOException, YarnException {
+ yarnConfiguration = new YarnConfiguration();
+ setNumaConfig();
+ Context mockContext = createAndGetMockContext();
+ NMStateStoreService nmStateStoreService =
+ mock(NMStateStoreService.class);
+ when(mockContext.getNMStateStore()).thenReturn(nmStateStoreService);
+ numaResourceAllocator = new NumaResourceAllocator(mockContext) {
+ @Override
+ public String executeNGetCmdOutput(Configuration config)
+ throws YarnRuntimeException {
+ return getNumaCmdOutput();
+ }
+ };
+
+ numaResourceAllocator.init(yarnConfiguration);
+ FileContext lfs = FileContext.getLocalFSFileContext();
+ containerExecutor = new DefaultContainerExecutor(lfs) {
+ @Override
+ public Configuration getConf() {
+ return yarnConfiguration;
+ }
+ };
+ containerExecutor.setNumaResourceAllocator(numaResourceAllocator);
+ mockContainer = mock(Container.class);
+ }
+
+ private void setNumaConfig() {
+ yarnConfiguration.set(YarnConfiguration.NM_NUMA_AWARENESS_ENABLED, "true");
+ yarnConfiguration.set(YarnConfiguration.NM_NUMA_AWARENESS_READ_TOPOLOGY,
"true");
+ yarnConfiguration.set(YarnConfiguration.NM_NUMA_AWARENESS_NUMACTL_CMD,
"/usr/bin/numactl");
+ }
+
+
+ private String getNumaCmdOutput() {
+ // architecture of 8 cpu cores
+ // randomly picked size of memory
+ return "available: 2 nodes (0-1)\n\t"
+ + "node 0 cpus: 0 2 4 6\n\t"
+ + "node 0 size: 73717 MB\n\t"
+ + "node 0 free: 73717 MB\n\t"
+ + "node 1 cpus: 1 3 5 7\n\t"
+ + "node 1 size: 73717 MB\n\t"
+ + "node 1 free: 73717 MB\n\t"
+ + "node distances:\n\t"
+ + "node 0 1\n\t"
+ + "0: 10 20\n\t"
+ + "1: 20 10";
+ }
+
+ private Context createAndGetMockContext() {
+ Context mockContext = mock(Context.class);
+ @SuppressWarnings("unchecked")
+ ConcurrentHashMap<ContainerId, Container> mockContainers = mock(
+ ConcurrentHashMap.class);
+ mockContainer = mock(Container.class);
+ when(mockContainer.getResourceMappings())
+ .thenReturn(new ResourceMappings());
+ when(mockContainers.get(any())).thenReturn(mockContainer);
+ when(mockContext.getContainers()).thenReturn(mockContainers);
+ when(mockContainer.getResource()).thenReturn(Resource.newInstance(2048,
2));
+ return mockContext;
+ }
+
+ private void testAllocateNumaResource(String containerId, Resource resource,
+ String memNodes, String cpuNodes)
throws Exception {
+ when(mockContainer.getContainerId())
+ .thenReturn(ContainerId.fromString(containerId));
+ when(mockContainer.getResource()).thenReturn(resource);
+ NumaResourceAllocation numaResourceAllocation =
+ numaResourceAllocator.allocateNumaNodes(mockContainer);
+ String[] commands =
containerExecutor.getNumaCommands(numaResourceAllocation);
+ assertEquals(Arrays.asList(commands), Arrays.asList("/usr/bin/numactl",
+ "--interleave=" + memNodes, "--cpunodebind=" + cpuNodes));
+ }
+
+ @Test
+ public void testAllocateNumaMemoryResource() throws Exception {
+ // keeping cores constant for testing memory resources
+
+ // allocates node 0 for memory and cpu
+ testAllocateNumaResource("container_1481156246874_0001_01_000001",
+ Resource.newInstance(2048, 2), "0", "0");
+
+ // allocates node 1 for memory and cpu since allocator uses round robin
assignment
+ testAllocateNumaResource("container_1481156246874_0001_01_000002",
+ Resource.newInstance(60000, 2), "1", "1");
+
+ // allocates node 0,1 for memory since there is no sufficient memory in
any one node
+ testAllocateNumaResource("container_1481156246874_0001_01_000003",
+ Resource.newInstance(80000, 2), "0,1", "0");
+
+ // returns null since there are no sufficient resources available for the
request
+ when(mockContainer.getContainerId()).thenReturn(
+ ContainerId.fromString("container_1481156246874_0001_01_000004"));
+ when(mockContainer.getResource())
+ .thenReturn(Resource.newInstance(80000, 2));
+ Assert.assertNull(numaResourceAllocator.allocateNumaNodes(mockContainer));
+
+ // allocates node 1 for memory and cpu
+ testAllocateNumaResource("container_1481156246874_0001_01_000005",
+ Resource.newInstance(1024, 2), "1", "1");
+ }
+
+ @Test
+ public void testAllocateNumaCpusResource() throws Exception {
+ // keeping memory constant
+
+ // allocates node 0 for memory and cpu
+ testAllocateNumaResource("container_1481156246874_0001_01_000001",
+ Resource.newInstance(2048, 2), "0", "0");
+
+ // allocates node 1 for memory and cpu since allocator uses round robin
assignment
+ testAllocateNumaResource("container_1481156246874_0001_01_000002",
+ Resource.newInstance(2048, 2), "1", "1");
+
+ // allocates node 0,1 for cpus since there are no sufficient cpus
available in any one node
+ testAllocateNumaResource("container_1481156246874_0001_01_000003",
+ Resource.newInstance(2048, 3), "0", "0,1");
+
+ // returns null since there are no sufficient resources available for the
request
+ when(mockContainer.getContainerId()).thenReturn(
+ ContainerId.fromString("container_1481156246874_0001_01_000004"));
+ when(mockContainer.getResource()).thenReturn(Resource.newInstance(2048,
2));
+ Assert.assertNull(numaResourceAllocator.allocateNumaNodes(mockContainer));
+
+ // allocates node 1 for memory and cpu
+ testAllocateNumaResource("container_1481156246874_0001_01_000005",
+ Resource.newInstance(2048, 1), "1", "1");
+ }
+
+ @Test
+ public void testReacquireContainer() throws Exception {
+ @SuppressWarnings("unchecked")
+ ConcurrentHashMap<ContainerId, Container> mockContainers = mock(
+ ConcurrentHashMap.class);
+ Context mockContext = mock(Context.class);
+ NMStateStoreService mock = mock(NMStateStoreService.class);
+ when(mockContext.getNMStateStore()).thenReturn(mock);
+ ResourceMappings resourceMappings = new ResourceMappings();
+ AssignedResources assignedRscs = new AssignedResources();
+ when(mockContainer.getResource())
+ .thenReturn(Resource.newInstance(142900, 2));
+ ContainerId cid =
ContainerId.fromString("container_1481156246874_0001_01_000001");
+ when(mockContainer.getContainerId()).thenReturn(cid);
+ NumaResourceAllocation numaResourceAllocation =
+ numaResourceAllocator.allocateNumaNodes(mockContainer);
+
assignedRscs.updateAssignedResources(Arrays.asList(numaResourceAllocation));
+ resourceMappings.addAssignedResources("numa", assignedRscs);
+ when(mockContainer.getResourceMappings()).thenReturn(resourceMappings);
+ when(mockContainers.get(any())).thenReturn(mockContainer);
+ when(mockContext.getContainers()).thenReturn(mockContainers);
+
+ // recovered numa resources should be added to the used resources and
+ // remaining will be available for further allocation.
+
+ ContainerReacquisitionContext containerReacquisitionContext =
+ new ContainerReacquisitionContext.Builder()
+ .setContainerId(cid)
+ .setUser("user")
+ .setContainer(mockContainer)
+ .build();
+
+ containerExecutor.reacquireContainer(containerReacquisitionContext);
+
+ // returns null since there are no sufficient resources available for the
request
+ when(mockContainer.getContainerId()).thenReturn(
+ ContainerId.fromString("container_1481156246874_0001_01_000004"));
+ when(mockContainer.getResource())
+ .thenReturn(Resource.newInstance(156250, 2));
Review Comment:
Better to check with a smaller resource size (1024) instead of 156250.
> NUMA Awareness support in DefaultContainerExecutor
> --------------------------------------------------
>
> Key: YARN-11196
> URL: https://issues.apache.org/jira/browse/YARN-11196
> Project: Hadoop YARN
> Issue Type: Improvement
> Components: nodemanager
> Affects Versions: 3.3.3
> Reporter: Prabhu Joseph
> Assignee: Samrat Deb
> Priority: Major
> Labels: pull-request-available
>
> [YARN-5764|https://issues.apache.org/jira/browse/YARN-5764] has added support
> of NUMA Awareness for Containers launched through LinuxContainerExecutor.
> This feature is useful to have in DefaultContainerExecutor as well.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]