[ https://issues.apache.org/jira/browse/YARN-11838?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18013816#comment-18013816 ]
ASF GitHub Bot commented on YARN-11838: --------------------------------------- Copilot commented on code in PR #7828: URL: https://github.com/apache/hadoop/pull/7828#discussion_r2275024189 ########## hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/NodeAttributesManagerImpl.java: ########## @@ -741,7 +741,15 @@ public void refreshNodeAttributesToScheduler(NodeId nodeId) { if (host == null || host.attributes == null) { return; } - newNodeToAttributesMap.put(hostName, host.attributes.keySet()); + + // Use read lock and create defensive copy since + // other threads might access host.attributes + readLock.lock(); + try { + newNodeToAttributesMap.put(hostName, new HashSet<>(host.attributes.keySet())); Review Comment: Creating a new HashSet on every call could impact performance. Consider using Collections.unmodifiableSet() if the caller doesn't need to modify the returned set, or implement a more efficient copying mechanism for frequently called methods. ```suggestion newNodeToAttributesMap.put(hostName, Collections.unmodifiableSet(new HashSet<>(host.attributes.keySet()))); ``` ########## hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRefreshNodeAttributesConcurrency.java: ########## @@ -0,0 +1,255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.NodeAttribute; +import org.apache.hadoop.yarn.api.records.NodeAttributeType; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.event.Event; +import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.nodelabels.NodeAttributeStore; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.FileSystemNodeAttributeStore; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NodeAttributesManagerImpl; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAttributesUpdateSchedulerEvent; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; +import org.mockito.ArgumentCaptor; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/** + * Tests verify that the read lock and defensive copy implementation prevents race condition. + */ +public class TestRefreshNodeAttributesConcurrency { + + private NodeAttributesManagerImpl attributesManager; + private RMContext rmContext; + private AsyncDispatcher dispatcher; + private EventHandler<Event> eventHandler; + private static final String TEST_HOST = "testhost"; + private static final String TEST_PREFIX = "yarn.test.io"; + + @BeforeEach + @SuppressWarnings("unchecked") + void setUp() throws IOException { + Configuration conf = new Configuration(); + attributesManager = new NodeAttributesManagerImpl(); + conf.setClass(YarnConfiguration.FS_NODE_ATTRIBUTE_STORE_IMPL_CLASS, + FileSystemNodeAttributeStore.class, NodeAttributeStore.class); + conf = NodeAttributeTestUtils.getRandomDirConf(conf); Review Comment: The NodeAttributeTestUtils class is referenced but not imported. This will cause a compilation error. > YARN ConcurrentModificationException When Refreshing Node Attributes > -------------------------------------------------------------------- > > Key: YARN-11838 > URL: https://issues.apache.org/jira/browse/YARN-11838 > Project: Hadoop YARN > Issue Type: Bug > Components: nodeattibute, yarn > Affects Versions: 3.3.0, 3.2.1 > Reporter: Syed Shameerur Rahman > Assignee: Syed Shameerur Rahman > Priority: Major > Labels: pull-request-available > > h2. The Problem Flow > # A new node is being added to the cluster (NODE_ADDED event) > # The CapacityScheduler calls addNode() method > # This triggers refreshNodeAttributesToScheduler() in > NodeAttributesManagerImpl > # During this process, the code attempts to convert a HashMap to a string > for logging > # While iterating through the HashMap for string conversion, another thread > modifies the same HashMap : > https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/NodeAttributesManagerImpl.java#L748 > # This causes the ConcurrentModificationException > > {code:java} > 025-07-17 19:23:37,166 ERROR org.apache.hadoop.yarn.event.EventDispatcher > (SchedulerEventDispatcher:Event Processor): Error in handling event type > NODE_ADDED to the Event Dispatcher > java.util.ConcurrentModificationException > at > java.base/java.util.HashMap$HashIterator.nextNode(HashMap.java:1597) > at java.base/java.util.HashMap$KeyIterator.next(HashMap.java:1620) > at > java.base/java.util.AbstractCollection.toString(AbstractCollection.java:456) > at java.base/java.lang.String.valueOf(String.java:4220) > at java.base/java.lang.StringBuilder.append(StringBuilder.java:173) > at java.base/java.util.AbstractMap.toString(AbstractMap.java:555) > at java.base/java.lang.String.valueOf(String.java:4220) > at java.base/java.lang.StringBuilder.append(StringBuilder.java:173) > at > org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NodeAttributesManagerImpl.refreshNodeAttributesToScheduler(NodeAttributesManagerImpl.java:748) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org