YARN-4837. User-facing aspects of 'AM blacklisting' feature need fixing. (vinodkv via wangda)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/aef6e455 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/aef6e455 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/aef6e455 Branch: refs/heads/branch-2 Commit: aef6e455b12f818ccd0de0468c1ec6fbe213c5d8 Parents: df29f77 Author: Wangda Tan <wan...@apache.org> Authored: Thu Jun 9 10:08:31 2016 -0700 Committer: Wangda Tan <wan...@apache.org> Committed: Thu Jun 9 10:08:31 2016 -0700 ---------------------------------------------------------------------- .../yarn/api/records/AMBlackListingRequest.java | 67 ----- .../records/ApplicationSubmissionContext.java | 23 -- .../hadoop/yarn/conf/YarnConfiguration.java | 25 +- .../src/main/proto/yarn_protos.proto | 1 - .../yarn/conf/TestYarnConfigurationFields.java | 7 + .../impl/pb/AMBlackListingRequestPBImpl.java | 104 -------- .../pb/ApplicationSubmissionContextPBImpl.java | 40 --- .../src/main/resources/yarn-default.xml | 19 -- .../hadoop/yarn/api/TestPBImplRecords.java | 10 - .../blacklist/BlacklistManager.java | 9 +- .../blacklist/BlacklistUpdates.java | 47 ---- .../blacklist/DisabledBlacklistManager.java | 12 +- .../blacklist/SimpleBlacklistManager.java | 17 +- .../server/resourcemanager/rmapp/RMAppImpl.java | 79 ++---- .../rmapp/attempt/RMAppAttempt.java | 2 +- .../rmapp/attempt/RMAppAttemptImpl.java | 85 +++++-- .../scheduler/AbstractYarnScheduler.java | 2 +- .../scheduler/AppSchedulingInfo.java | 74 +++--- .../scheduler/SchedulerAppUtils.java | 16 +- .../scheduler/SchedulerApplicationAttempt.java | 33 ++- .../scheduler/capacity/CapacityScheduler.java | 11 +- .../allocator/RegularContainerAllocator.java | 2 +- .../scheduler/fair/FSLeafQueue.java | 2 +- .../scheduler/fair/FairScheduler.java | 8 +- .../scheduler/fifo/FifoScheduler.java | 12 +- .../webapp/RMAppAttemptBlock.java | 9 +- .../resourcemanager/webapp/RMAppBlock.java | 13 +- .../resourcemanager/webapp/RMWebServices.java | 21 +- 
.../webapp/dao/AMBlackListingRequestInfo.java | 61 ----- .../webapp/dao/AppAttemptInfo.java | 8 +- .../dao/ApplicationSubmissionContextInfo.java | 13 - .../TestNodeBlacklistingOnAMFailures.java | 251 +++++++++++++++++++ .../applicationsmanager/TestAMRestart.java | 177 +------------ .../blacklist/TestBlacklistManager.java | 29 +-- .../rmapp/TestRMAppTransitions.java | 58 ----- .../scheduler/TestAppSchedulingInfo.java | 12 +- .../capacity/TestCapacityScheduler.java | 8 +- .../scheduler/fair/TestFSAppAttempt.java | 12 +- .../scheduler/fair/TestFairScheduler.java | 9 +- .../TestRMWebServicesAppsModification.java | 39 ++- 40 files changed, 536 insertions(+), 891 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/AMBlackListingRequest.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/AMBlackListingRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/AMBlackListingRequest.java deleted file mode 100644 index 4aec2ba..0000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/AMBlackListingRequest.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.yarn.api.records; - -import org.apache.hadoop.classification.InterfaceAudience.Public; -import org.apache.hadoop.classification.InterfaceAudience.Private; -import org.apache.hadoop.classification.InterfaceStability.Evolving; -import org.apache.hadoop.classification.InterfaceStability.Unstable; -import org.apache.hadoop.yarn.util.Records; - -/** - * Specific AMBlacklistingRequest from AM to enable/disable blacklisting. - */ -@Public -@Evolving -public abstract class AMBlackListingRequest { - - @Private - @Unstable - public static AMBlackListingRequest newInstance( - boolean isAMBlackListingEnabled, float disableFailureThreshold) { - AMBlackListingRequest blackListRequest = Records - .newRecord(AMBlackListingRequest.class); - blackListRequest.setBlackListingEnabled(isAMBlackListingEnabled); - blackListRequest - .setBlackListingDisableFailureThreshold(disableFailureThreshold); - return blackListRequest; - } - - /** - * @return AM Blacklisting is enabled. 
- */ - @Public - @Evolving - public abstract boolean isAMBlackListingEnabled(); - - /** - * @return AM Blacklisting disable failure threshold - */ - @Public - @Evolving - public abstract float getBlackListingDisableFailureThreshold(); - - @Private - @Unstable - public abstract void setBlackListingEnabled(boolean isAMBlackListingEnabled); - - @Private - @Unstable - public abstract void setBlackListingDisableFailureThreshold( - float disableFailureThreshold); -} http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java index fe833f7..21cd1bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java @@ -535,27 +535,4 @@ public abstract class ApplicationSubmissionContext { @Public @Unstable public abstract void setReservationID(ReservationId reservationID); - - /** - * Get AM Blacklisting request object to know whether application needs any - * specific blacklisting for AM Nodes. - * - * @return AMBlackListingRequest object which has blacklisting information. - */ - @Public - @Unstable - public abstract AMBlackListingRequest getAMBlackListRequest(); - - /** - * Get AM Blacklisting request object to know whether application needs any - * specific blacklisting for AM Nodes. 
- * - * @param blackListRequest - * object which has blacklisting information such as - * "enable/disable AM blacklisting" and "disable failure threshold". - */ - @Public - @Unstable - public abstract void setAMBlackListRequest( - AMBlackListingRequest blackListRequest); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index e9ff71e..467e5fc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -2553,14 +2553,25 @@ public class YarnConfiguration extends Configuration { public static final long DEFAULT_RM_NODE_LABELS_PROVIDER_FETCH_INTERVAL_MS = 30 * 60 * 1000; - public static final String AM_BLACKLISTING_ENABLED = - YARN_PREFIX + "am.blacklisting.enabled"; - public static final boolean DEFAULT_AM_BLACKLISTING_ENABLED = true; - - public static final String AM_BLACKLISTING_DISABLE_THRESHOLD = - YARN_PREFIX + "am.blacklisting.disable-failure-threshold"; - public static final float DEFAULT_AM_BLACKLISTING_DISABLE_THRESHOLD = 0.8f; + @Private + /** + * This is a private feature that isn't supposed to be used by end-users. + */ + public static final String AM_SCHEDULING_NODE_BLACKLISTING_ENABLED = + RM_PREFIX + "am-scheduling.node-blacklisting-enabled"; + @Private + public static final boolean DEFAULT_AM_SCHEDULING_NODE_BLACKLISTING_ENABLED = + true; + @Private + /** + * This is a private feature that isn't supposed to be used by end-users. 
+ */ + public static final String AM_SCHEDULING_NODE_BLACKLISTING_DISABLE_THRESHOLD = + RM_PREFIX + "am-scheduling.node-blacklisting-disable-threshold"; + @Private + public static final float + DEFAULT_AM_SCHEDULING_NODE_BLACKLISTING_DISABLE_THRESHOLD = 0.8f; private static final String NM_SCRIPT_BASED_NODE_LABELS_PROVIDER_PREFIX = NM_NODE_LABELS_PROVIDER_PREFIX + "script."; http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index dc029e8..5fb18fb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -367,7 +367,6 @@ message ApplicationSubmissionContextProto { optional ReservationIdProto reservation_id = 15; optional string node_label_expression = 16; optional ResourceRequestProto am_container_resource_request = 17; - optional AMBlackListingRequestProto am_blacklisting_request = 18; } message LogAggregationContextProto { http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java index 61b698d..2c45b87 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java @@ -92,6 +92,13 @@ public class TestYarnConfigurationFields extends TestConfigurationFieldsBase { .add(YarnConfiguration.DEFAULT_AMRM_PROXY_INTERCEPTOR_CLASS_PIPELINE); configurationPropsToSkipCompare.add(YarnConfiguration.CURATOR_LEADER_ELECTOR); + // Ignore blacklisting nodes for AM failures feature since it is still a + // "work in progress" + configurationPropsToSkipCompare.add(YarnConfiguration. + AM_SCHEDULING_NODE_BLACKLISTING_ENABLED); + configurationPropsToSkipCompare.add(YarnConfiguration. + AM_SCHEDULING_NODE_BLACKLISTING_DISABLE_THRESHOLD); + // Ignore all YARN Application Timeline Service (version 1) properties configurationPrefixToSkipCompare.add("yarn.timeline-service."); http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/AMBlackListingRequestPBImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/AMBlackListingRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/AMBlackListingRequestPBImpl.java deleted file mode 100644 index 1d04dd2..0000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/AMBlackListingRequestPBImpl.java +++ /dev/null @@ -1,104 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn.api.records.impl.pb; - -import org.apache.hadoop.classification.InterfaceAudience.Private; -import org.apache.hadoop.classification.InterfaceStability.Unstable; -import org.apache.hadoop.yarn.api.records.AMBlackListingRequest; -import org.apache.hadoop.yarn.proto.YarnProtos.AMBlackListingRequestProto; -import org.apache.hadoop.yarn.proto.YarnProtos.AMBlackListingRequestProtoOrBuilder; - -import com.google.protobuf.TextFormat; - -@Private -@Unstable -public class AMBlackListingRequestPBImpl extends AMBlackListingRequest { - AMBlackListingRequestProto proto = AMBlackListingRequestProto - .getDefaultInstance(); - AMBlackListingRequestProto.Builder builder = null; - boolean viaProto = false; - - public AMBlackListingRequestPBImpl() { - builder = AMBlackListingRequestProto.newBuilder(); - } - - public AMBlackListingRequestPBImpl(AMBlackListingRequestProto proto) { - this.proto = proto; - viaProto = true; - } - - public AMBlackListingRequestProto getProto() { - proto = viaProto ? proto : builder.build(); - viaProto = true; - return proto; - } - - private void maybeInitBuilder() { - if (viaProto || builder == null) { - builder = AMBlackListingRequestProto.newBuilder(proto); - } - viaProto = false; - } - - @Override - public boolean isAMBlackListingEnabled() { - AMBlackListingRequestProtoOrBuilder p = viaProto ? 
proto : builder; - return p.getBlacklistingEnabled(); - } - - @Override - public float getBlackListingDisableFailureThreshold() { - AMBlackListingRequestProtoOrBuilder p = viaProto ? proto : builder; - return p.getBlacklistingFailureThreshold(); - } - - @Override - public void setBlackListingEnabled(boolean isAMBlackListingEnabled) { - maybeInitBuilder(); - builder.setBlacklistingEnabled(isAMBlackListingEnabled); - } - - @Override - public void setBlackListingDisableFailureThreshold( - float disableFailureThreshold) { - maybeInitBuilder(); - builder.setBlacklistingFailureThreshold(disableFailureThreshold); - } - - @Override - public int hashCode() { - return getProto().hashCode(); - } - - @Override - public boolean equals(Object other) { - if (other == null) { - return false; - } - if (other.getClass().isAssignableFrom(this.getClass())) { - return this.getProto().equals(this.getClass().cast(other).getProto()); - } - return false; - } - - @Override - public String toString() { - return TextFormat.shortDebugString(getProto()); - } -} http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java index b39258e..67e3a84 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java @@ -24,7 +24,6 @@ 
import java.util.Set; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.yarn.api.records.AMBlackListingRequest; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; @@ -34,7 +33,6 @@ import org.apache.hadoop.yarn.api.records.ReservationId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.proto.YarnProtos.AMBlackListingRequestProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProtoOrBuilder; @@ -65,7 +63,6 @@ extends ApplicationSubmissionContext { private ResourceRequest amResourceRequest = null; private LogAggregationContext logAggregationContext = null; private ReservationId reservationId = null; - private AMBlackListingRequest amBlackListRequest = null; public ApplicationSubmissionContextPBImpl() { builder = ApplicationSubmissionContextProto.newBuilder(); @@ -134,10 +131,6 @@ extends ApplicationSubmissionContext { if (this.reservationId != null) { builder.setReservationId(convertToProtoFormat(this.reservationId)); } - if (this.amBlackListRequest != null) { - builder.setAmBlacklistingRequest( - convertToProtoFormat(this.amBlackListRequest)); - } } private void mergeLocalToProto() { @@ -420,29 +413,6 @@ extends ApplicationSubmissionContext { return p.getKeepContainersAcrossApplicationAttempts(); } - @Override - public AMBlackListingRequest getAMBlackListRequest() { - ApplicationSubmissionContextProtoOrBuilder p = viaProto ? 
proto : builder; - if (amBlackListRequest != null) { - return amBlackListRequest; - } - if (!p.hasAmBlacklistingRequest()) { - return null; - } - amBlackListRequest = convertFromProtoFormat(p.getAmBlacklistingRequest()); - return amBlackListRequest; - } - - @Override - public void setAMBlackListRequest(AMBlackListingRequest amBlackListRequest) { - maybeInitBuilder(); - if (amBlackListRequest == null) { - builder.clearAmBlacklistingRequest(); - return; - } - this.amBlackListRequest = amBlackListRequest; - } - private PriorityPBImpl convertFromProtoFormat(PriorityProto p) { return new PriorityPBImpl(p); } @@ -485,16 +455,6 @@ extends ApplicationSubmissionContext { return ((ResourcePBImpl)t).getProto(); } - private AMBlackListingRequestPBImpl convertFromProtoFormat( - AMBlackListingRequestProto a) { - return new AMBlackListingRequestPBImpl(a); - } - - private AMBlackListingRequestProto convertToProtoFormat( - AMBlackListingRequest a) { - return ((AMBlackListingRequestPBImpl) a).getProto(); - } - @Override public String getNodeLabelExpression() { ApplicationSubmissionContextProtoOrBuilder p = viaProto ? proto : builder; http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index b3b2e2d..eabb679 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -2670,25 +2670,6 @@ <value>4096</value> </property> - <property> - <description> - Enable/disable blacklisting of hosts for AM based on AM failures on those - hosts. 
- </description> - <name>yarn.am.blacklisting.enabled</name> - <value>true</value> - </property> - - <property> - <description> - Threshold of ratio number of NodeManager hosts that are allowed to be - blacklisted for AM. Beyond this ratio there is no blacklisting to avoid - danger of blacklisting the entire cluster. - </description> - <name>yarn.am.blacklisting.disable-failure-threshold</name> - <value>0.8f</value> - </property> - <property> <description> Choose different implementation of node label's storage http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java index 91d65b1..55b1233 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java @@ -109,7 +109,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersReso import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; -import org.apache.hadoop.yarn.api.records.AMBlackListingRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -155,7 +154,6 @@ import 
org.apache.hadoop.yarn.api.records.StrictPreemptionContract; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; -import org.apache.hadoop.yarn.api.records.impl.pb.AMBlackListingRequestPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationAttemptIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationAttemptReportPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; @@ -190,7 +188,6 @@ import org.apache.hadoop.yarn.api.records.impl.pb.StrictPreemptionContractPBImpl import org.apache.hadoop.yarn.api.records.impl.pb.TokenPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.URLPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.YarnClusterMetricsPBImpl; -import org.apache.hadoop.yarn.proto.YarnProtos.AMBlackListingRequestProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptReportProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; @@ -515,7 +512,6 @@ public class TestPBImplRecords { generateByNewInstance(ResourceAllocationRequest.class); generateByNewInstance(ReservationAllocationState.class); generateByNewInstance(ResourceUtilization.class); - generateByNewInstance(AMBlackListingRequest.class); } private class GetSetPair { @@ -1355,10 +1351,4 @@ public class TestPBImplRecords { validatePBImplRecord(CheckForDecommissioningNodesResponsePBImpl.class, CheckForDecommissioningNodesResponseProto.class); } - - @Test - public void testAMBlackListingRequestPBImpl() throws Exception { - validatePBImplRecord(AMBlackListingRequestPBImpl.class, - AMBlackListingRequestProto.class); - } } 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/BlacklistManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/BlacklistManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/BlacklistManager.java index f03b421..f343603 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/BlacklistManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/BlacklistManager.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.blacklist; import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; /** * Tracks blacklists based on failures reported on nodes. @@ -33,14 +34,14 @@ public interface BlacklistManager { void addNode(String node); /** - * Get {@link BlacklistUpdates} that indicate which nodes should be + * Get {@link ResourceBlacklistRequest} that indicate which nodes should be * added or to removed from the blacklist. - * @return {@link BlacklistUpdates} + * @return {@link ResourceBlacklistRequest} */ - BlacklistUpdates getBlacklistUpdates(); + ResourceBlacklistRequest getBlacklistUpdates(); /** - * Refresh the number of nodemanager hosts available for scheduling. + * Refresh the number of NodeManagers available for scheduling. 
* @param nodeHostCount is the number of node hosts. */ void refreshNodeHostCount(int nodeHostCount); http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/BlacklistUpdates.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/BlacklistUpdates.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/BlacklistUpdates.java deleted file mode 100644 index c76dfb4..0000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/BlacklistUpdates.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.yarn.server.resourcemanager.blacklist; - -import org.apache.hadoop.classification.InterfaceAudience.Private; - -import java.util.List; - -/** - * Class to track blacklist additions and removals. - */ -@Private -public class BlacklistUpdates { - - private List<String> additions; - private List<String> removals; - - public BlacklistUpdates(List<String> additions, - List<String> removals) { - this.additions = additions; - this.removals = removals; - } - - public List<String> getAdditions() { - return additions; - } - - public List<String> getRemovals() { - return removals; - } -} http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/DisabledBlacklistManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/DisabledBlacklistManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/DisabledBlacklistManager.java index f155b45..8bb308d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/DisabledBlacklistManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/DisabledBlacklistManager.java @@ -20,21 +20,23 @@ package org.apache.hadoop.yarn.server.resourcemanager.blacklist; import java.util.ArrayList; +import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; + /** * A {@link BlacklistManager} that returns no blacklists. 
*/ -public class DisabledBlacklistManager implements BlacklistManager{ +public class DisabledBlacklistManager implements BlacklistManager { private static final ArrayList<String> EMPTY_LIST = new ArrayList<String>(); - private BlacklistUpdates noBlacklist = - new BlacklistUpdates(EMPTY_LIST, EMPTY_LIST); + private ResourceBlacklistRequest noBlacklist = + ResourceBlacklistRequest.newInstance(EMPTY_LIST, EMPTY_LIST); @Override public void addNode(String node) { } @Override - public BlacklistUpdates getBlacklistUpdates() { + public ResourceBlacklistRequest getBlacklistUpdates() { return noBlacklist; } @@ -42,4 +44,4 @@ public class DisabledBlacklistManager implements BlacklistManager{ public void refreshNodeHostCount(int nodeHostCount) { // Do nothing } -} +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/SimpleBlacklistManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/SimpleBlacklistManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/SimpleBlacklistManager.java index c7bd0f8..f10e885 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/SimpleBlacklistManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/SimpleBlacklistManager.java @@ -18,14 +18,15 @@ package org.apache.hadoop.yarn.server.resourcemanager.blacklist; -import 
org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; + /** * Maintains a list of failed nodes and returns that as long as number of * blacklisted nodes is below a threshold percentage of total nodes. If more @@ -58,8 +59,8 @@ public class SimpleBlacklistManager implements BlacklistManager { } @Override - public BlacklistUpdates getBlacklistUpdates() { - BlacklistUpdates ret; + public ResourceBlacklistRequest getBlacklistUpdates() { + ResourceBlacklistRequest ret; List<String> blacklist = new ArrayList<>(blacklistNodes); final int currentBlacklistSize = blacklist.size(); final double failureThreshold = this.blacklistDisableFailureThreshold * @@ -70,13 +71,15 @@ public class SimpleBlacklistManager implements BlacklistManager { "failure threshold ratio " + blacklistDisableFailureThreshold + " out of total usable nodes " + numberOfNodeManagerHosts); } - ret = new BlacklistUpdates(blacklist, EMPTY_LIST); + ret = ResourceBlacklistRequest.newInstance(blacklist, EMPTY_LIST); } else { LOG.warn("Ignoring Blacklists, blacklist size " + currentBlacklistSize + " is more than failure threshold ratio " + blacklistDisableFailureThreshold + " out of total usable nodes " + numberOfNodeManagerHosts); - ret = new BlacklistUpdates(EMPTY_LIST, blacklist); + // TODO: After the threshold hits, we will keep sending a long list + // every time a new AM is to be scheduled. 
+ ret = ResourceBlacklistRequest.newInstance(EMPTY_LIST, blacklist); } return ret; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 07d5a74..53d8ae5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -391,8 +391,8 @@ public class RMAppImpl implements RMApp, Recoverable { stateMachine; private static final int DUMMY_APPLICATION_ATTEMPT_NUMBER = -1; - private static final float MINIMUM_THRESHOLD_VALUE = 0.0f; - private static final float MAXIMUM_THRESHOLD_VALUE = 1.0f; + private static final float MINIMUM_AM_BLACKLIST_THRESHOLD_VALUE = 0.0f; + private static final float MAXIMUM_AM_BLACKLIST_THRESHOLD_VALUE = 1.0f; public RMAppImpl(ApplicationId applicationId, RMContext rmContext, Configuration config, String name, String user, String queue, @@ -471,42 +471,24 @@ public class RMAppImpl implements RMApp, Recoverable { YarnConfiguration.RM_MAX_LOG_AGGREGATION_DIAGNOSTICS_IN_MEMORY, YarnConfiguration.DEFAULT_RM_MAX_LOG_AGGREGATION_DIAGNOSTICS_IN_MEMORY); - // amBlacklistingEnabled can be configured globally and by each - // application. 
- // Case 1: If AMBlackListRequest is available in submission context, we - // will consider only app level request (RM level configuration will be - // skipped). - // Case 2: AMBlackListRequest is available in submission context and - // amBlacklisting is disabled. In this case, AM blacklisting wont be - // enabled for this app even if this feature is enabled in RM level. - // Case 3: AMBlackListRequest is not available through submission context. - // RM level AM black listing configuration will be considered. - if (null != submissionContext.getAMBlackListRequest()) { - amBlacklistingEnabled = submissionContext.getAMBlackListRequest() - .isAMBlackListingEnabled(); - blacklistDisableThreshold = 0.0f; - if (amBlacklistingEnabled) { - blacklistDisableThreshold = submissionContext.getAMBlackListRequest() - .getBlackListingDisableFailureThreshold(); - - // Verify whether blacklistDisableThreshold is valid. And for invalid - // threshold, reset to global level blacklistDisableThreshold - // configured. 
- if (blacklistDisableThreshold < MINIMUM_THRESHOLD_VALUE - || blacklistDisableThreshold > MAXIMUM_THRESHOLD_VALUE) { - blacklistDisableThreshold = conf.getFloat( - YarnConfiguration.AM_BLACKLISTING_DISABLE_THRESHOLD, - YarnConfiguration.DEFAULT_AM_BLACKLISTING_DISABLE_THRESHOLD); - } - } - } else { - amBlacklistingEnabled = conf.getBoolean( - YarnConfiguration.AM_BLACKLISTING_ENABLED, - YarnConfiguration.DEFAULT_AM_BLACKLISTING_ENABLED); - if (amBlacklistingEnabled) { - blacklistDisableThreshold = conf.getFloat( - YarnConfiguration.AM_BLACKLISTING_DISABLE_THRESHOLD, - YarnConfiguration.DEFAULT_AM_BLACKLISTING_DISABLE_THRESHOLD); + // amBlacklistingEnabled can be configured globally + // Just use the global values + amBlacklistingEnabled = + conf.getBoolean( + YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, + YarnConfiguration.DEFAULT_AM_SCHEDULING_NODE_BLACKLISTING_ENABLED); + if (amBlacklistingEnabled) { + blacklistDisableThreshold = conf.getFloat( + YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_DISABLE_THRESHOLD, + YarnConfiguration. + DEFAULT_AM_SCHEDULING_NODE_BLACKLISTING_DISABLE_THRESHOLD); + // Verify whether blacklistDisableThreshold is valid. And for invalid + // threshold, reset to global level blacklistDisableThreshold + // configured. + if (blacklistDisableThreshold < MINIMUM_AM_BLACKLIST_THRESHOLD_VALUE || + blacklistDisableThreshold > MAXIMUM_AM_BLACKLIST_THRESHOLD_VALUE) { + blacklistDisableThreshold = YarnConfiguration. + DEFAULT_AM_SCHEDULING_NODE_BLACKLISTING_DISABLE_THRESHOLD; } } } @@ -877,15 +859,16 @@ public class RMAppImpl implements RMApp, Recoverable { } private void createNewAttempt(ApplicationAttemptId appAttemptId) { - BlacklistManager currentAMBlacklist; + BlacklistManager currentAMBlacklistManager; if (currentAttempt != null) { - currentAMBlacklist = currentAttempt.getAMBlacklist(); + // Transfer over the blacklist from the previous app-attempt. 
+ currentAMBlacklistManager = currentAttempt.getAMBlacklistManager(); } else { if (amBlacklistingEnabled) { - currentAMBlacklist = new SimpleBlacklistManager( + currentAMBlacklistManager = new SimpleBlacklistManager( scheduler.getNumClusterNodes(), blacklistDisableThreshold); } else { - currentAMBlacklist = new DisabledBlacklistManager(); + currentAMBlacklistManager = new DisabledBlacklistManager(); } } RMAppAttempt attempt = @@ -896,7 +879,7 @@ public class RMAppImpl implements RMApp, Recoverable { // hardware error and NM resync) + 1) equal to the max-attempt // limit. maxAppAttempts == (getNumFailedAppAttempts() + 1), amReq, - currentAMBlacklist); + currentAMBlacklistManager); attempts.put(appAttemptId, attempt); currentAttempt = attempt; } @@ -1825,16 +1808,6 @@ public class RMAppImpl implements RMApp, Recoverable { rmContext.getSystemMetricsPublisher().appCreated(app, startTime); } - @VisibleForTesting - public boolean isAmBlacklistingEnabled() { - return amBlacklistingEnabled; - } - - @VisibleForTesting - public float getAmBlacklistingDisableThreshold() { - return blacklistDisableThreshold; - } - @Private @VisibleForTesting public int getNextAttemptId() { http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java index f2e60d1..cfd91e9 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java @@ -190,7 +190,7 @@ public interface RMAppAttempt extends EventHandler<RMAppAttemptEvent> { * Get the {@link BlacklistManager} that manages blacklists for AM failures * @return the {@link BlacklistManager} that tracks AM failures. */ - BlacklistManager getAMBlacklist(); + BlacklistManager getAMBlacklistManager(); /** * the start time of the application. http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 75090fe..d210b53 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import 
org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -73,7 +74,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; import org.apache.hadoop.yarn.server.resourcemanager.blacklist.BlacklistManager; -import org.apache.hadoop.yarn.server.resourcemanager.blacklist.BlacklistUpdates; import org.apache.hadoop.yarn.server.resourcemanager.blacklist.DisabledBlacklistManager; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; @@ -492,7 +492,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { ApplicationMasterService masterService, ApplicationSubmissionContext submissionContext, Configuration conf, boolean maybeLastAttempt, ResourceRequest amReq, - BlacklistManager amBlacklist) { + BlacklistManager amBlacklistManager) { this.conf = conf; this.applicationAttemptId = appAttemptId; this.rmContext = rmContext; @@ -511,9 +511,9 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { this.attemptMetrics = new RMAppAttemptMetrics(applicationAttemptId, rmContext); - + this.amReq = amReq; - this.blacklistedNodesForAM = amBlacklist; + this.blacklistedNodesForAM = amBlacklistManager; } @Override @@ -1031,15 +1031,15 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { appAttempt.amReq.setResourceName(ResourceRequest.ANY); appAttempt.amReq.setRelaxLocality(true); - appAttempt.getAMBlacklist().refreshNodeHostCount( + 
appAttempt.getAMBlacklistManager().refreshNodeHostCount( appAttempt.scheduler.getNumClusterNodes()); - BlacklistUpdates amBlacklist = appAttempt.getAMBlacklist() - .getBlacklistUpdates(); + ResourceBlacklistRequest amBlacklist = + appAttempt.getAMBlacklistManager().getBlacklistUpdates(); if (LOG.isDebugEnabled()) { LOG.debug("Using blacklist for AM: additions(" + - amBlacklist.getAdditions() + ") and removals(" + - amBlacklist.getRemovals() + ")"); + amBlacklist.getBlacklistAdditions() + ") and removals(" + + amBlacklist.getBlacklistRemovals() + ")"); } // AM resource has been checked when submission Allocation amContainerAllocation = @@ -1047,8 +1047,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { appAttempt.applicationAttemptId, Collections.singletonList(appAttempt.amReq), EMPTY_CONTAINER_RELEASE_LIST, - amBlacklist.getAdditions(), - amBlacklist.getRemovals(), null, null); + amBlacklist.getBlacklistAdditions(), + amBlacklist.getBlacklistRemovals(), null, null); if (amContainerAllocation != null && amContainerAllocation.getContainers() != null) { assert (amContainerAllocation.getContainers().size() == 0); @@ -1481,9 +1481,36 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { } } - private boolean shouldCountTowardsNodeBlacklisting(int exitStatus) { - return !(exitStatus == ContainerExitStatus.SUCCESS - || exitStatus == ContainerExitStatus.PREEMPTED); + private static boolean shouldCountTowardsNodeBlacklisting(int exitStatus) { + switch (exitStatus) { + case ContainerExitStatus.PREEMPTED: + case ContainerExitStatus.KILLED_BY_RESOURCEMANAGER: + case ContainerExitStatus.KILLED_BY_APPMASTER: + case ContainerExitStatus.KILLED_AFTER_APP_COMPLETION: + case ContainerExitStatus.ABORTED: + // Neither the app's fault nor the system's fault. 
This happens by design, + // so no need for skipping nodes + return false; + case ContainerExitStatus.DISKS_FAILED: + // This container is marked with this exit-status means that the node is + // already marked as unhealthy given that most of the disks failed. So, no + // need for any explicit skipping of nodes. + return false; + case ContainerExitStatus.KILLED_EXCEEDED_VMEM: + case ContainerExitStatus.KILLED_EXCEEDED_PMEM: + // No point in skipping the node as it's not the system's fault + return false; + case ContainerExitStatus.SUCCESS: + return false; + case ContainerExitStatus.INVALID: + // Ideally, this shouldn't be considered for skipping a node. But in + // reality, it seems like there are cases where we are not setting + // exit-code correctly and so it's better to be conservative. See + // YARN-4284. + return true; + default: + return true; + } } private static final class UnmanagedAMAttemptSavedTransition @@ -1803,7 +1830,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { if (appAttempt.masterContainer != null && appAttempt.masterContainer.getId().equals( containerStatus.getContainerId())) { - appAttempt.sendAMContainerToNM(appAttempt, containerFinishedEvent); + appAttempt.amContainerFinished(appAttempt, containerFinishedEvent); // Remember the follow up transition and save the final attempt state. appAttempt.rememberTargetTransitionsAndStoreState(event, @@ -1848,13 +1875,17 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { // Add am container to the list so that am container instance will be // removed from NMContext. 
- private void sendAMContainerToNM(RMAppAttemptImpl appAttempt, + private static void amContainerFinished(RMAppAttemptImpl appAttempt, RMAppAttemptContainerFinishedEvent containerFinishedEvent) { + NodeId nodeId = containerFinishedEvent.getNodeId(); - if (containerFinishedEvent.getContainerStatus() != null) { - if (shouldCountTowardsNodeBlacklisting(containerFinishedEvent - .getContainerStatus().getExitStatus())) { - appAttempt.addAMNodeToBlackList(containerFinishedEvent.getNodeId()); + + ContainerStatus containerStatus = + containerFinishedEvent.getContainerStatus(); + if (containerStatus != null) { + int exitStatus = containerStatus.getExitStatus(); + if (shouldCountTowardsNodeBlacklisting(exitStatus)) { + appAttempt.addAMNodeToBlackList(nodeId); } } else { LOG.warn("No ContainerStatus in containerFinishedEvent"); @@ -1862,14 +1893,13 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { if (!appAttempt.getSubmissionContext() .getKeepContainersAcrossApplicationAttempts()) { - finishedContainersSentToAM.putIfAbsent(nodeId, + appAttempt.finishedContainersSentToAM.putIfAbsent(nodeId, new ArrayList<ContainerStatus>()); - appAttempt.finishedContainersSentToAM.get(nodeId).add( - containerFinishedEvent.getContainerStatus()); + appAttempt.finishedContainersSentToAM.get(nodeId).add(containerStatus); appAttempt.sendFinishedContainersToNM(); } else { appAttempt.sendFinishedAMContainerToNM(nodeId, - containerFinishedEvent.getContainerStatus().getContainerId()); + containerStatus.getContainerId()); } } @@ -1884,7 +1914,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { } @Override - public BlacklistManager getAMBlacklist() { + public BlacklistManager getAMBlacklistManager() { return blacklistedNodesForAM; } @@ -1943,7 +1973,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { containerStatus.getContainerId())) { new FinalTransition(RMAppAttemptState.FINISHED).transition( appAttempt, containerFinishedEvent); - 
appAttempt.sendAMContainerToNM(appAttempt, containerFinishedEvent); + appAttempt.amContainerFinished(appAttempt, containerFinishedEvent); return RMAppAttemptState.FINISHED; } // Add all finished containers so that they can be acked to NM. @@ -1968,7 +1998,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { // Thus, we still return FINAL_SAVING state here. if (appAttempt.masterContainer.getId().equals( containerStatus.getContainerId())) { - appAttempt.sendAMContainerToNM(appAttempt, containerFinishedEvent); + + appAttempt.amContainerFinished(appAttempt, containerFinishedEvent); if (appAttempt.targetedFinalState.equals(RMAppAttemptState.FAILED) || appAttempt.targetedFinalState.equals(RMAppAttemptState.KILLED)) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 354dcb2..3066339 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -178,7 +178,7 @@ public abstract class AbstractYarnScheduler NodeFilter nodeFilter = new NodeFilter() { @Override public boolean accept(SchedulerNode 
node) { - return SchedulerAppUtils.isBlacklisted(app, node, LOG); + return SchedulerAppUtils.isPlaceBlacklisted(app, node, LOG); } }; return nodeTracker.getNodes(nodeFilter); http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java index 463bebd..8d42c97 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java @@ -73,9 +73,13 @@ public class AppSchedulingInfo { private ActiveUsersManager activeUsersManager; private boolean pending = true; // whether accepted/allocated by scheduler private ResourceUsage appResourceUsage; + private AtomicBoolean userBlacklistChanged = new AtomicBoolean(false); - private final Set<String> amBlacklist = new HashSet<>(); - private Set<String> userBlacklist = new HashSet<>(); + // Set of places (nodes / racks) blacklisted by the system. Today, this only + // has places blacklisted for AM containers. 
+ private final Set<String> placesBlacklistedBySystem = new HashSet<>(); + private Set<String> placesBlacklistedByApp = new HashSet<>(); + private Set<String> requestedPartitions = new HashSet<>(); final Set<Priority> priorities = new TreeSet<>(COMPARATOR); @@ -447,32 +451,38 @@ public class AppSchedulingInfo { } /** - * The ApplicationMaster is updating the userBlacklist used for containers - * other than AMs. + * The ApplicationMaster is updating the placesBlacklistedByApp used for + * containers other than AMs. * - * @param blacklistAdditions resources to be added to the userBlacklist - * @param blacklistRemovals resources to be removed from the userBlacklist + * @param blacklistAdditions + * resources to be added to the userBlacklist + * @param blacklistRemovals + * resources to be removed from the userBlacklist */ - public void updateBlacklist( + public void updatePlacesBlacklistedByApp( List<String> blacklistAdditions, List<String> blacklistRemovals) { - if (updateUserOrAMBlacklist(userBlacklist, blacklistAdditions, + if (updateBlacklistedPlaces(placesBlacklistedByApp, blacklistAdditions, blacklistRemovals)) { userBlacklistChanged.set(true); } } /** - * RM is updating blacklist for AM containers. - * @param blacklistAdditions resources to be added to the amBlacklist - * @param blacklistRemovals resources to be added to the amBlacklist + * Update the list of places that are blacklisted by the system. 
Today the + * system only blacklists places when it sees that AMs failed there + * + * @param blacklistAdditions + * resources to be added to placesBlacklistedBySystem + * @param blacklistRemovals + * resources to be removed from placesBlacklistedBySystem */ - public void updateAMBlacklist( + public void updatePlacesBlacklistedBySystem( List<String> blacklistAdditions, List<String> blacklistRemovals) { - updateUserOrAMBlacklist(amBlacklist, blacklistAdditions, + updateBlacklistedPlaces(placesBlacklistedBySystem, blacklistAdditions, blacklistRemovals); } - boolean updateUserOrAMBlacklist(Set<String> blacklist, + private static boolean updateBlacklistedPlaces(Set<String> blacklist, List<String> blacklistAdditions, List<String> blacklistRemovals) { boolean changed = false; synchronized (blacklist) { @@ -481,9 +491,7 @@ public class AppSchedulingInfo { } if (blacklistRemovals != null) { - if (blacklist.removeAll(blacklistRemovals)) { - changed = true; - } + changed = blacklist.removeAll(blacklistRemovals) || changed; } } return changed; @@ -522,20 +530,24 @@ public class AppSchedulingInfo { } /** - * Returns if the node is either blacklisted by the user or the system - * @param resourceName the resourcename - * @param useAMBlacklist true if it should check amBlacklist + * Returns if the place (node/rack today) is either blacklisted by the + * application (user) or the system + * + * @param resourceName + * the resourcename + * @param blacklistedBySystem + * true if it should check amBlacklist * @return true if its blacklisted */ - public boolean isBlacklisted(String resourceName, - boolean useAMBlacklist) { - if (useAMBlacklist){ - synchronized (amBlacklist) { - return amBlacklist.contains(resourceName); + public boolean isPlaceBlacklisted(String resourceName, + boolean blacklistedBySystem) { + if (blacklistedBySystem){ + synchronized (placesBlacklistedBySystem) { + return placesBlacklistedBySystem.contains(resourceName); } } else { - synchronized (userBlacklist) { - 
return userBlacklist.contains(resourceName); + synchronized (placesBlacklistedByApp) { + return placesBlacklistedByApp.contains(resourceName); } } } @@ -773,12 +785,12 @@ public class AppSchedulingInfo { } public Set<String> getBlackList() { - return this.userBlacklist; + return this.placesBlacklistedByApp; } public Set<String> getBlackListCopy() { - synchronized (userBlacklist) { - return new HashSet<>(this.userBlacklist); + synchronized (placesBlacklistedByApp) { + return new HashSet<>(this.placesBlacklistedByApp); } } @@ -786,7 +798,7 @@ public class AppSchedulingInfo { AppSchedulingInfo appInfo) { // This should not require locking the userBlacklist since it will not be // used by this instance until after setCurrentAppAttempt. - this.userBlacklist = appInfo.getBlackList(); + this.placesBlacklistedByApp = appInfo.getBlackList(); } public synchronized void recoverContainer(RMContainer rmContainer) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppUtils.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppUtils.java index 36a1244..631da67 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppUtils.java 
@@ -22,20 +22,20 @@ import org.apache.commons.logging.Log; public class SchedulerAppUtils { - public static boolean isBlacklisted(SchedulerApplicationAttempt application, - SchedulerNode node, Log LOG) { - if (application.isBlacklisted(node.getNodeName())) { - if (LOG.isDebugEnabled()) { - LOG.debug("Skipping 'host' " + node.getNodeName() + + public static boolean isPlaceBlacklisted( + SchedulerApplicationAttempt application, SchedulerNode node, Log log) { + if (application.isPlaceBlacklisted(node.getNodeName())) { + if (log.isDebugEnabled()) { + log.debug("Skipping 'host' " + node.getNodeName() + " for " + application.getApplicationId() + " since it has been blacklisted"); } return true; } - if (application.isBlacklisted(node.getRackName())) { - if (LOG.isDebugEnabled()) { - LOG.debug("Skipping 'rack' " + node.getRackName() + + if (application.isPlaceBlacklisted(node.getRackName())) { + if (log.isDebugEnabled()) { + log.debug("Skipping 'rack' " + node.getRackName() + " for " + application.getApplicationId() + " since it has been blacklisted"); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index ffb8657..b48b272 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -592,27 +592,26 @@ public class SchedulerApplicationAttempt implements SchedulableEntity { return (!unmanagedAM && appAttempt.getMasterContainer() == null); } - // Blacklist used for user containers - public synchronized void updateBlacklist( - List<String> blacklistAdditions, List<String> blacklistRemovals) { + public synchronized void updateBlacklist(List<String> blacklistAdditions, + List<String> blacklistRemovals) { if (!isStopped) { - this.appSchedulingInfo.updateBlacklist( - blacklistAdditions, blacklistRemovals); - } - } - - // Blacklist used for AM containers - public synchronized void updateAMBlacklist( - List<String> blacklistAdditions, List<String> blacklistRemovals) { - if (!isStopped) { - this.appSchedulingInfo.updateAMBlacklist( - blacklistAdditions, blacklistRemovals); + if (isWaitingForAMContainer()) { + // The request is for the AM-container, and the AM-container is launched + // by the system. So, update the places that are blacklisted by system + // (as opposed to those blacklisted by the application). 
+ this.appSchedulingInfo.updatePlacesBlacklistedBySystem( + blacklistAdditions, blacklistRemovals); + } else { + this.appSchedulingInfo.updatePlacesBlacklistedByApp(blacklistAdditions, + blacklistRemovals); + } } } - public boolean isBlacklisted(String resourceName) { - boolean useAMBlacklist = isWaitingForAMContainer(); - return this.appSchedulingInfo.isBlacklisted(resourceName, useAMBlacklist); + public boolean isPlaceBlacklisted(String resourceName) { + boolean forAMContainer = isWaitingForAMContainer(); + return this.appSchedulingInfo.isPlaceBlacklisted(resourceName, + forAMContainer); } public synchronized int addMissedNonPartitionedRequestSchedulingOpportunity( http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 920e983..ee62a70 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -996,15 +996,8 @@ public class CapacityScheduler extends application.showRequests(); } } - - if (application.isWaitingForAMContainer()) { - // Allocate is for AM and update AM blacklist for 
this - application.updateAMBlacklist( - blacklistAdditions, blacklistRemovals); - } else { - application.updateBlacklist(blacklistAdditions, blacklistRemovals); - } - + + application.updateBlacklist(blacklistAdditions, blacklistRemovals); allocation = application.getAllocation(getResourceCalculator(), getClusterResource(), getMinimumResourceCapability()); http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index b2d4bbe..aae5292 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -81,7 +81,7 @@ public class RegularContainerAllocator extends AbstractContainerAllocator { private ContainerAllocation preCheckForNewContainer(Resource clusterResource, FiCaSchedulerNode node, SchedulingMode schedulingMode, ResourceLimits resourceLimits, Priority priority) { - if (SchedulerAppUtils.isBlacklisted(application, node, LOG)) { + if (SchedulerAppUtils.isPlaceBlacklisted(application, node, LOG)) { 
application.updateAppSkipNodeDiagnostics( CSAMContainerLaunchDiagnosticsConstants.SKIP_AM_ALLOCATION_IN_BLACK_LISTED_NODE); return ContainerAllocation.APP_SKIPPED; http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java index a398906..d8b51f7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java @@ -332,7 +332,7 @@ public class FSLeafQueue extends FSQueue { readLock.unlock(); } for (FSAppAttempt sched : pendingForResourceApps) { - if (SchedulerAppUtils.isBlacklisted(sched, node, LOG)) { + if (SchedulerAppUtils.isPlaceBlacklisted(sched, node, LOG)) { continue; } assigned = sched.assignContainer(node); http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java ---------------------------------------------------------------------- diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index c8e8406..bc953ba 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -993,13 +993,7 @@ public class FairScheduler extends preemptionContainerIds.add(container.getContainerId()); } - if (application.isWaitingForAMContainer()) { - // Allocate is for AM and update AM blacklist for this - application.updateAMBlacklist( - blacklistAdditions, blacklistRemovals); - } else { - application.updateBlacklist(blacklistAdditions, blacklistRemovals); - } + application.updateBlacklist(blacklistAdditions, blacklistRemovals); List<Container> newlyAllocatedContainers = application.pullNewlyAllocatedContainers(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 
796b0cf..eaab495 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -375,13 +375,7 @@ public class FifoScheduler extends " #ask=" + ask.size()); } - if (application.isWaitingForAMContainer()) { - // Allocate is for AM and update AM blacklist for this - application.updateAMBlacklist( - blacklistAdditions, blacklistRemovals); - } else { - application.updateBlacklist(blacklistAdditions, blacklistRemovals); - } + application.updateBlacklist(blacklistAdditions, blacklistRemovals); Resource headroom = application.getHeadroom(); application.setApplicationHeadroomForMetrics(headroom); @@ -516,10 +510,10 @@ public class FifoScheduler extends application.showRequests(); synchronized (application) { // Check if this resource is on the blacklist - if (SchedulerAppUtils.isBlacklisted(application, node, LOG)) { + if (SchedulerAppUtils.isPlaceBlacklisted(application, node, LOG)) { continue; } - + for (Priority priority : application.getPriorities()) { int maxContainers = getMaxAllocatableContainers(application, priority, node, http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java index 6fef367..d2da8e3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java @@ -220,8 +220,9 @@ public class RMAppAttemptBlock extends AppAttemptBlock{ String appBlacklistedNodes = getNodeString(rmAppAttempt.getBlacklistedNodes()); // nodes which are blacklisted by the RM for AM launches - String rmBlackListedNodes = getNodeString( - rmAppAttempt.getAMBlacklist().getBlacklistUpdates().getAdditions()); + String rmBlackListedNodes = + getNodeString(rmAppAttempt.getAMBlacklistManager() + .getBlacklistUpdates().getBlacklistAdditions()); info("Application Attempt Overview") ._( @@ -256,8 +257,8 @@ public class RMAppAttemptBlock extends AppAttemptBlock{ "Diagnostics Info:", appAttempt.getDiagnosticsInfo() == null ? 
"" : appAttempt .getDiagnosticsInfo()) - ._("Application Blacklisted Nodes:", appBlacklistedNodes) - ._("RM Blacklisted Nodes(for AM launches)", rmBlackListedNodes); + ._("Nodes blacklisted by the application:", appBlacklistedNodes) + ._("Nodes blacklisted by the system:", rmBlackListedNodes); } private String getNodeString(Collection<String> nodes) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java index 2d822c1..cfa21dd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java @@ -113,10 +113,10 @@ public class RMAppBlock extends AppBlock{ Hamlet.TBODY<Hamlet.TABLE<Hamlet>> tbody = html.table("#attempts").thead().tr().th(".id", "Attempt ID") .th(".started", "Started").th(".node", "Node").th(".logs", "Logs") - .th(".appBlacklistednodes", "Nodes black listed by the application", - "App Blacklisted Nodes") - .th(".rmBlacklistednodes", "Nodes black listed by the RM for the" - + " app", "RM Blacklisted Nodes")._()._().tbody(); + .th(".appBlacklistednodes", "Nodes blacklisted by the application", + "Nodes blacklisted by the app") + .th(".rmBlacklistednodes", "Nodes blacklisted by the 
RM for the" + + " app", "Nodes blacklisted by the system")._()._().tbody(); RMApp rmApp = this.rm.getRMContext().getRMApps().get(this.appID); if (rmApp == null) { @@ -136,8 +136,9 @@ public class RMAppBlock extends AppBlock{ // nodes which are blacklisted by the application String appBlacklistedNodesCount = String.valueOf(nodes.size()); // nodes which are blacklisted by the RM for AM launches - String rmBlacklistedNodesCount = String.valueOf(rmAppAttempt - .getAMBlacklist().getBlacklistUpdates().getAdditions().size()); + String rmBlacklistedNodesCount = + String.valueOf(rmAppAttempt.getAMBlacklistManager() + .getBlacklistUpdates().getBlacklistAdditions().size()); String nodeLink = attemptInfo.getNodeHttpAddress(); if (nodeLink != null) { nodeLink = WebAppUtils.getHttpSchemePrefix(conf) + nodeLink; http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index 7de1d46..d05d952 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -85,12 +85,15 @@ import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import 
org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationDeleteRequest; import org.apache.hadoop.yarn.api.protocolrecords.ReservationListRequest; import org.apache.hadoop.yarn.api.protocolrecords.ReservationListResponse; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionResponse; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationUpdateRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationPriorityRequest; -import org.apache.hadoop.yarn.api.records.AMBlackListingRequest; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; @@ -130,7 +133,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AMBlackListingRequestInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppAttemptInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppAttemptsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo; @@ -168,10 +170,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationReque import 
org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationSubmissionRequestInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationUpdateRequestInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationUpdateResponseInfo; -import org.apache.hadoop.yarn.api.protocolrecords.ReservationDeleteRequest; -import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest; -import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionResponse; -import org.apache.hadoop.yarn.api.protocolrecords.ReservationUpdateRequest; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerTypeInfo; @@ -1499,10 +1497,6 @@ public class RMWebServices extends WebServices { reservationIdStr); appContext.setReservationID(reservationId); } - if (newApp.getAMBlackListingRequestInfo() != null) { - appContext.setAMBlackListRequest(createAMBlackListingRequest( - newApp.getAMBlackListingRequestInfo())); - } return appContext; } @@ -1651,13 +1645,6 @@ public class RMWebServices extends WebServices { logAggregationContextInfo.getLogAggregationPolicyParameters()); } - private AMBlackListingRequest createAMBlackListingRequest( - AMBlackListingRequestInfo amBlackListingRequestInfo) { - return AMBlackListingRequest.newInstance( - amBlackListingRequestInfo.getAMBlackListingEnabled(), - amBlackListingRequestInfo.getBlackListingDisableFailureThreshold()); - } - @POST @Path("/delegation-token") @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) http://git-wip-us.apache.org/repos/asf/hadoop/blob/aef6e455/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AMBlackListingRequestInfo.java 
---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AMBlackListingRequestInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AMBlackListingRequestInfo.java deleted file mode 100644 index 0084b0b..0000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AMBlackListingRequestInfo.java +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao; - -import javax.xml.bind.annotation.XmlAccessType; -import javax.xml.bind.annotation.XmlAccessorType; -import javax.xml.bind.annotation.XmlElement; -import javax.xml.bind.annotation.XmlRootElement; - -/** - * Simple class to allow users to send information required to create a - * AMBlackListingRequest which can then be used as part of the - * ApplicationSubmissionContext - * - */ -@XmlRootElement(name = "am-black-listing-requests") -@XmlAccessorType(XmlAccessType.FIELD) -public class AMBlackListingRequestInfo { - - @XmlElement(name = "am-black-listing-enabled") - boolean isAMBlackListingEnabled; - - @XmlElement(name = "disable-failure-threshold") - float disableFailureThreshold; - - public AMBlackListingRequestInfo() { - } - - public boolean getAMBlackListingEnabled() { - return isAMBlackListingEnabled; - } - - public void setAMBlackListingEnabled(boolean isAMBlackListingEnabled) { - this.isAMBlackListingEnabled = isAMBlackListingEnabled; - } - - public float getBlackListingDisableFailureThreshold() { - return disableFailureThreshold; - } - - public void setBlackListingDisableFailureThreshold( - float disableFailureThreshold) { - this.disableFailureThreshold = disableFailureThreshold; - } -} --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org