[
https://issues.apache.org/jira/browse/YARN-11177?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17584623#comment-17584623
]
ASF GitHub Bot commented on YARN-11177:
---------------------------------------
slfan1989 commented on code in PR #4764:
URL: https://github.com/apache/hadoop/pull/4764#discussion_r954524494
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java:
##########
@@ -888,13 +890,127 @@ public MoveApplicationAcrossQueuesResponse
moveApplicationAcrossQueues(
@Override
public GetNewReservationResponse getNewReservation(
GetNewReservationRequest request) throws YarnException, IOException {
- throw new NotImplementedException("Code is not implemented");
+
+ if (request == null) {
+ routerMetrics.incrGetNewReservationFailedRetrieved();
+ String errMsg = "Missing getNewReservation request.";
+ RouterServerUtil.logAndThrowException(errMsg, null);
+ }
+
+ long startTime = clock.getTime();
+ Map<SubClusterId, SubClusterInfo> subClustersActive =
+ federationFacade.getSubClusters(true);
+
+ for (int i = 0; i < numSubmitRetries; ++i) {
+ SubClusterId subClusterId = getRandomActiveSubCluster(subClustersActive);
+ LOG.info("getNewReservation try #{} on SubCluster {}.", i, subClusterId);
+ ApplicationClientProtocol clientRMProxy =
getClientRMProxyForSubCluster(subClusterId);
+ GetNewReservationResponse response = null;
+ try {
+ response = clientRMProxy.getNewReservation(request);
+ if (response != null) {
+ long stopTime = clock.getTime();
+ routerMetrics.succeededGetNewReservationRetrieved(stopTime -
startTime);
+ return response;
+ }
+ } catch (Exception e) {
+ LOG.warn("Unable to create a new Reservation in SubCluster {}.",
subClusterId.getId(), e);
+ subClustersActive.remove(subClusterId);
+ }
+ }
+
+ routerMetrics.incrGetNewReservationFailedRetrieved();
+ String errMsg = "Failed to create a new reservation.";
+ throw new YarnException(errMsg);
}
@Override
public ReservationSubmissionResponse submitReservation(
ReservationSubmissionRequest request) throws YarnException, IOException {
- throw new NotImplementedException("Code is not implemented");
+
+ if (request == null || request.getReservationId() == null
+ || request.getReservationDefinition() == null ||
request.getQueue() == null) {
+ routerMetrics.incrSubmitReservationFailedRetrieved();
+ RouterServerUtil.logAndThrowException(
+ "Missing submitReservation request or reservationId " +
+ "or reservation definition or queue.", null);
+ }
+
+ long startTime = clock.getTime();
+ ReservationId reservationId = request.getReservationId();
+
+ long retryCount = 0;
+ boolean firstRetry = true;
+
+ while (retryCount < numSubmitRetries) {
+
+ SubClusterId subClusterId =
policyFacade.getReservationHomeSubCluster(request);
+ LOG.info("submitReservation reservationId {} try #{} on SubCluster {}.",
+ reservationId, retryCount, subClusterId);
+
+ ReservationHomeSubCluster reservationHomeSubCluster =
+ ReservationHomeSubCluster.newInstance(reservationId, subClusterId);
+
+ // If it is the first attempt, use StateStore to add the
+ // mapping of reservationId and subClusterId.
+ // If the number of attempts is greater than 1, use StateStore to update
+ // the mapping.
+ if (firstRetry) {
+ try {
+ // persist the mapping of reservationId and the subClusterId which has
+ // been selected as its home
+ subClusterId =
federationFacade.addReservationHomeSubCluster(reservationHomeSubCluster);
+ firstRetry = false;
+ } catch (YarnException e) {
+ routerMetrics.incrSubmitReservationFailedRetrieved();
+ RouterServerUtil.logAndThrowException(e,
+ "Unable to insert the ReservationId %s into the
FederationStateStore.",
+ reservationId);
+ }
+ } else {
+ try {
+ // update the mapping of reservationId and the home subClusterId to
+ // the new subClusterId we have selected
+
federationFacade.updateReservationHomeSubCluster(reservationHomeSubCluster);
+ } catch (YarnException e) {
+ SubClusterId subClusterIdInStateStore =
+ federationFacade.getReservationHomeSubCluster(reservationId);
+ if (subClusterId == subClusterIdInStateStore) {
+ LOG.info("Reservation {} already submitted on SubCluster {}.",
+ reservationId, subClusterId);
+ } else {
+ routerMetrics.incrSubmitReservationFailedRetrieved();
+ RouterServerUtil.logAndThrowException(e,
+ "Unable to update the ReservationId %s into the
FederationStateStore.",
+ reservationId);
+ }
+ }
+ }
+
+ // Obtain the ApplicationClientProtocol of the corresponding RM according
+ // to the subClusterId, and call the submitReservation method.
+ // If the request is responded to, it will return directly; otherwise
+ // retryCount+1, and continue to submit another request.
+ try {
+ ApplicationClientProtocol clientRMProxy =
getClientRMProxyForSubCluster(subClusterId);
+ ReservationSubmissionResponse response =
clientRMProxy.submitReservation(request);
+ if (response != null) {
+ LOG.info("Reservation {} submitted on {}.",
request.getReservationId(), subClusterId);
Review Comment:
I will fix it.
> Support getNewReservation, submitReservation, updateReservation,
> deleteReservation API's for Federation
> -------------------------------------------------------------------------------------------------------
>
> Key: YARN-11177
> URL: https://issues.apache.org/jira/browse/YARN-11177
> Project: Hadoop YARN
> Issue Type: Sub-task
> Reporter: fanshilun
> Assignee: fanshilun
> Priority: Major
> Labels: pull-request-available
> Fix For: 3.4.0
>
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]