This is an automated email from the ASF dual-hosted git repository. hapylestat pushed a commit to branch branch-2.7 in repository https://gitbox.apache.org/repos/asf/ambari.git
The following commit(s) were added to refs/heads/branch-2.7 by this push: new ebde46f AMBARI-25606. Sometimes request aborting doesn't abort IN_PROGRESS task. (dvitiuk via dgrinenko) (#3275) ebde46f is described below commit ebde46f27f0a2f38c176d6a2c66524d0078873d5 Author: dvitiiuk <dmitriiviti...@gmail.com> AuthorDate: Tue Dec 22 17:20:42 2020 +0200 AMBARI-25606. Sometimes request aborting doesn't abort IN_PROGRESS task. (dvitiuk via dgrinenko) (#3275) --- .../server/actionmanager/ActionDBAccessor.java | 4 +-- .../server/actionmanager/ActionDBAccessorImpl.java | 2 +- .../server/actionmanager/ActionScheduler.java | 12 ++------ .../server/actionmanager/HostRoleStatus.java | 4 +++ .../actionmanager/TestActionDBAccessorImpl.java | 12 ++++++-- .../server/actionmanager/TestActionScheduler.java | 35 ---------------------- 6 files changed, 20 insertions(+), 49 deletions(-) diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java index b095a70..d876e74 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java @@ -57,8 +57,8 @@ public interface ActionDBAccessor { /** * Abort all outstanding operations associated with the given request. This - * method uses the {@link HostRoleStatus#SCHEDULED_STATES} to determine which - * {@link HostRoleCommand} instances to abort. + * method uses the {@link HostRoleStatus#SCHEDULED_STATES} and {@link HostRoleStatus#HOLDING_STATES} + * to determine which {@link HostRoleCommand} instances to abort. * * Returns the list of the aborted operations. */ diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java index 329ecba..485e763 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java @@ -219,7 +219,7 @@ public class ActionDBAccessorImpl implements ActionDBAccessor { // only request commands which actually need to be aborted; requesting all // commands here can cause OOM problems during large requests like upgrades List<HostRoleCommandEntity> commands = hostRoleCommandDAO.findByRequestIdAndStatuses(requestId, - HostRoleStatus.SCHEDULED_STATES); + HostRoleStatus.STATES_TO_ABORT); for (HostRoleCommandEntity command : commands) { command.setStatus(HostRoleStatus.ABORTED); diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java index 735a774..6267553 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java @@ -1234,9 +1234,9 @@ class ActionScheduler implements Runnable { cancelHostRoleCommands(tasksToDequeue, reason); } - // abort any stages in progress that belong to this request; don't execute this for all stages since - // that could lead to OOM errors on large requests, like those for - // upgrades + // abort any stages in progress and holding states that belong to this request; + // don't execute this for all stages since that could lead to OOM errors on large requests, + // like those for upgrades List<Stage> stagesInProgress = db.getStagesInProgressForRequest(requestId); for (Stage stageInProgress : stagesInProgress) { abortOperationsForStage(stageInProgress); @@ -1270,12 +1270,6 @@ class ActionScheduler implements Runnable { } } - if (hostRoleCommand.getStatus().isHoldingState()) { - db.abortHostRole(hostRoleCommand.getHostName(), - hostRoleCommand.getRequestId(), - hostRoleCommand.getStageId(), hostRoleCommand.getRole().name()); - } - // If host role is an Action, we have to send an event if (hostRoleCommand.getRoleCommand().equals(RoleCommand.ACTIONEXECUTE)) { String clusterName = hostRoleCommand.getExecutionCommandWrapper().getExecutionCommand().getClusterName(); diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/HostRoleStatus.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/HostRoleStatus.java index 9685f83..ff716df 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/HostRoleStatus.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/HostRoleStatus.java @@ -22,6 +22,8 @@ import java.util.Collections; import java.util.EnumSet; import java.util.List; +import org.apache.commons.collections.ListUtils; + public enum HostRoleStatus { /** * Not queued for a host. @@ -86,6 +88,8 @@ public enum HostRoleStatus { public static List<HostRoleStatus> SCHEDULED_STATES = Arrays.asList(PENDING, QUEUED, IN_PROGRESS); + public static List<HostRoleStatus> STATES_TO_ABORT = ListUtils.union(SCHEDULED_STATES, HOLDING_STATES); + /** * The {@link HostRoleStatus}s that represent any commands which are * considered to be "Failed". diff --git a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionDBAccessorImpl.java b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionDBAccessorImpl.java index 94799cc..6fe7735 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionDBAccessorImpl.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionDBAccessorImpl.java @@ -568,6 +568,7 @@ public class TestActionDBAccessorImpl { clusters.addHost("host2"); clusters.addHost("host3"); clusters.addHost("host4"); + clusters.addHost("host5"); s.addHostRoleExecutionCommand("host1", Role.HBASE_MASTER, RoleCommand.START, @@ -589,13 +590,20 @@ public class TestActionDBAccessorImpl { RoleCommand.START, new ServiceComponentHostStartEvent(Role.HBASE_REGIONSERVER .toString(), "host4", System.currentTimeMillis()), "cluster1", "HBASE", false, false); + s.addHostRoleExecutionCommand( + "host5", + Role.HBASE_REGIONSERVER, + RoleCommand.START, + new ServiceComponentHostStartEvent(Role.HBASE_REGIONSERVER + .toString(), "host5", System.currentTimeMillis()), "cluster1", "HBASE", false, false); List<Stage> stages = new ArrayList<>(); stages.add(s); s.getOrderedHostRoleCommands().get(0).setStatus(HostRoleStatus.PENDING); s.getOrderedHostRoleCommands().get(1).setStatus(HostRoleStatus.IN_PROGRESS); - s.getOrderedHostRoleCommands().get(2).setStatus(HostRoleStatus.QUEUED); + s.getOrderedHostRoleCommands().get(2).setStatus(HostRoleStatus.HOLDING_FAILED); + s.getOrderedHostRoleCommands().get(3).setStatus(HostRoleStatus.QUEUED); - HostRoleCommand cmd = s.getOrderedHostRoleCommands().get(3); + HostRoleCommand cmd = s.getOrderedHostRoleCommands().get(4); String hostName = cmd.getHostName(); cmd.setStatus(HostRoleStatus.COMPLETED); diff --git a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java index 220cd52..2ea44e2 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java @@ -2893,41 +2893,6 @@ public class TestActionScheduler { } @Test - public void testAbortHolding() throws AmbariException { - UnitOfWork unitOfWork = EasyMock.createMock(UnitOfWork.class); - ActionDBAccessor db = EasyMock.createMock(ActionDBAccessor.class); - Clusters fsm = EasyMock.createMock(Clusters.class); - Configuration conf = new Configuration(new Properties()); - HostEntity hostEntity1 = new HostEntity(); - hostEntity1.setHostName("h1"); - hostDAO.merge(hostEntity1); - - db.abortHostRole("h1", -1L, -1L, "AMBARI_SERVER_ACTION"); - EasyMock.expectLastCall(); - - EasyMock.replay(db); - - ActionScheduler scheduler = new ActionScheduler(100, 50, db, fsm, 3, - new HostsMap((String) null), - unitOfWork, null, conf, entityManagerProviderMock, - (HostRoleCommandDAO)null, (HostRoleCommandFactory)null, null); - - HostRoleCommand hrc1 = hostRoleCommandFactory.create("h1", Role.NAMENODE, null, RoleCommand.EXECUTE); - hrc1.setStatus(HostRoleStatus.COMPLETED); - HostRoleCommand hrc3 = hostRoleCommandFactory.create("h1", Role.AMBARI_SERVER_ACTION, null, RoleCommand.CUSTOM_COMMAND); - hrc3.setStatus(HostRoleStatus.HOLDING); - HostRoleCommand hrc4 = hostRoleCommandFactory.create("h1", Role.FLUME_HANDLER, null, RoleCommand.EXECUTE); - hrc4.setStatus(HostRoleStatus.PENDING); - - List<HostRoleCommand> hostRoleCommands = Arrays.asList(hrc1, hrc3, hrc4); - - scheduler.cancelHostRoleCommands(hostRoleCommands, "foo"); - - EasyMock.verify(db); - - } - - @Test public void testAbortAmbariServerAction() throws AmbariException { UnitOfWork unitOfWork = EasyMock.createMock(UnitOfWork.class); ActionDBAccessor db = EasyMock.createMock(ActionDBAccessor.class);