Repository: oozie Updated Branches: refs/heads/master a5779d75c -> ad2fa21e3
OOZIE-2407 AbandonedService should not send mail if there is no abandoned coord Project: http://git-wip-us.apache.org/repos/asf/oozie/repo Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/ad2fa21e Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/ad2fa21e Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/ad2fa21e Branch: refs/heads/master Commit: ad2fa21e3b8490da3f7f741bd28e0374895d7253 Parents: a5779d7 Author: Purshotam Shah <[email protected]> Authored: Mon Dec 21 14:06:22 2015 -0800 Committer: Purshotam Shah <[email protected]> Committed: Mon Dec 21 14:06:22 2015 -0800 ---------------------------------------------------------------------- .../service/AbandonedCoordCheckerService.java | 71 ++++++++++---------- .../coord/TestAbandonedCoordChecker.java | 29 +++++--- release-log.txt | 1 + 3 files changed, 58 insertions(+), 43 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oozie/blob/ad2fa21e/core/src/main/java/org/apache/oozie/service/AbandonedCoordCheckerService.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/service/AbandonedCoordCheckerService.java b/core/src/main/java/org/apache/oozie/service/AbandonedCoordCheckerService.java index 56eacc0..0ff23e6 100644 --- a/core/src/main/java/org/apache/oozie/service/AbandonedCoordCheckerService.java +++ b/core/src/main/java/org/apache/oozie/service/AbandonedCoordCheckerService.java @@ -22,7 +22,6 @@ import java.util.Date; import java.util.List; import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; import org.apache.oozie.CoordinatorJobBean; import org.apache.oozie.action.email.EmailActionExecutor; import org.apache.oozie.command.CommandException; @@ -34,8 +33,6 @@ import org.apache.oozie.executor.jpa.JPAExecutorException; import org.apache.oozie.util.DateUtils; import org.apache.oozie.util.XLog; -import com.google.common.annotations.VisibleForTesting; - /** * The Abandoned Coord Checker Service check finds out the abandoned coord jobs in system and kills it. A job is * considered to be abandoned/faulty if total number of actions in failed/timedout/suspended >= limit and there are no @@ -59,7 +56,6 @@ public class AbandonedCoordCheckerService implements Service { private static String serverURL; public static class AbandonedCoordCheckerRunnable implements Runnable { - private StringBuilder msg; final int failureLimit; XLog LOG = XLog.getLog(getClass()); private boolean shouldKill = false; @@ -78,15 +74,9 @@ public class AbandonedCoordCheckerService implements Service { LOG.info("Server is not primary server. Skipping run"); return; } - msg = new StringBuilder(); XLog.Info.get().clear(); - msg.append("<!DOCTYPE html><html><head><style>table,th,td{border:1px solid black;border-collapse:collapse;}</style>" - + "</head><body><table>"); - addTableHeader(); try { checkCoordJobs(); - msg.append("</table></body></html>"); - sendMail(msg.toString()); } catch (Exception e) { LOG.error("Error running AbandonedCoordChecker", e); @@ -95,43 +85,52 @@ public class AbandonedCoordCheckerService implements Service { /** * Check coordinator - * - * @throws CommandException + * @throws Exception */ - private void checkCoordJobs() throws CommandException { + private void checkCoordJobs() throws Exception { + StringBuilder msg = new StringBuilder(); + addTableHeader(msg); List<CoordinatorJobBean> jobs; try { - Timestamp createdTS = new Timestamp( - System.currentTimeMillis() - - (ConfigurationService.getInt(CONF_JOB_OLDER_THAN) * 60 * 1000)); + Timestamp createdTS = new Timestamp(System.currentTimeMillis() + - (ConfigurationService.getInt(CONF_JOB_OLDER_THAN) * 60 * 1000)); jobs = CoordJobQueryExecutor.getInstance().getList(CoordJobQuery.GET_COORD_FOR_ABANDONEDCHECK, failureLimit, createdTS); for (CoordinatorJobBean job : jobs) { - String killStatus = "Coord kill is disabled"; - LOG.info("Abandoned Coord found : " + job.getId()); - if (shouldKill) { - try { - new CoordKillXCommand(job.getId()).call(); - LOG.info("Killed abandoned coord : " + job.getId()); - killStatus = "Successful"; - } - catch (Exception e) { - LOG.error("Can't kill abandoned coord : " + job.getId(), e); - killStatus = " Failed : " + e.getMessage(); - } - } - addCoordToMessage(job, killStatus); + processJob(job, msg); + } + if (jobs.size() > 0) { + addTableTail(msg); + sendMail(msg.toString()); } + } catch (JPAExecutorException je) { throw new CommandException(je); } } - public void addCoordToMessage(CoordinatorJobBean job, String killStatus) { + private void processJob(CoordinatorJobBean job, StringBuilder msg){ + String killStatus = "Coord kill is disabled"; + LOG.info("Abandoned Coord found : " + job.getId()); + if (shouldKill) { + try { + new CoordKillXCommand(job.getId()).call(); + LOG.info("Killed abandoned coord : " + job.getId()); + killStatus = "Successful"; + } + catch (Exception e) { + LOG.error("Can't kill abandoned coord : " + job.getId(), e); + killStatus = " Failed : " + e.getMessage(); + } + } + addCoordToMessage(job, killStatus, msg); + } + + public void addCoordToMessage(CoordinatorJobBean job, String killStatus, StringBuilder msg) { msg.append("<tr>"); msg.append("<td><a href=\"").append(JobXCommand.getJobConsoleUrl(job.getId())).append("\">") .append(job.getId()).append("</a></td>"); @@ -142,7 +141,9 @@ public class AbandonedCoordCheckerService implements Service { msg.append("</tr>"); } - public void addTableHeader() { + public void addTableHeader(StringBuilder msg) { + msg.append("<!DOCTYPE html><html><head><style>table,th,td{border:1px solid black;border-collapse:collapse;}</style>" + + "</head><body><table>"); msg.append("<tr>"); msg.append("<td>").append("Coordinator id").append("</td>"); msg.append("<td>").append("Coordinator name").append("</td>"); @@ -152,11 +153,11 @@ public class AbandonedCoordCheckerService implements Service { msg.append("</tr>"); } - @VisibleForTesting - public String getMessage() { - return msg.toString(); + public void addTableTail(StringBuilder msg) { + msg.append("</table></body></html>"); } + public void sendMail(String body) throws Exception { if (to == null || to.length == 0 || (to.length == 1 && StringUtils.isEmpty(to[0]))) { LOG.info(TO_ADDRESS + " is not configured. Not sending email"); http://git-wip-us.apache.org/repos/asf/oozie/blob/ad2fa21e/core/src/test/java/org/apache/oozie/command/coord/TestAbandonedCoordChecker.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/oozie/command/coord/TestAbandonedCoordChecker.java b/core/src/test/java/org/apache/oozie/command/coord/TestAbandonedCoordChecker.java index 24b6f66..7837b52 100644 --- a/core/src/test/java/org/apache/oozie/command/coord/TestAbandonedCoordChecker.java +++ b/core/src/test/java/org/apache/oozie/command/coord/TestAbandonedCoordChecker.java @@ -59,7 +59,7 @@ public class TestAbandonedCoordChecker extends XDataTestCase { createdTime, true, false, 4); addRecordToCoordActionTable(job2.getId(), 4, CoordinatorAction.Status.FAILED); - AbandonedCoordCheckerRunnable coordChecked = new AbandonedCoordCheckerRunnable(5); + ExtendedAbandonedCoordCheckerRunnable coordChecked = new ExtendedAbandonedCoordCheckerRunnable(5); coordChecked.run(); String msg = coordChecked.getMessage(); assertTrue(msg.contains(job1.getId())); @@ -67,7 +67,7 @@ public class TestAbandonedCoordChecker extends XDataTestCase { } - public void testAbandoned_notAbandoned() throws Exception { + public void testNoAbandoned() throws Exception { Date start = DateUtils.addMonths(new Date(), -1); Date end = new Date(start.getTime() + (4 * 60 * 60 * 1000)); // 4 hrs @@ -85,11 +85,9 @@ public class TestAbandonedCoordChecker extends XDataTestCase { addRecordToCoordActionTable(job2.getId(), 6, CoordinatorAction.Status.SUCCEEDED, CoordinatorAction.Status.FAILED); - AbandonedCoordCheckerRunnable coordChecked = new AbandonedCoordCheckerRunnable(5); + ExtendedAbandonedCoordCheckerRunnable coordChecked = new ExtendedAbandonedCoordCheckerRunnable(5); coordChecked.run(); - String msg = coordChecked.getMessage(); - assertFalse(msg.contains(job1.getId())); - assertFalse(msg.contains(job2.getId())); + assertNull(coordChecked.getMessage()); } public void testMessage_withTimedout() throws Exception { @@ -106,7 +104,7 @@ public class TestAbandonedCoordChecker extends XDataTestCase { createdTime, true, false, 4); addRecordToCoordActionTable(job2.getId(), 4, CoordinatorAction.Status.TIMEDOUT); - AbandonedCoordCheckerRunnable coordChecked = new AbandonedCoordCheckerRunnable(10); + ExtendedAbandonedCoordCheckerRunnable coordChecked = new ExtendedAbandonedCoordCheckerRunnable(10); coordChecked.run(); String msg = coordChecked.getMessage(); assertTrue(msg.contains(job1.getId())); @@ -136,7 +134,7 @@ public class TestAbandonedCoordChecker extends XDataTestCase { addRecordToCoordActionTable(job3.getId(), 5, CoordinatorAction.Status.FAILED, CoordinatorAction.Status.SUSPENDED, CoordinatorAction.Status.TIMEDOUT); - AbandonedCoordCheckerRunnable coordChecked = new AbandonedCoordCheckerRunnable(5); + ExtendedAbandonedCoordCheckerRunnable coordChecked = new ExtendedAbandonedCoordCheckerRunnable(5); coordChecked.run(); String msg = coordChecked.getMessage(); assertTrue(msg.contains(job1.getId())); @@ -219,4 +217,19 @@ public class TestAbandonedCoordChecker extends XDataTestCase { addRecordToCoordActionTable(jobId, i, jobStatus, "coord-action-get.xml", 0); } } + + public static class ExtendedAbandonedCoordCheckerRunnable extends AbandonedCoordCheckerRunnable { + String message; + + public ExtendedAbandonedCoordCheckerRunnable(int failureLimit) { + super(failureLimit); + } + + public void sendMail(String body) throws Exception { + message = body; + } + public String getMessage(){ + return message; + } + } } http://git-wip-us.apache.org/repos/asf/oozie/blob/ad2fa21e/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index 01326c8..72da838 100644 --- a/release-log.txt +++ b/release-log.txt @@ -1,5 +1,6 @@ -- Oozie 4.3.0 release (trunk - unreleased) +OOZIE-2407 AbandonedService should not send mail if there is no abandoned coord (puru) OOZIE-2402 oozie-setup.sh sharelib create takes a long time on large clusters (yalovyyi via rkanter) OOZIE-2185 Make oozie cli source conf/oozie-client-env.sh (grimesmi via rkanter) OOZIE-2413 Kerberos credentials can expire if the KDC is slow to respond (rkanter)
