Repository: aurora Updated Branches: refs/heads/master 0813e3a44 -> e75cdba5f
Update leader redirection logic to return an error page if there is no leading scheduler. We'd like to serve Aurora through DNS rather than against the singleton serverset so that when there is no leader users get a friendly error page rather than a generic error (which inevitably leads to them coming to ask for support). In order to accomplish this, I've updated the `LeaderRedirectFilter` to return a 503 with an html body in the event there is no elected leader. Reviewed at https://reviews.apache.org/r/41534/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/e75cdba5 Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/e75cdba5 Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/e75cdba5 Branch: refs/heads/master Commit: e75cdba5ff7cdc46effebc8ee62c2be6dabe9d97 Parents: 0813e3a Author: Joshua Cohen <jco...@apache.org> Authored: Tue Dec 22 14:09:54 2015 -0600 Committer: Joshua Cohen <jco...@apache.org> Committed: Tue Dec 22 14:09:54 2015 -0600 ---------------------------------------------------------------------- .../aurora/scheduler/http/LeaderRedirect.java | 47 ++++++++++++++- .../scheduler/http/LeaderRedirectFilter.java | 43 ++++++++++++-- .../apache/aurora/scheduler/http/no-leader.html | 43 ++++++++++++++ .../scheduler/http/AbstractJettyTest.java | 16 ++++- .../scheduler/http/LeaderRedirectTest.java | 19 +++--- .../scheduler/http/ServletFilterTest.java | 61 ++++++++++++-------- 6 files changed, 185 insertions(+), 44 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/e75cdba5/src/main/java/org/apache/aurora/scheduler/http/LeaderRedirect.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/aurora/scheduler/http/LeaderRedirect.java b/src/main/java/org/apache/aurora/scheduler/http/LeaderRedirect.java index 2c86ddb..7c3c7e5 100644 --- a/src/main/java/org/apache/aurora/scheduler/http/LeaderRedirect.java +++ b/src/main/java/org/apache/aurora/scheduler/http/LeaderRedirect.java @@ -39,6 +39,22 @@ import static java.util.Objects.requireNonNull; * leader. */ public class LeaderRedirect { + public enum LeaderStatus { + /** + * This instance is currently the leading scheduler. + */ + LEADING, + + /** + * There is not currently an elected leading scheduler. + */ + NO_LEADER, + + /** + * This instance is not currently the leading scheduler. + */ + NOT_LEADING, + } // TODO(wfarner): Should we tie this directly to the producer of the node (HttpModule? It seems // like the right thing to do, but would introduce an otherwise unnecessary dependency. @@ -73,8 +89,8 @@ public class LeaderRedirect { return Optional.absent(); } - if (leadingScheduler.isSetAdditionalEndpoints()) { - Endpoint leaderHttp = leadingScheduler.getAdditionalEndpoints().get(HTTP_PORT_NAME); + if (leadingScheduler.isSetServiceEndpoint()) { + Endpoint leaderHttp = leadingScheduler.getServiceEndpoint(); if (leaderHttp != null && leaderHttp.isSetHost() && leaderHttp.isSetPort()) { return Optional.of(HostAndPort.fromParts(leaderHttp.getHost(), leaderHttp.getPort())); } @@ -114,6 +130,33 @@ public class LeaderRedirect { } /** + * Gets the current status of the elected leader. + * + * @return a {@code LeaderStatus} indicating whether there is an elected leader (and if so, if + * this instance is the leader). + */ + public LeaderStatus getLeaderStatus() { + ServiceInstance leadingScheduler = leader.get(); + if (leadingScheduler == null) { + return LeaderStatus.NO_LEADER; + } + + if (!leadingScheduler.isSetServiceEndpoint()) { + LOG.warning("Leader service instance seems to be incomplete: " + leadingScheduler); + return LeaderStatus.NO_LEADER; + } + + Optional<HostAndPort> leaderHttp = getLeaderHttp(); + Optional<HostAndPort> localHttp = getLocalHttp(); + + if (leaderHttp.isPresent() && leaderHttp.equals(localHttp)) { + return LeaderStatus.LEADING; + } + + return LeaderStatus.NOT_LEADING; + } + + /** * Gets the optional redirect URI target in the event that this process is not the leading * scheduler. * http://git-wip-us.apache.org/repos/asf/aurora/blob/e75cdba5/src/main/java/org/apache/aurora/scheduler/http/LeaderRedirectFilter.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/aurora/scheduler/http/LeaderRedirectFilter.java b/src/main/java/org/apache/aurora/scheduler/http/LeaderRedirectFilter.java index 51566e9..b83166e 100644 --- a/src/main/java/org/apache/aurora/scheduler/http/LeaderRedirectFilter.java +++ b/src/main/java/org/apache/aurora/scheduler/http/LeaderRedirectFilter.java @@ -15,6 +15,7 @@ package org.apache.aurora.scheduler.http; import java.io.IOException; import java.util.Objects; +import java.util.logging.Logger; import javax.inject.Inject; import javax.servlet.FilterChain; @@ -23,12 +24,20 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.core.HttpHeaders; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Optional; +import com.google.common.io.Resources; + +import static org.apache.aurora.scheduler.http.LeaderRedirect.LeaderStatus; /** * An HTTP filter that will redirect the request to the leading scheduler. */ public class LeaderRedirectFilter extends AbstractFilter { + private static final Logger LOG = Logger.getLogger(LeaderRedirectFilter.class.getName()); + + @VisibleForTesting + static final String NO_LEADER_PAGE = "no-leader.html"; private final LeaderRedirect redirector; @@ -37,16 +46,38 @@ public class LeaderRedirectFilter extends AbstractFilter { this.redirector = Objects.requireNonNull(redirector); } + private void sendServiceUnavailable(HttpServletResponse response) throws IOException { + response.setStatus(HttpServletResponse.SC_SERVICE_UNAVAILABLE); + Resources.asByteSource(Resources.getResource(LeaderRedirectFilter.class, NO_LEADER_PAGE)) + .copyTo(response.getOutputStream()); + } + @Override public void doFilter(HttpServletRequest request, HttpServletResponse response, FilterChain chain) throws IOException, ServletException { - Optional<String> leaderRedirect = redirector.getRedirectTarget(request); - if (leaderRedirect.isPresent()) { - response.setStatus(HttpServletResponse.SC_TEMPORARY_REDIRECT); - response.setHeader(HttpHeaders.LOCATION, leaderRedirect.get()); - } else { - chain.doFilter(request, response); + LeaderStatus leaderStatus = redirector.getLeaderStatus(); + switch (leaderStatus) { + case LEADING: + chain.doFilter(request, response); + return; + case NO_LEADER: + sendServiceUnavailable(response); + return; + case NOT_LEADING: + Optional<String> leaderRedirect = redirector.getRedirectTarget(request); + if (leaderRedirect.isPresent()) { + response.setStatus(HttpServletResponse.SC_TEMPORARY_REDIRECT); + response.setHeader(HttpHeaders.LOCATION, leaderRedirect.get()); + return; + } + + LOG.warning("Got no leader redirect contrary to leader status, treating as if no leader " + + "was found."); + sendServiceUnavailable(response); + return; + default: + throw new IllegalArgumentException("Encountered unknown LeaderStatus: " + leaderStatus); } } } http://git-wip-us.apache.org/repos/asf/aurora/blob/e75cdba5/src/main/resources/org/apache/aurora/scheduler/http/no-leader.html ---------------------------------------------------------------------- diff --git a/src/main/resources/org/apache/aurora/scheduler/http/no-leader.html b/src/main/resources/org/apache/aurora/scheduler/http/no-leader.html new file mode 100644 index 0000000..cf44f01 --- /dev/null +++ b/src/main/resources/org/apache/aurora/scheduler/http/no-leader.html @@ -0,0 +1,43 @@ +<!doctype html> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> +<html lang='en'> +<head> + <meta charset='utf-8'> + <meta name='google' value='notranslate'> + <link rel='icon' href='/assets/images/aurora.png' type='image/png'/> + <link href='/assets/bower_components/bootstrap/dist/css/bootstrap.min.css' rel='stylesheet'> + <script src='/assets/bower_components/jquery/dist/jquery.js'></script> + <script src='/assets/bower_components/bootstrap/dist/js/bootstrap.min.js'></script> + <link rel='stylesheet' href='/assets/css/app.css'/> + <title>Aurora</title> +</head> +<body> + +<nav class="navbar navbar-static-top header" role="navigation"> + <div class="container-fluid"> + <div class="navbar-header"> + <a href="/scheduler"><img class="logo" src="/assets/images/aurora_logo.png"/></a> + </div> + </div> +</nav> + +<div class='container-fluid'> + <h2>No Leader Found</h2> + + <p>It appears that the scheduler is failing over. This is routine behavior that should last no more than a few minutes.</p> +</div> + +</body> +</html> http://git-wip-us.apache.org/repos/asf/aurora/blob/e75cdba5/src/test/java/org/apache/aurora/scheduler/http/AbstractJettyTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/aurora/scheduler/http/AbstractJettyTest.java b/src/test/java/org/apache/aurora/scheduler/http/AbstractJettyTest.java index 152af0d..19c8a1f 100644 --- a/src/test/java/org/apache/aurora/scheduler/http/AbstractJettyTest.java +++ b/src/test/java/org/apache/aurora/scheduler/http/AbstractJettyTest.java @@ -19,6 +19,7 @@ import javax.servlet.ServletContextListener; import javax.ws.rs.core.MediaType; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableSet; import com.google.common.net.HostAndPort; import com.google.common.util.concurrent.RateLimiter; import com.google.inject.AbstractModule; @@ -42,6 +43,7 @@ import org.apache.aurora.common.quantity.Amount; import org.apache.aurora.common.quantity.Time; import org.apache.aurora.common.stats.StatsProvider; import org.apache.aurora.common.testing.easymock.EasyMockTest; +import org.apache.aurora.common.thrift.Endpoint; import org.apache.aurora.common.thrift.ServiceInstance; import org.apache.aurora.common.util.BackoffStrategy; import org.apache.aurora.gen.ServerInfo; @@ -79,7 +81,7 @@ public abstract class AbstractJettyTest extends EasyMockTest { private Injector injector; protected StorageTestUtil storage; protected HostAndPort httpServer; - protected Capture<HostChangeMonitor<ServiceInstance>> schedulerWatcher; + private Capture<HostChangeMonitor<ServiceInstance>> schedulerWatcher; /** * Subclasses should override with a module that configures the servlets they are testing. @@ -138,6 +140,15 @@ public abstract class AbstractJettyTest extends EasyMockTest { expect(schedulers.watch(capture(schedulerWatcher))).andReturn(createMock(Command.class)); } + protected void setLeadingScheduler(String host, int port) { + schedulerWatcher.getValue().onChange( + ImmutableSet.of(new ServiceInstance().setServiceEndpoint(new Endpoint(host, port)))); + } + + protected void unsetLeadingSchduler() { + schedulerWatcher.getValue().onChange(ImmutableSet.of()); + } + protected void replayAndStart() { control.replay(); try { @@ -151,6 +162,9 @@ public abstract class AbstractJettyTest extends EasyMockTest { throw Throwables.propagate(e); } httpServer = injector.getInstance(HttpService.class).getAddress(); + + // By default we'll set this instance to be the leader. + setLeadingScheduler(httpServer.getHostText(), httpServer.getPort()); } protected String makeUrl(String path) { http://git-wip-us.apache.org/repos/asf/aurora/blob/e75cdba5/src/test/java/org/apache/aurora/scheduler/http/LeaderRedirectTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/aurora/scheduler/http/LeaderRedirectTest.java b/src/test/java/org/apache/aurora/scheduler/http/LeaderRedirectTest.java index 98b6ef6..3678266 100644 --- a/src/test/java/org/apache/aurora/scheduler/http/LeaderRedirectTest.java +++ b/src/test/java/org/apache/aurora/scheduler/http/LeaderRedirectTest.java @@ -19,7 +19,6 @@ import javax.servlet.http.HttpServletRequest; import com.google.common.base.Function; import com.google.common.base.Optional; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.net.HostAndPort; @@ -32,10 +31,9 @@ import org.apache.aurora.common.thrift.Endpoint; import org.apache.aurora.common.thrift.ServiceInstance; import org.easymock.Capture; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; -import static org.apache.aurora.scheduler.http.LeaderRedirect.HTTP_PORT_NAME; +import static org.apache.aurora.scheduler.http.LeaderRedirect.LeaderStatus; import static org.easymock.EasyMock.capture; import static org.easymock.EasyMock.expect; import static org.junit.Assert.assertEquals; @@ -46,8 +44,7 @@ public class LeaderRedirectTest extends EasyMockTest { private static final Function<HostAndPort, ServiceInstance> CREATE_INSTANCE = endpoint -> new ServiceInstance() - .setAdditionalEndpoints(ImmutableMap.of(HTTP_PORT_NAME, - new Endpoint(endpoint.getHostText(), endpoint.getPort()))); + .setServiceEndpoint(new Endpoint(endpoint.getHostText(), endpoint.getPort())); private Capture<HostChangeMonitor<ServiceInstance>> monitorCapture; @@ -72,13 +69,13 @@ public class LeaderRedirectTest extends EasyMockTest { leaderRedirector.monitor(); } - @Ignore("https://issues.apache.org/jira/browse/AURORA-842") @Test public void testLeader() throws Exception { replayAndMonitor(); publishSchedulers(localPort(HTTP_PORT)); assertEquals(Optional.absent(), leaderRedirector.getRedirect()); + assertEquals(LeaderStatus.LEADING, leaderRedirector.getLeaderStatus()); } @Test @@ -89,6 +86,7 @@ public class LeaderRedirectTest extends EasyMockTest { publishSchedulers(remote); assertEquals(Optional.of(remote), leaderRedirector.getRedirect()); + assertEquals(LeaderStatus.NOT_LEADING, leaderRedirector.getLeaderStatus()); } @Test @@ -99,6 +97,7 @@ public class LeaderRedirectTest extends EasyMockTest { publishSchedulers(local); assertEquals(Optional.of(local), leaderRedirector.getRedirect()); + assertEquals(LeaderStatus.NOT_LEADING, leaderRedirector.getLeaderStatus()); } @Test @@ -106,6 +105,7 @@ public class LeaderRedirectTest extends EasyMockTest { replayAndMonitor(); assertEquals(Optional.absent(), leaderRedirector.getRedirect()); + assertEquals(LeaderStatus.NO_LEADER, leaderRedirector.getLeaderStatus()); } @Test @@ -115,18 +115,17 @@ public class LeaderRedirectTest extends EasyMockTest { publishSchedulers(HostAndPort.fromParts("foobar", 500), HostAndPort.fromParts("baz", 800)); assertEquals(Optional.absent(), leaderRedirector.getRedirect()); + assertEquals(LeaderStatus.NO_LEADER, leaderRedirector.getLeaderStatus()); } @Test public void testBadServiceInstance() throws Exception { replayAndMonitor(); - ServiceInstance badLocal = new ServiceInstance() - .setAdditionalEndpoints(ImmutableMap.of("foo", new Endpoint("localhost", 500))); - - publishSchedulers(ImmutableSet.of(badLocal)); + publishSchedulers(ImmutableSet.of(new ServiceInstance())); assertEquals(Optional.absent(), leaderRedirector.getRedirect()); + assertEquals(LeaderStatus.NO_LEADER, leaderRedirector.getLeaderStatus()); } private HttpServletRequest mockRequest(String attributeValue, String queryString) { http://git-wip-us.apache.org/repos/asf/aurora/blob/e75cdba5/src/test/java/org/apache/aurora/scheduler/http/ServletFilterTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/aurora/scheduler/http/ServletFilterTest.java b/src/test/java/org/apache/aurora/scheduler/http/ServletFilterTest.java index af1e5f1..4057e7c 100644 --- a/src/test/java/org/apache/aurora/scheduler/http/ServletFilterTest.java +++ b/src/test/java/org/apache/aurora/scheduler/http/ServletFilterTest.java @@ -13,17 +13,17 @@ */ package org.apache.aurora.scheduler.http; +import java.io.IOException; +import java.net.URL; +import java.nio.charset.StandardCharsets; import javax.ws.rs.core.HttpHeaders; import javax.ws.rs.core.MediaType; import com.google.common.base.Optional; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; +import com.google.common.io.Resources; import com.sun.jersey.api.client.ClientResponse; import com.sun.jersey.api.client.ClientResponse.Status; -import org.apache.aurora.common.thrift.Endpoint; -import org.apache.aurora.common.thrift.ServiceInstance; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -71,44 +71,55 @@ public class ServletFilterTest extends AbstractJettyTest { } - private void assertResponseStatus(String path, Status expectedStatus) { + private void assertResponseStatus( + String path, + Status expectedStatus, + Optional<URL> responseResource) throws IOException { + ClientResponse response = get(path); assertEquals(expectedStatus.getStatusCode(), response.getStatus()); - } - private void setLeadingScheduler(String host, int port) { - ServiceInstance instance = new ServiceInstance() - .setAdditionalEndpoints(ImmutableMap.of("http", new Endpoint(host, port))); - schedulerWatcher.getValue().onChange(ImmutableSet.of(instance)); + if (responseResource.isPresent()) { + assertEquals( + Resources.toString(responseResource.get(), StandardCharsets.UTF_8), + response.getEntity(String.class)); + } } - private void leaderRedirectSmokeTest(Status expectedStatus) { - assertResponseStatus("/scheduler", expectedStatus); - assertResponseStatus("/scheduler/", expectedStatus); - assertResponseStatus("/scheduler/role", expectedStatus); - assertResponseStatus("/scheduler/role/env", expectedStatus); - assertResponseStatus("/scheduler/role/env/job", expectedStatus); + private void leaderRedirectSmokeTest(Status expectedStatus, Optional<URL> responseResource) + throws IOException { + + assertResponseStatus("/scheduler", expectedStatus, responseResource); + assertResponseStatus("/scheduler/", expectedStatus, responseResource); + assertResponseStatus("/scheduler/role", expectedStatus, responseResource); + assertResponseStatus("/scheduler/role/env", expectedStatus, responseResource); + assertResponseStatus("/scheduler/role/env/job", expectedStatus, responseResource); - assertResponseStatus("/updates", expectedStatus); - assertResponseStatus("/updates/", expectedStatus); + assertResponseStatus("/updates", expectedStatus, responseResource); + assertResponseStatus("/updates/", expectedStatus, responseResource); } @Test public void testLeaderRedirect() throws Exception { replayAndStart(); - assertResponseStatus("/", Status.OK); + assertResponseStatus("/", Status.OK, Optional.absent()); - // Scheduler is assumed leader at this point, since no members are present in the service - // (not even this process). - leaderRedirectSmokeTest(Status.OK); + // If there's no leader, we should send service unavailable and an error page. + unsetLeadingSchduler(); + leaderRedirectSmokeTest( + Status.SERVICE_UNAVAILABLE, + Optional.of( + Resources.getResource( + LeaderRedirectFilter.class, + LeaderRedirectFilter.NO_LEADER_PAGE))); // This process is leading setLeadingScheduler(httpServer.getHostText(), httpServer.getPort()); - leaderRedirectSmokeTest(Status.OK); + leaderRedirectSmokeTest(Status.OK, Optional.absent()); setLeadingScheduler("otherHost", 1234); - leaderRedirectSmokeTest(Status.TEMPORARY_REDIRECT); - assertResponseStatus("/", Status.OK); + leaderRedirectSmokeTest(Status.TEMPORARY_REDIRECT, Optional.absent()); + assertResponseStatus("/", Status.OK, Optional.absent()); } }