Repository: lens Updated Branches: refs/heads/master dba885ca9 -> 5faaf11b4
LENS-893 : Add exponential backoff for next update of status update, incase of failures from driver Project: http://git-wip-us.apache.org/repos/asf/lens/repo Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/5faaf11b Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/5faaf11b Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/5faaf11b Branch: refs/heads/master Commit: 5faaf11b4d4a75972aa113101c6f2b63ac32763d Parents: dba885c Author: Amareshwari Sriramadasu <amareshw...@gmail.com> Authored: Wed Mar 30 17:18:13 2016 +0530 Committer: Rajat Khandelwal <rajatgupt...@gmail.com> Committed: Wed Mar 30 17:18:13 2016 +0530 ---------------------------------------------------------------------- .../lens/server/api/LensConfConstants.java | 31 +++++++ .../server/api/common/BackOffRetryHandler.java | 71 ++++++++++++++ .../lens/server/api/common/FailureContext.java | 43 +++++++++ ...FibonacciExponentialBackOffRetryHandler.java | 77 +++++++++++++++ .../common/OperationRetryHandlerFactory.java | 41 ++++++++ .../lens/server/api/query/QueryContext.java | 39 ++++++++ .../apache/lens/server/api/util/LensUtil.java | 20 +++- .../TestExponentialBackOffRetryHandler.java | 52 +++++++++++ .../lens/server/api/driver/MockDriver.java | 24 +++++ .../api/query/TestAbstractQueryContext.java | 1 + .../lens/server/api/query/TestQueryContext.java | 98 ++++++++++++++++++++ .../server/query/QueryExecutionServiceImpl.java | 20 +++- .../src/main/resources/lensserver-default.xml | 20 ++++ lens-server/src/test/resources/lens-site.xml | 6 ++ src/site/apt/admin/config.apt | 56 ++++++----- 15 files changed, 570 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java ---------------------------------------------------------------------- diff --git 
a/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java b/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java index f8a936a..724c868 100644 --- a/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java +++ b/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java @@ -1046,4 +1046,35 @@ public final class LensConfConstants { * Default value of INMEMORY_RESULT_SET_TTL_SECS is 300 secs (5 minutes) */ public static final int DEFAULT_INMEMORY_RESULT_SET_TTL_SECS = 300; + + /** + * Number of times a status update will be retried, in case of transient failures + */ + public static final String STATUS_UPDATE_EXPONENTIAL_RETRIES = SERVER_PFX + "status.update.num.retries"; + + /** + * Default value of STATUS_UPDATE_EXPONENTIAL_RETRIES is 10 + */ + public static final int DEFAULT_STATUS_UPDATE_EXPONENTIAL_RETRIES = 10; + + /** + * Maximum delay a status update can wait for next update, in case of transient failures + */ + public static final String MAXIMUM_STATUS_UPDATE_DELAY = SERVER_PFX + "status.update.maximum.delay.secs"; + + /** + * Default value of MAXIMUM_STATUS_UPDATE_DELAY is 1800 secs (30 minutes) + */ + public static final long DEFAULT_MAXIMUM_STATUS_UPDATE_DELAY = 1800; + + /** + * Number of millis that would grow exponentially for next update, in case of transient failures. 
+ */ + public static final String STATUS_UPDATE_EXPONENTIAL_WAIT_FACTOR = SERVER_PFX + + "status.update.exponential.wait.millis"; + + /** + * Default value of STATUS_UPDATE_EXPONENTIAL_WAIT_FACTOR is 30000 millis (30 seconds) + */ + public static final long DEFAULT_STATUS_UPDATE_EXPONENTIAL_WAIT_FACTOR = 30000; } http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/main/java/org/apache/lens/server/api/common/BackOffRetryHandler.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/common/BackOffRetryHandler.java b/lens-server-api/src/main/java/org/apache/lens/server/api/common/BackOffRetryHandler.java new file mode 100644 index 0000000..17bfba0 --- /dev/null +++ b/lens-server-api/src/main/java/org/apache/lens/server/api/common/BackOffRetryHandler.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lens.server.api.common; + +/** + * A backoff retry handler. 
+ * + * This allows a backoff on any call: it provides methods to tell whether the operation can be tried now, + * what the next time is at which the operation can be performed, and whether the operation has exhausted all retries. + * + * Callers of this would do the following : + * + * if (handler.canTryOpNow(FailureContext)) { + * try { + * tryCallerOperation(); + * FailureContext.clear(); + * } catch (any Transient Exception) { + * FailureContext.updateFailure(); + * if (!handler.hasExhaustedRetries(FailureContext)) { + * // will be tried later again + * } + * throw exception; + * } + * } + */ +public interface BackOffRetryHandler { + + /** + * To know whether operation can be done now. + * + * @param failContext FailureContext holding failures till now. + * + * @return true if operation can be done now, false otherwise. + */ + boolean canTryOpNow(FailureContext failContext); + + /** + * Get the time when the operation can be done next. + * + * @param failContext FailureContext holding failures till now. + * + * @return Next operation time in millis since epoch + */ + long getOperationNextTime(FailureContext failContext); + + /** + * Has the operation exhausted all its retries + * + * @param failContext FailureContext holding failures till now. + * + * @return true if all retries have been exhausted, false otherwise. 
+ */ + boolean hasExhaustedRetries(FailureContext failContext); +} http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/main/java/org/apache/lens/server/api/common/FailureContext.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/common/FailureContext.java b/lens-server-api/src/main/java/org/apache/lens/server/api/common/FailureContext.java new file mode 100644 index 0000000..70a34b0 --- /dev/null +++ b/lens-server-api/src/main/java/org/apache/lens/server/api/common/FailureContext.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lens.server.api.common; + +import lombok.Getter; + +/** + * Failure context captures last failure time and number of failures. 
+ */ +public class FailureContext { + + @Getter + private long lastFailedTime = 0; + @Getter + private int failCount = 0; + + public synchronized void updateFailure() { + lastFailedTime = System.currentTimeMillis(); + failCount++; + } + + public synchronized void clear() { + lastFailedTime = 0; + failCount = 0; + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/main/java/org/apache/lens/server/api/common/FibonacciExponentialBackOffRetryHandler.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/common/FibonacciExponentialBackOffRetryHandler.java b/lens-server-api/src/main/java/org/apache/lens/server/api/common/FibonacciExponentialBackOffRetryHandler.java new file mode 100644 index 0000000..e7fb8ce --- /dev/null +++ b/lens-server-api/src/main/java/org/apache/lens/server/api/common/FibonacciExponentialBackOffRetryHandler.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lens.server.api.common; + +import static com.google.common.base.Preconditions.checkArgument; + +/** + * A exponential backoff retry handler. 
+ * + * It allows the failures to be retried at the next update time, which can increase exponentially. + * + */ +public class FibonacciExponentialBackOffRetryHandler implements BackOffRetryHandler { + final int[] fibonacci; + final long maxDelay; + final long waitMillis; + + public FibonacciExponentialBackOffRetryHandler(int numRetries, long maxDelay, long waitMillis) { + checkArgument(numRetries > 2); + fibonacci = new int[numRetries]; + fibonacci[0] = 1; + fibonacci[1] = 1; + for(int i = 2; i < numRetries; ++i) { + fibonacci[i] = fibonacci[i-1] + fibonacci[i-2]; + } + this.maxDelay = maxDelay; + this.waitMillis = waitMillis; + } + + public boolean canTryOpNow(FailureContext failContext) { + synchronized (failContext) { + if (failContext.getFailCount() != 0) { + long now = System.currentTimeMillis(); + if (now < getOperationNextTime(failContext)) { + return false; + } + } + return true; + } + } + + public long getOperationNextTime(FailureContext failContext) { + synchronized (failContext) { + if (failContext.getFailCount() >= fibonacci.length) { + return failContext.getLastFailedTime() + maxDelay; + } + long delay = Math.min(maxDelay, fibonacci[failContext.getFailCount()] * waitMillis); + return failContext.getLastFailedTime() + delay; + } + } + + public boolean hasExhaustedRetries(FailureContext failContext) { + synchronized (failContext) { + if (failContext.getFailCount() >= fibonacci.length) { + return true; + } + return false; + } + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/main/java/org/apache/lens/server/api/common/OperationRetryHandlerFactory.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/common/OperationRetryHandlerFactory.java b/lens-server-api/src/main/java/org/apache/lens/server/api/common/OperationRetryHandlerFactory.java new file mode 100644 index 0000000..88fbe43 --- /dev/null +++ 
b/lens-server-api/src/main/java/org/apache/lens/server/api/common/OperationRetryHandlerFactory.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lens.server.api.common; + +/** + * Factory which creates operation retry handler + */ +public class OperationRetryHandlerFactory { + private OperationRetryHandlerFactory() { + } + + /** + * Create exponential backoff handler + * + * @param numRetries Number of exponential backoff retries + * @param maxDelay Maximum delay an operation can wait for next + * @param waitMillis Number of millis that would grow exponentially incase of failures + * + * @return BackOffRetryHandler + */ + public static BackOffRetryHandler createExponentialBackOffHandler(int numRetries, long maxDelay, long waitMillis) { + return new FibonacciExponentialBackOffRetryHandler(numRetries, maxDelay, waitMillis); + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/main/java/org/apache/lens/server/api/query/QueryContext.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/query/QueryContext.java 
b/lens-server-api/src/main/java/org/apache/lens/server/api/query/QueryContext.java index 8ee8a3b..94b79d0 100644 --- a/lens-server-api/src/main/java/org/apache/lens/server/api/query/QueryContext.java +++ b/lens-server-api/src/main/java/org/apache/lens/server/api/query/QueryContext.java @@ -33,6 +33,8 @@ import org.apache.lens.api.query.QueryHandle; import org.apache.lens.api.query.QueryStatus; import org.apache.lens.api.query.QueryStatus.Status; import org.apache.lens.server.api.LensConfConstants; +import org.apache.lens.server.api.common.BackOffRetryHandler; +import org.apache.lens.server.api.common.FailureContext; import org.apache.lens.server.api.driver.DriverQueryStatus; import org.apache.lens.server.api.driver.InMemoryResultSet; import org.apache.lens.server.api.driver.LensDriver; @@ -41,6 +43,7 @@ import org.apache.lens.server.api.driver.PartiallyFetchedInMemoryResultSet; import org.apache.lens.server.api.error.LensException; import org.apache.lens.server.api.query.collect.WaitingQueriesSelectionPolicy; import org.apache.lens.server.api.query.constraint.QueryLaunchingConstraint; +import org.apache.lens.server.api.util.LensUtil; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -186,6 +189,8 @@ public class QueryContext extends AbstractQueryContext { @Getter private transient boolean isDriverResultRegistered; + transient FailureContext statusUpdateFailures = new FailureContext(); + /** * Creates context from query * @@ -279,6 +284,11 @@ public class QueryContext extends AbstractQueryContext { return ctx; } + public void initTransientState() { + super.initTransientState(); + statusUpdateFailures = new FailureContext(); + } + /** * Merge conf. 
* @@ -357,10 +367,39 @@ public class QueryContext extends AbstractQueryContext { } public synchronized void setStatus(final QueryStatus newStatus) throws LensException { + validateTransition(newStatus); log.info("Updating status of {} from {} to {}", getQueryHandle(), this.status, newStatus); this.status = newStatus; } + /** + * Update status from selected driver + * + * @param statusUpdateRetryHandler The exponential retry handler + * + * @throws LensException Throws exception if update from driver has failed. + */ + public synchronized void updateDriverStatus(BackOffRetryHandler statusUpdateRetryHandler) + throws LensException { + if (statusUpdateRetryHandler.canTryOpNow(statusUpdateFailures)) { + try { + getSelectedDriver().updateStatus(this); + statusUpdateFailures.clear(); + } catch (LensException exc) { + if (LensUtil.isSocketException(exc)) { + statusUpdateFailures.updateFailure(); + if (!statusUpdateRetryHandler.hasExhaustedRetries(statusUpdateFailures)) { + // retries are not exhausted, so failure is ignored and update will be tried later + log.warn("Exception during update status from driver and update will be tried again at {}", + statusUpdateRetryHandler.getOperationNextTime(statusUpdateFailures), exc); + return; + } + } + throw exc; + } + } + } + public String getResultHeader() { return getConf().get(LensConfConstants.QUERY_OUTPUT_HEADER); } http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/main/java/org/apache/lens/server/api/util/LensUtil.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/util/LensUtil.java b/lens-server-api/src/main/java/org/apache/lens/server/api/util/LensUtil.java index c3b6d26..a0691a5 100644 --- a/lens-server-api/src/main/java/org/apache/lens/server/api/util/LensUtil.java +++ b/lens-server-api/src/main/java/org/apache/lens/server/api/util/LensUtil.java @@ -18,6 +18,8 @@ */ package 
org.apache.lens.server.api.util; +import java.net.SocketException; +import java.net.SocketTimeoutException; import java.util.HashMap; import java.util.Set; @@ -29,6 +31,8 @@ import org.apache.hadoop.conf.Configuration; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; +import lombok.NonNull; + /** * Utility methods for Lens */ @@ -45,7 +49,7 @@ public final class LensUtil { * @param e * @return message */ - public static String getCauseMessage(Throwable e) { + public static String getCauseMessage(@NonNull Throwable e) { String expMsg = null; if (e.getCause() != null) { expMsg = getCauseMessage(e.getCause()); @@ -56,6 +60,20 @@ public final class LensUtil { return expMsg; } + public static Throwable getCause(@NonNull Throwable e) { + if (e.getCause() != null) { + return getCause(e.getCause()); + } + return e; + } + + public static boolean isSocketException(@NonNull Throwable e) { + Throwable cause = getCause(e); + if (cause instanceof SocketException || cause instanceof SocketTimeoutException) { + return true; + } + return false; + } public static <T> ImmutableSet<T> getImplementations(final String factoriesKey, final Configuration conf) { Set<T> implSet = Sets.newLinkedHashSet(); http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/test/java/org/apache/lens/server/api/common/TestExponentialBackOffRetryHandler.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/test/java/org/apache/lens/server/api/common/TestExponentialBackOffRetryHandler.java b/lens-server-api/src/test/java/org/apache/lens/server/api/common/TestExponentialBackOffRetryHandler.java new file mode 100644 index 0000000..5f407af --- /dev/null +++ b/lens-server-api/src/test/java/org/apache/lens/server/api/common/TestExponentialBackOffRetryHandler.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lens.server.api.common; + +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import org.testng.annotations.Test; + +public class TestExponentialBackOffRetryHandler { + + @Test + public void testExponentialBackOff() { + FailureContext failures = new FailureContext(); + BackOffRetryHandler retryHandler = OperationRetryHandlerFactory.createExponentialBackOffHandler(10, 10000, 1000); + assertFalse(retryHandler.hasExhaustedRetries(failures)); + assertTrue(retryHandler.canTryOpNow(failures)); + + long now = System.currentTimeMillis(); + failures.updateFailure(); + assertFalse(retryHandler.hasExhaustedRetries(failures)); + assertFalse(retryHandler.canTryOpNow(failures)); + assertTrue(now + 500 < retryHandler.getOperationNextTime(failures)); + assertTrue(now + 15000 > retryHandler.getOperationNextTime(failures)); + + for (int i = 0; i < 10; i++) { + failures.updateFailure(); + } + assertTrue(retryHandler.hasExhaustedRetries(failures)); + assertFalse(retryHandler.canTryOpNow(failures)); + + failures.clear(); + assertFalse(retryHandler.hasExhaustedRetries(failures)); + assertTrue(retryHandler.canTryOpNow(failures)); + } +} 
http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/test/java/org/apache/lens/server/api/driver/MockDriver.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/test/java/org/apache/lens/server/api/driver/MockDriver.java b/lens-server-api/src/test/java/org/apache/lens/server/api/driver/MockDriver.java index 7f39da1..59f8569 100644 --- a/lens-server-api/src/test/java/org/apache/lens/server/api/driver/MockDriver.java +++ b/lens-server-api/src/test/java/org/apache/lens/server/api/driver/MockDriver.java @@ -21,6 +21,8 @@ package org.apache.lens.server.api.driver; import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; +import java.net.SocketException; +import java.net.SocketTimeoutException; import java.util.List; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; @@ -45,6 +47,7 @@ import org.apache.hive.service.cli.ColumnDescriptor; import com.beust.jcommander.internal.Sets; import com.google.common.collect.ImmutableSet; +import lombok.Getter; /** * The Class MockDriver. 
@@ -148,6 +151,8 @@ public class MockDriver extends AbstractLensDriver { return new MockQueryPlan(explainCtx.getUserQuery()); } + @Getter + private int updateCount = 0; /* * (non-Javadoc) * @@ -155,6 +160,25 @@ public class MockDriver extends AbstractLensDriver { */ @Override public void updateStatus(QueryContext context) throws LensException { + updateCount++; + if ("simulate status retries".equals(context.getUserQuery())) { + try { + if (updateCount < 3) { + throw new SocketTimeoutException("simulated timeout exception"); + } else if (updateCount <= 5) { + throw new SocketException("simulated socket exception"); + } + } catch (Exception e) { + throw new LensException(e); + } + } + if ("simulate status failure".equals(context.getUserQuery())) { + try { + throw new SocketTimeoutException("simulated timeout exception"); + } catch (Exception e) { + throw new LensException(e); + } + } context.getDriverStatus().setProgress(1.0); context.getDriverStatus().setStatusMessage("Done"); context.getDriverStatus().setState(DriverQueryState.SUCCESSFUL); http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/test/java/org/apache/lens/server/api/query/TestAbstractQueryContext.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/test/java/org/apache/lens/server/api/query/TestAbstractQueryContext.java b/lens-server-api/src/test/java/org/apache/lens/server/api/query/TestAbstractQueryContext.java index 5af45ed..7e9fda9 100644 --- a/lens-server-api/src/test/java/org/apache/lens/server/api/query/TestAbstractQueryContext.java +++ b/lens-server-api/src/test/java/org/apache/lens/server/api/query/TestAbstractQueryContext.java @@ -95,6 +95,7 @@ public class TestAbstractQueryContext { ctxRead.initTransientState(); ctxRead.setConf(ctx.getConf()); assertNotNull(ctxRead.getHiveConf()); + assertNotNull(ctxRead.statusUpdateFailures); } @Test 
http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server-api/src/test/java/org/apache/lens/server/api/query/TestQueryContext.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/test/java/org/apache/lens/server/api/query/TestQueryContext.java b/lens-server-api/src/test/java/org/apache/lens/server/api/query/TestQueryContext.java new file mode 100644 index 0000000..a530e9d --- /dev/null +++ b/lens-server-api/src/test/java/org/apache/lens/server/api/query/TestQueryContext.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.lens.server.api.query; + + +import static org.testng.Assert.*; + +import java.util.List; + +import org.apache.lens.api.LensConf; +import org.apache.lens.server.api.common.*; +import org.apache.lens.server.api.driver.LensDriver; +import org.apache.lens.server.api.driver.MockDriver; +import org.apache.lens.server.api.error.LensException; + +import org.apache.hadoop.conf.Configuration; + +import org.testng.annotations.Test; + +/** + * Tests for abstract query context + */ +public class TestQueryContext { + + @Test + public void testUpdateDriverStatusRetries() throws LensException { + Configuration conf = new Configuration(); + List<LensDriver> drivers = MockQueryContext.getDrivers(conf); + MockDriver selectedDriver = (MockDriver)drivers.iterator().next(); + MockQueryContext ctx = new MockQueryContext("simulate status retries", new LensConf(), conf, drivers); + BackOffRetryHandler waitingHandler = new FibonacciExponentialBackOffRetryHandler(10, 10000, 1000); + BackOffRetryHandler noWaitingHandler = new FibonacciExponentialBackOffRetryHandler(10, 0, 0); + // do first update + ctx.updateDriverStatus(waitingHandler); + assertEquals(selectedDriver.getUpdateCount(), 1); + + // try another update, update should be skipped + ctx.updateDriverStatus(waitingHandler); + assertEquals(selectedDriver.getUpdateCount(), 1); + + // update without delays + ctx.updateDriverStatus(noWaitingHandler); + ctx.updateDriverStatus(noWaitingHandler); + ctx.updateDriverStatus(noWaitingHandler); + ctx.updateDriverStatus(noWaitingHandler); + assertEquals(selectedDriver.getUpdateCount(), 5); + + // update with delays, update should be skipped + ctx.updateDriverStatus(waitingHandler); + assertEquals(selectedDriver.getUpdateCount(), 5); + + // update succeeds now. 
+ ctx.updateDriverStatus(noWaitingHandler); + // all next updates should succeed, as retries should be cleared + ctx.updateDriverStatus(waitingHandler); + assertEquals(selectedDriver.getUpdateCount(), 7); + } + + @Test + public void testUpdateDriverStatusRetrieExhausts() throws LensException { + Configuration conf = new Configuration(); + List<LensDriver> drivers = MockQueryContext.getDrivers(conf); + MockQueryContext ctx = new MockQueryContext("simulate status failure", new LensConf(), conf, drivers); + BackOffRetryHandler waitingHandler = new FibonacciExponentialBackOffRetryHandler(10, 10000, 1000); + BackOffRetryHandler noWaitingHandler = new FibonacciExponentialBackOffRetryHandler(10, 0, 0); + for (int i = 0; i < 18; i++) { + if (i % 2 == 0) { + ctx.updateDriverStatus(noWaitingHandler); + } else { + ctx.updateDriverStatus(waitingHandler); + } + } + + // retries should be exhausted and update should fail. + try { + ctx.updateDriverStatus(noWaitingHandler); + fail("Should throw exception"); + } catch (LensException e) { + // pass + } + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server/src/main/java/org/apache/lens/server/query/QueryExecutionServiceImpl.java ---------------------------------------------------------------------- diff --git a/lens-server/src/main/java/org/apache/lens/server/query/QueryExecutionServiceImpl.java b/lens-server/src/main/java/org/apache/lens/server/query/QueryExecutionServiceImpl.java index 47efb1c..cf8a5f7 100644 --- a/lens-server/src/main/java/org/apache/lens/server/query/QueryExecutionServiceImpl.java +++ b/lens-server/src/main/java/org/apache/lens/server/query/QueryExecutionServiceImpl.java @@ -48,6 +48,8 @@ import org.apache.lens.server.BaseLensService; import org.apache.lens.server.LensServerConf; import org.apache.lens.server.LensServices; import org.apache.lens.server.api.LensConfConstants; +import org.apache.lens.server.api.common.BackOffRetryHandler; +import 
org.apache.lens.server.api.common.OperationRetryHandlerFactory; import org.apache.lens.server.api.driver.*; import org.apache.lens.server.api.error.LensException; import org.apache.lens.server.api.error.LensMultiCauseException; @@ -294,6 +296,8 @@ public class QueryExecutionServiceImpl extends BaseLensService implements QueryE }; private UserQueryToCubeQueryRewriter userQueryToCubeQueryRewriter; + // Exponential backoff retry handler for status updates + private BackOffRetryHandler statusUpdateRetryHandler; /** * Instantiates a new query execution service impl. @@ -884,14 +888,14 @@ public class QueryExecutionServiceImpl extends BaseLensService implements QueryE if (!ctx.queued() && !ctx.finished() && !ctx.getDriverStatus().isFinished()) { log.debug("Updating status for {}", ctx.getQueryHandle()); try { - ctx.getSelectedDriver().updateStatus(ctx); - ctx.setStatus(ctx.getDriverStatus().toQueryStatus()); + ctx.updateDriverStatus(statusUpdateRetryHandler); } catch (LensException exc) { - // Driver gave exception while updating status + // Status update from driver failed setFailedStatus(ctx, "Status update failed", exc.getMessage(), exc.buildLensErrorTO(this.errorCollection)); log.error("Status update failed for {}", handle, exc); return; } + ctx.setStatus(ctx.getDriverStatus().toQueryStatus()); // query is successfully executed by driver and // if query result need not be persisted or there is no result available in driver, move the query to // succeeded state immediately, otherwise result formatter will format the result and move it to succeeded @@ -1152,6 +1156,16 @@ public class QueryExecutionServiceImpl extends BaseLensService implements QueryE inMemoryResultsetTTLMillis = conf.getInt( LensConfConstants.INMEMORY_RESULT_SET_TTL_SECS, LensConfConstants.DEFAULT_INMEMORY_RESULT_SET_TTL_SECS) * 1000; + int statusUpdateRetries = conf.getInt(LensConfConstants.STATUS_UPDATE_EXPONENTIAL_RETRIES, + LensConfConstants.DEFAULT_STATUS_UPDATE_EXPONENTIAL_RETRIES); + // Maximum 
delay a status update can wait for next update, in case of transient failures. + long statusUpdateRetryMaxDelay = conf.getLong(LensConfConstants.MAXIMUM_STATUS_UPDATE_DELAY, + LensConfConstants.DEFAULT_MAXIMUM_STATUS_UPDATE_DELAY) * 1000; + // The wait time for next status update which can grow exponentially, in case of transient failures. + long statusUpdateExponentialWaiFactor = conf.getLong(LensConfConstants.STATUS_UPDATE_EXPONENTIAL_WAIT_FACTOR, + LensConfConstants.DEFAULT_STATUS_UPDATE_EXPONENTIAL_WAIT_FACTOR); + statusUpdateRetryHandler = OperationRetryHandlerFactory.createExponentialBackOffHandler(statusUpdateRetries, + statusUpdateRetryMaxDelay, statusUpdateExponentialWaiFactor); log.info("Query execution service initialized"); } http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server/src/main/resources/lensserver-default.xml ---------------------------------------------------------------------- diff --git a/lens-server/src/main/resources/lensserver-default.xml b/lens-server/src/main/resources/lensserver-default.xml index 6bb66d9..1a15658 100644 --- a/lens-server/src/main/resources/lensserver-default.xml +++ b/lens-server/src/main/resources/lensserver-default.xml @@ -834,4 +834,24 @@ an exception by saying that opened session limit has been already reached for user. </description> </property> + <property> + <name>lens.server.status.update.num.retries</name> + <value>10</value> + <description>The number of retries a status update will be tried with exponential back off, in case of transient + issues, after which the query will be marked FAILED. + </description> + </property> + <property> + <name>lens.server.status.update.maximum.delay.secs</name> + <value>1800</value> + <description>The maximum delay in seconds for next status update to happen after any transient failure. This will + be used as a maximum delay so that exponential wait times do not grow too large. 
+ </description> + </property> + <property> + <name>lens.server.status.update.exponential.wait.millis</name> + <value>30000</value> + <description>Number of millis that would grow exponentially for next update, in case of transient failures. + </description> +</property> </configuration> http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/lens-server/src/test/resources/lens-site.xml ---------------------------------------------------------------------- diff --git a/lens-server/src/test/resources/lens-site.xml b/lens-server/src/test/resources/lens-site.xml index b5b3220..4205701 100644 --- a/lens-server/src/test/resources/lens-site.xml +++ b/lens-server/src/test/resources/lens-site.xml @@ -192,4 +192,10 @@ <value>20</value> <description>Number of sessions can be allowed for each user.</description> </property> + <property> + <name>lens.server.status.update.exponential.wait.millis</name> + <value>1000</value> + <description>Number of millis that would grow exponentially for next update, in case of transient failures. + </description> + </property> </configuration> http://git-wip-us.apache.org/repos/asf/lens/blob/5faaf11b/src/site/apt/admin/config.apt ---------------------------------------------------------------------- diff --git a/src/site/apt/admin/config.apt b/src/site/apt/admin/config.apt index 6df9796..b5853bf 100644 --- a/src/site/apt/admin/config.apt +++ b/src/site/apt/admin/config.apt @@ -205,54 +205,60 @@ Lens server configuration *--+--+---+--+ |88|lens.server.statistics.warehouse.dir|file:///tmp/lens/statistics/warehouse|Default top level location where stats are moved by the log statistics store.| *--+--+---+--+ -|89|lens.server.total.query.cost.ceiling.per.user|-1.0|A query submitted by user will be launched only if total query cost of all current launched queries of user is less than or equal to total query cost ceiling defined by this property.
This configuration value is only useful when TotalQueryCostCeilingConstraint is enabled by using org.apache.lens.server.query.constraint.TotalQueryCostCeilingConstraintFactory as one of the factories in lens.server.query.constraint.factories property. Default is -1.0 which means that there is no limit on the total query cost of launched queries submitted by a user.| +|89|lens.server.status.update.delay.secs.maximum|1800|The maximum delay in seconds for next status update to happen after any transient failure. This will be used as a maximum delay so that exponential wait times do not grow to a bigger value.| *--+--+---+--+ -|90|lens.server.ui.base.uri|http://0.0.0.0:19999/|The base url for the Lens UI Server| +|90|lens.server.status.update.exponential.wait.millis|30000|Number of millis that would grow exponentially for next update, in case of transient failures.| *--+--+---+--+ -|91|lens.server.ui.enable|true|Bringing up the ui server is optional. By default it brings up UI server.| +|91|lens.server.status.update.num.retries|10|The number of retries a status update will be tried with exponential back off, in case of transient issues, upon which query will be marked FAILED.| *--+--+---+--+ -|92|lens.server.ui.enable.caching|true|Set this to false to disable static file caching in the UI server| +|92|lens.server.total.query.cost.ceiling.per.user|-1.0|A query submitted by user will be launched only if total query cost of all current launched queries of user is less than or equal to total query cost ceiling defined by this property. This configuration value is only useful when TotalQueryCostCeilingConstraint is enabled by using org.apache.lens.server.query.constraint.TotalQueryCostCeilingConstraintFactory as one of the factories in lens.server.query.constraint.factories property.
Default is -1.0 which means that there is no limit on the total query cost of launched queries submitted by a user.| *--+--+---+--+ -|93|lens.server.ui.static.dir|webapp/lens-server/static|The base directory to server UI static files from| +|93|lens.server.ui.base.uri|http://0.0.0.0:19999/|The base url for the Lens UI Server| *--+--+---+--+ -|94|lens.server.user.resolver.custom.class|full.package.name.Classname|Required for CUSTOM user resolver. In case the provided implementations are not sufficient for user config resolver, a custom classname can be provided. Class should extend org.apache.lens.server.user.UserConfigLoader| +|94|lens.server.ui.enable|true|Bringing up the ui server is optional. By default it brings up UI server.| *--+--+---+--+ -|95|lens.server.user.resolver.db.keys|lens.session.cluster.user,mapred.job.queue.name|Required for DATABASE and LDAP_BACKED_DATABASE user resolvers. For database based user config loaders, the conf keys that will be loaded from database.| +|95|lens.server.ui.enable.caching|true|Set this to false to disable static file caching in the UI server| *--+--+---+--+ -|96|lens.server.user.resolver.db.query|select clusteruser,queue from user_config_table where username=?|Required for DATABASE and LDAP_BACKED_DATABASE user resolvers. For database based user config loader, this query will be run with single argument = logged in user and the result columns will be assigned to lens.server.user.resolver.db.keys in order. For ldap backed database resolver, the argument to this query will be the intermediate values obtained from ldap.| +|96|lens.server.ui.static.dir|webapp/lens-server/static|The base directory to server UI static files from| *--+--+---+--+ -|97|lens.server.user.resolver.fixed.value| |Required for FIXED user resolver. when lens.server.user.resolver.type=FIXED, This will be the value cluster user will resolve to.| +|97|lens.server.user.resolver.custom.class|full.package.name.Classname|Required for CUSTOM user resolver. 
In case the provided implementations are not sufficient for user config resolver, a custom classname can be provided. Class should extend org.apache.lens.server.user.UserConfigLoader| *--+--+---+--+ -|98|lens.server.user.resolver.ldap.bind.dn| |Required for LDAP_BACKED_DATABASE user resolvers. ldap dn for admin binding example: CN=company-it-admin,ou=service-account,ou=company-service-account,dc=dc1,dc=com...| +|98|lens.server.user.resolver.db.keys|lens.session.cluster.user,mapred.job.queue.name|Required for DATABASE and LDAP_BACKED_DATABASE user resolvers. For database based user config loaders, the conf keys that will be loaded from database.| *--+--+---+--+ -|99|lens.server.user.resolver.ldap.bind.password| |Required for LDAP_BACKED_DATABASE user resolvers. ldap password for admin binding above| +|99|lens.server.user.resolver.db.query|select clusteruser,queue from user_config_table where username=?|Required for DATABASE and LDAP_BACKED_DATABASE user resolvers. For database based user config loader, this query will be run with single argument = logged in user and the result columns will be assigned to lens.server.user.resolver.db.keys in order. For ldap backed database resolver, the argument to this query will be the intermediate values obtained from ldap.| *--+--+---+--+ -|100|lens.server.user.resolver.ldap.fields|department|Required for LDAP_BACKED_DATABASE user resolvers. list of fields to be obtained from ldap. These will be cached by the intermediate db.| +|100|lens.server.user.resolver.fixed.value| |Required for FIXED user resolver. when lens.server.user.resolver.type=FIXED, This will be the value cluster user will resolve to.| *--+--+---+--+ -|101|lens.server.user.resolver.ldap.intermediate.db.delete.sql|delete from user_department where username=?|Required for LDAP_BACKED_DATABASE user resolvers. query to delete intermediate values from database backing ldap as cache. 
one argument: logged in user.| +|101|lens.server.user.resolver.ldap.bind.dn| |Required for LDAP_BACKED_DATABASE user resolvers. ldap dn for admin binding example: CN=company-it-admin,ou=service-account,ou=company-service-account,dc=dc1,dc=com...| *--+--+---+--+ -|102|lens.server.user.resolver.ldap.intermediate.db.insert.sql|insert into user_department (username, department, expiry) values (?, ?, ?)|Required for LDAP_BACKED_DATABASE user resolvers. query to insert intermediate values from database backing ldap as cache. arguments: first logged in user, then all intermediate values, then current time + expiration time| +|102|lens.server.user.resolver.ldap.bind.password| |Required for LDAP_BACKED_DATABASE user resolvers. ldap password for admin binding above| *--+--+---+--+ -|103|lens.server.user.resolver.ldap.intermediate.db.query|select department from user_department where username=? and expiry>?|Required for LDAP_BACKED_DATABASE user resolvers. query to obtain intermediate values from database backing ldap as cache. two arguments: logged in user and current time.| +|103|lens.server.user.resolver.ldap.fields|department|Required for LDAP_BACKED_DATABASE user resolvers. list of fields to be obtained from ldap. These will be cached by the intermediate db.| *--+--+---+--+ -|104|lens.server.user.resolver.ldap.search.base| |Required for LDAP_BACKED_DATABASE user resolvers. for searching intermediate values for a user, the search keys. example: cn=users,dc=dc1,dc=dc2...| +|104|lens.server.user.resolver.ldap.intermediate.db.delete.sql|delete from user_department where username=?|Required for LDAP_BACKED_DATABASE user resolvers. query to delete intermediate values from database backing ldap as cache. one argument: logged in user.| *--+--+---+--+ -|105|lens.server.user.resolver.ldap.search.filter|(&(objectClass=user)(sAMAccountName=%s))|Required for LDAP_BACKED_DATABASE user resolvers. 
filter pattern for ldap search| +|105|lens.server.user.resolver.ldap.intermediate.db.insert.sql|insert into user_department (username, department, expiry) values (?, ?, ?)|Required for LDAP_BACKED_DATABASE user resolvers. query to insert intermediate values from database backing ldap as cache. arguments: first logged in user, then all intermediate values, then current time + expiration time| *--+--+---+--+ -|106|lens.server.user.resolver.ldap.url| |Required for LDAP_BACKED_DATABASE user resolvers. ldap url to connect to.| +|106|lens.server.user.resolver.ldap.intermediate.db.query|select department from user_department where username=? and expiry>?|Required for LDAP_BACKED_DATABASE user resolvers. query to obtain intermediate values from database backing ldap as cache. two arguments: logged in user and current time.| *--+--+---+--+ -|107|lens.server.user.resolver.propertybased.filename|/path/to/propertyfile|Required for PROPERTYBASED user resolver. when lens.server.user.resolver.type is PROPERTYBASED, then this file will be read and parsed to determine cluster user. Each line should contain username followed by DOT followed by property full name followed by equal-to sign and followed by value. example schema of the file is: user1.lens.server.cluster.user=clusteruser1 user1.mapred.job.queue.name=queue1 *.lens.server.cluster.user=defaultclusteruser *.mapred.job.queue.name=default| +|107|lens.server.user.resolver.ldap.search.base| |Required for LDAP_BACKED_DATABASE user resolvers. for searching intermediate values for a user, the search keys. example: cn=users,dc=dc1,dc=dc2...| *--+--+---+--+ -|108|lens.server.user.resolver.type|FIXED|Type of user config resolver. allowed values are FIXED, PROPERTYBASED, DATABASE, LDAP_BACKED_DATABASE, CUSTOM.| +|108|lens.server.user.resolver.ldap.search.filter|(&(objectClass=user)(sAMAccountName=%s))|Required for LDAP_BACKED_DATABASE user resolvers. 
filter pattern for ldap search| *--+--+---+--+ -|109|lens.server.waiting.queries.selection.policy.factories|org.apache.lens.server.query.collect.UserSpecificWaitingQueriesSelectionPolicyFactory|Factories used to instantiate waiting queries selection policies. Every factory should be an implementation of org.apache.lens.server.api.common.ConfigBasedObjectCreationFactory and create an implementation of org.apache.lens.server.api.query.collect.WaitingQueriesSelectionPolicy.| +|109|lens.server.user.resolver.ldap.url| |Required for LDAP_BACKED_DATABASE user resolvers. ldap url to connect to.| *--+--+---+--+ -|110|lens.server.ws.featurenames|multipart,moxyjson,moxyjsonconfigresovler|These JAX-RS Feature(s) would be started in the specified order when lens-server starts up| +|110|lens.server.user.resolver.propertybased.filename|/path/to/propertyfile|Required for PROPERTYBASED user resolver. when lens.server.user.resolver.type is PROPERTYBASED, then this file will be read and parsed to determine cluster user. Each line should contain username followed by DOT followed by property full name followed by equal-to sign and followed by value. example schema of the file is: user1.lens.server.cluster.user=clusteruser1 user1.mapred.job.queue.name=queue1 *.lens.server.cluster.user=defaultclusteruser *.mapred.job.queue.name=default| *--+--+---+--+ -|111|lens.server.ws.filternames|authentication,consistentState,serverMode|These JAX-RS filters would be started in the specified order when lens-server starts up| +|111|lens.server.user.resolver.type|FIXED|Type of user config resolver. 
allowed values are FIXED, PROPERTYBASED, DATABASE, LDAP_BACKED_DATABASE, CUSTOM.| *--+--+---+--+ -|112|lens.server.ws.listenernames|appevent|These listeners would be called in the specified order when lens-server starts up| +|112|lens.server.waiting.queries.selection.policy.factories|org.apache.lens.server.query.collect.UserSpecificWaitingQueriesSelectionPolicyFactory|Factories used to instantiate waiting queries selection policies. Every factory should be an implementation of org.apache.lens.server.api.common.ConfigBasedObjectCreationFactory and create an implementation of org.apache.lens.server.api.query.collect.WaitingQueriesSelectionPolicy.| *--+--+---+--+ -|113|lens.server.ws.resourcenames|session,metastore,query,quota,scheduler,index,log|These JAX-RS resources would be started in the specified order when lens-server starts up| +|113|lens.server.ws.featurenames|multipart,moxyjson,moxyjsonconfigresovler|These JAX-RS Feature(s) would be started in the specified order when lens-server starts up| +*--+--+---+--+ +|114|lens.server.ws.filternames|authentication,consistentState,serverMode|These JAX-RS filters would be started in the specified order when lens-server starts up| +*--+--+---+--+ +|115|lens.server.ws.listenernames|appevent|These listeners would be called in the specified order when lens-server starts up| +*--+--+---+--+ +|116|lens.server.ws.resourcenames|session,metastore,query,quota,scheduler,index,log|These JAX-RS resources would be started in the specified order when lens-server starts up| *--+--+---+--+ The configuration parameters and their default values