DRILL-6143: Made FragmentsRunner's RPC timeout larger to reduce random failures
and made it configurable as a SystemOption.

closes #1119


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/33149059
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/33149059
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/33149059

Branch: refs/heads/master
Commit: 3314905987ee6d28c89cc5cfd8c7dbd6f36da41c
Parents: 5d4fbd1
Author: Timothy Farkas <timothyfar...@apache.org>
Authored: Thu Feb 8 15:25:59 2018 -0800
Committer: Vitalii Diravka <vitalii.dira...@gmail.com>
Committed: Fri Feb 16 20:15:57 2018 +0000

----------------------------------------------------------------------
 .../src/main/java/org/apache/drill/exec/ExecConstants.java      | 2 ++
 .../apache/drill/exec/server/options/SystemOptionManager.java   | 3 ++-
 .../org/apache/drill/exec/work/foreman/FragmentsRunner.java     | 5 ++---
 exec/java-exec/src/main/resources/drill-module.conf             | 1 +
 4 files changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/33149059/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index c949e51..f3572d8 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -59,6 +59,8 @@ public final class ExecConstants {
   public static final String CLIENT_RPC_THREADS = "drill.exec.rpc.user.client.threads";
   public static final String BIT_SERVER_RPC_THREADS = "drill.exec.rpc.bit.server.threads";
   public static final String USER_SERVER_RPC_THREADS = "drill.exec.rpc.user.server.threads";
+  public static final String FRAG_RUNNER_RPC_TIMEOUT = "drill.exec.rpc.fragrunner.timeout";
+  public static final PositiveLongValidator FRAG_RUNNER_RPC_TIMEOUT_VALIDATOR = new PositiveLongValidator(FRAG_RUNNER_RPC_TIMEOUT, Long.MAX_VALUE);
   public static final String TRACE_DUMP_DIRECTORY = "drill.exec.trace.directory";
   public static final String TRACE_DUMP_FILESYSTEM = "drill.exec.trace.filesystem";
   public static final String TEMP_DIRECTORIES = "drill.exec.tmp.directories";

http://git-wip-us.apache.org/repos/asf/drill/blob/33149059/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index 4dba96d..2b170e7 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -212,7 +212,8 @@ public class SystemOptionManager extends BaseOptionManager implements AutoClosea
       new OptionDefinition(ExecConstants.CPU_LOAD_AVERAGE),
       new OptionDefinition(ExecConstants.ENABLE_VECTOR_VALIDATOR),
       new OptionDefinition(ExecConstants.ENABLE_ITERATOR_VALIDATOR),
-      new OptionDefinition(ExecConstants.OUTPUT_BATCH_SIZE_VALIDATOR, new OptionMetaData(OptionValue.AccessibleScopes.SYSTEM, true, false))
+      new OptionDefinition(ExecConstants.OUTPUT_BATCH_SIZE_VALIDATOR, new OptionMetaData(OptionValue.AccessibleScopes.SYSTEM, true, false)),
+      new OptionDefinition(ExecConstants.FRAG_RUNNER_RPC_TIMEOUT_VALIDATOR, new OptionMetaData(OptionValue.AccessibleScopes.SYSTEM, true, true)),
     };
 
     final CaseInsensitiveMap<OptionDefinition> map = CaseInsensitiveMap.newHashMap();

http://git-wip-us.apache.org/repos/asf/drill/blob/33149059/exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/FragmentsRunner.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/FragmentsRunner.java b/exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/FragmentsRunner.java
index 2e5f2dd..b677576 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/FragmentsRunner.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/FragmentsRunner.java
@@ -24,6 +24,7 @@ import io.netty.buffer.ByteBuf;
 import org.apache.drill.common.concurrent.ExtendedLatch;
 import org.apache.drill.common.exceptions.ExecutionSetupException;
 import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.ops.FragmentContextImpl;
 import org.apache.drill.exec.physical.base.FragmentRoot;
 import org.apache.drill.exec.proto.BitControl.InitializeFragments;
@@ -61,8 +62,6 @@ public class FragmentsRunner {
   private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FragmentsRunner.class);
   private static final ControlsInjector injector = ControlsInjectorFactory.getInjector(FragmentsRunner.class);
 
-  private static final long RPC_WAIT_IN_MSECS_PER_FRAGMENT = 5000;
-
   private final WorkerBee bee;
   private final UserClientConnection initiatingClient;
   private final DrillbitContext drillbitContext;
@@ -278,7 +277,7 @@ public class FragmentsRunner {
       sendRemoteFragments(ep, remoteFragmentMap.get(ep), endpointLatch, fragmentSubmitFailures);
     }
 
-    final long timeout = RPC_WAIT_IN_MSECS_PER_FRAGMENT * numIntFragments;
+    final long timeout = drillbitContext.getOptionManager().getLong(ExecConstants.FRAG_RUNNER_RPC_TIMEOUT) * numIntFragments;
     if (numIntFragments > 0 && !endpointLatch.awaitUninterruptibly(timeout)) {
       long numberRemaining = endpointLatch.getCount();
       throw UserException.connectionError()

http://git-wip-us.apache.org/repos/asf/drill/blob/33149059/exec/java-exec/src/main/resources/drill-module.conf
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/resources/drill-module.conf b/exec/java-exec/src/main/resources/drill-module.conf
index 5659c82..39320c2 100644
--- a/exec/java-exec/src/main/resources/drill-module.conf
+++ b/exec/java-exec/src/main/resources/drill-module.conf
@@ -408,6 +408,7 @@ drill.exec.options: {
     debug.validate_iterators: false,
     debug.validate_vectors: false,
     drill.exec.functions.cast_empty_string_to_null: false,
+    drill.exec.rpc.fragrunner.timeout: 10000,
     # Setting to control if HashAgg should fallback to older behavior of consuming
     # unbounded memory. In case of 2 phase Agg when available memory is not enough
     # to start at least 2 partitions then HashAgg fallbacks to this case. It can be

Reply via email to