IMPALA-4522: Bound Kudu client threads to avoid stress crash

In stress testing on physical boxes (80 cores, 200 GB RAM), we discovered that the Kudu Java client creates a huge number of threads (2x the number of cores) per Kudu client, which caused the impalad to crash when the JVM could not create more threads.
This addresses the issue by setting the number of Kudu client worker threads rather than letting the Kudu client pick the default (2 * #cores). The number set here was suggested by the Kudu team as being sufficient for Impala's FE usage and this has been tested for 8+ hours on the stress cluster where the crash was previously observed quickly. In the future, Impala should probably be sharing a single Kudu client (it is multithreaded), but additional support from Kudu may be needed to ensure this usage is correct (e.g. client metadata may need invalidation after some operations). Change-Id: I3940df776eaa5ad22e1bbb572559afcc8990bf1d Reviewed-on: http://gerrit.cloudera.org:8080/5205 Reviewed-by: Alex Behm <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1fea9973 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1fea9973 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1fea9973 Branch: refs/heads/master Commit: 1fea9973d2cd4fd61d9377ef9ce4f5accafb41b0 Parents: 3934e13 Author: Matthew Jacobs <[email protected]> Authored: Wed Nov 23 11:39:25 2016 -0800 Committer: Internal Jenkins <[email protected]> Committed: Thu Nov 24 02:39:30 2016 +0000 ---------------------------------------------------------------------- .../java/org/apache/impala/util/KuduUtil.java | 28 ++++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1fea9973/fe/src/main/java/org/apache/impala/util/KuduUtil.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/util/KuduUtil.java b/fe/src/main/java/org/apache/impala/util/KuduUtil.java index dd09a28..559c4a5 100644 --- a/fe/src/main/java/org/apache/impala/util/KuduUtil.java 
+++ b/fe/src/main/java/org/apache/impala/util/KuduUtil.java @@ -22,37 +22,42 @@ import static java.lang.String.format; import java.util.HashSet; import java.util.List; +import org.apache.impala.analysis.Expr; +import org.apache.impala.analysis.LiteralExpr; import org.apache.impala.catalog.ScalarType; import org.apache.impala.catalog.Type; import org.apache.impala.common.ImpalaRuntimeException; import org.apache.impala.common.Pair; import org.apache.impala.service.BackendConfig; -import org.apache.impala.thrift.TExpr; -import org.apache.impala.thrift.TExprNode; -import org.apache.impala.analysis.LiteralExpr; -import org.apache.impala.analysis.Expr; import org.apache.impala.thrift.TColumn; import org.apache.impala.thrift.TColumnEncoding; +import org.apache.impala.thrift.TExpr; +import org.apache.impala.thrift.TExprNode; import org.apache.impala.thrift.THdfsCompression; - -import com.google.common.base.Splitter; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; - import org.apache.kudu.ColumnSchema; -import org.apache.kudu.ColumnSchema.Encoding; import org.apache.kudu.ColumnSchema.CompressionAlgorithm; +import org.apache.kudu.ColumnSchema.Encoding; import org.apache.kudu.Schema; import org.apache.kudu.client.KuduClient; import org.apache.kudu.client.KuduClient.KuduClientBuilder; import org.apache.kudu.client.PartialRow; import org.apache.kudu.client.RangePartitionBound; +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + public class KuduUtil { private static final String KUDU_TABLE_NAME_PREFIX = "impala::"; + // Number of worker threads created by each KuduClient, regardless of whether or not + // they're needed. Impala does not share KuduClients between operations, so the number + // of threads created can get very large under concurrent workloads. 
This number should + // be sufficient for the Frontend/Catalog use, and has been tested in stress tests. + private static int KUDU_CLIENT_WORKER_THREAD_COUNT = 5; + /** * Creates a KuduClient with the specified Kudu master addresses (as a comma-separated * list of host:port pairs). The 'admin operation timeout' and the 'operation timeout' @@ -64,6 +69,7 @@ public class KuduUtil { KuduClientBuilder b = new KuduClient.KuduClientBuilder(kuduMasters); b.defaultAdminOperationTimeoutMs(BackendConfig.INSTANCE.getKuduClientTimeoutMs()); b.defaultOperationTimeoutMs(BackendConfig.INSTANCE.getKuduClientTimeoutMs()); + b.workerCount(KUDU_CLIENT_WORKER_THREAD_COUNT); return b.build(); }
