[ https://issues.apache.org/jira/browse/DRILL-1139?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Jacques Nadeau updated DRILL-1139: ---------------------------------- Assignee: Steven Phillips > Drillbit fails with OutOfMemoryError Exception when Drill-smoke test is run > for a long time > ------------------------------------------------------------------------------------------- > > Key: DRILL-1139 > URL: https://issues.apache.org/jira/browse/DRILL-1139 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Flow > Reporter: Amit Katti > Assignee: Steven Phillips > Fix For: 0.5.0 > > > I ran the Drill-smoke test in an infinite loop on a cluster with 2 drillbits. > After about 11 hours of running successfully, the smoke test started to fail > and both drillbits went down. > I had also put in the below option in the /etc/drill/conf/drill-env.sh file: > export DRILL_JAVA_OPTS="-Xms$DRILL_INIT_HEAP -Xmx$DRILL_MAX_HEAP > -XX:MaxDirectMemorySize=$DRILL_MAX_DIRECT_MEMORY -ea -XX:MaxPermSize=512M > -XX:+UseConcMarkSweepGC -XX:ReservedCodeCacheSize=1G > -XX:+CMSClassUnloadingEnabled" > The error message at the smoke test was: > {code} > 2014-07-12 05:36:34 INFO ClientCnxn:852 - Socket connection established to > 10.10.30.156/10.10.30.156:5181, initiating session > 2014-07-12 05:36:34 ERROR ConnectionState:201 - Connection timed out for > connection string (10.10.30.156:5181) and timeout (5000) / elapsed (5003) > org.apache.curator.CuratorConnectionLossException: KeeperErrorCode = > ConnectionLoss > at > org.apache.curator.ConnectionState.checkTimeouts(ConnectionState.java:198) > at > org.apache.curator.ConnectionState.getZooKeeper(ConnectionState.java:88) > at > org.apache.curator.CuratorZookeeperClient.getZooKeeper(CuratorZookeeperClient.java:115) > at > org.apache.curator.utils.EnsurePath$InitialHelper$1.call(EnsurePath.java:148) > at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107) > at > org.apache.curator.utils.EnsurePath$InitialHelper.ensure(EnsurePath.java:140) > at org.apache.curator.utils.EnsurePath.ensure(EnsurePath.java:99) > at > 
org.apache.curator.framework.imps.NamespaceImpl.fixForNamespace(NamespaceImpl.java:74) > at > org.apache.curator.framework.imps.NamespaceImpl.newNamespaceAwareEnsurePath(NamespaceImpl.java:87) > at > org.apache.curator.framework.imps.CuratorFrameworkImpl.newNamespaceAwareEnsurePath(CuratorFrameworkImpl.java:468) > at > org.apache.curator.framework.recipes.cache.PathChildrenCache.<init>(PathChildrenCache.java:223) > at > org.apache.curator.framework.recipes.cache.PathChildrenCache.<init>(PathChildrenCache.java:182) > at > org.apache.curator.x.discovery.details.ServiceCacheImpl.<init>(ServiceCacheImpl.java:65) > at > org.apache.curator.x.discovery.details.ServiceCacheBuilderImpl.build(ServiceCacheBuilderImpl.java:47) > at > org.apache.drill.exec.coord.zk.ZKClusterCoordinator.<init>(ZKClusterCoordinator.java:81) > at > org.apache.drill.exec.client.DrillClient.connect(DrillClient.java:144) > at > org.apache.drill.jdbc.DrillConnectionImpl.<init>(DrillConnectionImpl.java:90) > at > org.apache.drill.jdbc.DrillJdbc41Factory$DrillJdbc41Connection.<init>(DrillJdbc41Factory.java:87) > at > org.apache.drill.jdbc.DrillJdbc41Factory.newDrillConnection(DrillJdbc41Factory.java:56) > at > org.apache.drill.jdbc.DrillJdbc41Factory.newDrillConnection(DrillJdbc41Factory.java:43) > at > org.apache.drill.jdbc.DrillFactory.newConnection(DrillFactory.java:51) > at > net.hydromatic.avatica.UnregisteredDriver.connect(UnregisteredDriver.java:126) > at java.sql.DriverManager.getConnection(DriverManager.java:571) > at java.sql.DriverManager.getConnection(DriverManager.java:233) > at > org.apache.drill.test.framework.DrillTestBase.runTest(DrillTestBase.java:172) > at > org.apache.drill.test.framework.DrillTests.positiveTests(DrillTests.java:32) > at sun.reflect.GeneratedMethodAccessor12.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > 
org.testng.internal.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:80) > at org.testng.internal.Invoker.invokeMethod(Invoker.java:701) > at org.testng.internal.Invoker.invokeTestMethod(Invoker.java:893) > at org.testng.internal.Invoker.invokeTestMethods(Invoker.java:1218) > at > org.testng.internal.TestMethodWorker.invokeTestMethods(TestMethodWorker.java:127) > at org.testng.internal.TestMethodWorker.run(TestMethodWorker.java:111) > at org.testng.TestRunner.privateRun(TestRunner.java:758) > at org.testng.TestRunner.run(TestRunner.java:613) > at org.testng.SuiteRunner.runTest(SuiteRunner.java:334) > at org.testng.SuiteRunner.runSequentially(SuiteRunner.java:329) > at org.testng.SuiteRunner.privateRun(SuiteRunner.java:291) > at org.testng.SuiteRunner.run(SuiteRunner.java:240) > at org.testng.SuiteRunnerWorker.runSuite(SuiteRunnerWorker.java:53) > at org.testng.SuiteRunnerWorker.run(SuiteRunnerWorker.java:87) > at org.testng.TestNG.runSuitesSequentially(TestNG.java:1170) > at org.testng.TestNG.runSuitesLocally(TestNG.java:1095) > at org.testng.TestNG.run(TestNG.java:1007) > at > org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:70) > at > org.apache.maven.surefire.testng.TestNGDirectoryTestSuite.execute(TestNGDirectoryTestSuite.java:102) > at > org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:114) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:103) > at com.sun.proxy.$Proxy0.invoke(Unknown Source) > at > org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:150) > at > 
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:91) > at > org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:69) > {code} > The exception in the drillbit.log was: > {code} > 2014-07-11 02:02:39,506 [5e9e75ae-419a-4aac-a2aa-9c4253563699:foreman] ERROR > o.a.drill.exec.work.foreman.Foreman - Error > 8c6dffab-e845-4e9e-a75b-60649d64c337: Failure while setting up Foreman. > java.lang.OutOfMemoryError: PermGen space > at sun.misc.Unsafe.defineClass(Native Method) ~[na:1.7.0_55] > at sun.reflect.ClassDefiner.defineClass(ClassDefiner.java:63) > ~[na:1.7.0_55] > at > sun.reflect.MethodAccessorGenerator$1.run(MethodAccessorGenerator.java:399) > ~[na:1.7.0_55] > at > sun.reflect.MethodAccessorGenerator$1.run(MethodAccessorGenerator.java:396) > ~[na:1.7.0_55] > at java.security.AccessController.doPrivileged(Native Method) > ~[na:1.7.0_55] > at > sun.reflect.MethodAccessorGenerator.generate(MethodAccessorGenerator.java:395) > ~[na:1.7.0_55] > at > sun.reflect.MethodAccessorGenerator.generateConstructor(MethodAccessorGenerator.java:94) > ~[na:1.7.0_55] > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:48) > ~[na:1.7.0_55] > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > ~[na:1.7.0_55] > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > ~[na:1.7.0_55] > at java.lang.reflect.Proxy.newInstance(Proxy.java:748) ~[na:1.7.0_55] > at java.lang.reflect.Proxy.newProxyInstance(Proxy.java:739) > ~[na:1.7.0_55] > at > org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider$2.apply(ReflectiveRelMetadataProvider.java:112) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.rel.metadata.ReflectiveRelMetadataProvider$2.apply(ReflectiveRelMetadataProvider.java:1) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.rel.metadata.MetadataFactoryImpl.query(MetadataFactoryImpl.java:71) > 
~[optiq-core-0.7-20140708.001905-9.jar:na] > at org.eigenbase.rel.AbstractRelNode.metadata(AbstractRelNode.java:269) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.rel.metadata.RelMetadataQuery.getNonCumulativeCost(RelMetadataQuery.java:121) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.VolcanoPlanner.getCost(VolcanoPlanner.java:918) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.RelSubset.propagateCostImprovements0(RelSubset.java:333) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.RelSubset.propagateCostImprovements(RelSubset.java:314) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.RelSubset.propagateCostImprovements0(RelSubset.java:349) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.RelSubset.propagateCostImprovements(RelSubset.java:314) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.VolcanoPlanner.addRelToSet(VolcanoPlanner.java:1611) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.VolcanoPlanner.registerImpl(VolcanoPlanner.java:1549) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.VolcanoPlanner.register(VolcanoPlanner.java:829) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.VolcanoPlanner.ensureRegistered(VolcanoPlanner.java:852) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.VolcanoPlanner.ensureRegistered(VolcanoPlanner.java:1726) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.VolcanoRuleCall.transformTo(VolcanoRuleCall.java:129) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.RelOptRuleCall.transformTo(RelOptRuleCall.java:210) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.apache.drill.exec.planner.physical.ScanPrule.onMatch(ScanPrule.java:49) > 
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT] > at > org.eigenbase.relopt.volcano.VolcanoRuleCall.onMatch(VolcanoRuleCall.java:221) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > at > org.eigenbase.relopt.volcano.VolcanoPlanner.findBestExp(VolcanoPlanner.java:653) > ~[optiq-core-0.7-20140708.001905-9.jar:na] > 2014-07-11 02:05:24,124 [ShutdownHook] INFO > o.apache.drill.exec.server.Drillbit - Received shutdown request. > {code} -- This message was sent by Atlassian JIRA (v6.2#6252)