Hi guys, I suddenly started getting these errors in one of the schedulers. Any idea what could be happening? After the error, the Aurora process restarts, but it's flaky: the error keeps showing up afterwards and the process keeps restarting. I kind of fixed it by deleting the replicated log on the problematic node. Thanks
0216 19:35:37.039 THREAD134 org.apache.aurora.scheduler.SchedulerLifecycle$SchedulerCandidateImpl.onDefeated: Lost leadership, committing suicide. I0216 19:35:37.039 THREAD134 org.apache.aurora.common.util.StateMachine$Builder$1.execute: SchedulerLifecycle state machine transition DEAD -> DEAD I0216 19:35:37.039 THREAD134 org.apache.aurora.scheduler.SchedulerLifecycle$8.execute: Shutdown already invoked, ignoring extra call. then.. org.apache.aurora.scheduler.storage.Storage$StorageException: There was a problem committing the transaction to the log. at org.apache.aurora.scheduler.storage.log.LogStorage$24.apply(LogStorage.java:621) at org.apache.aurora.scheduler.storage.log.LogStorage$24.apply(LogStorage.java:611) at org.apache.aurora.scheduler.storage.db.DbStorage.transactionedWrite(DbStorage.java:146) at org.mybatis.guice.transactional.TransactionalMethodInterceptor.invoke(TransactionalMethodInterceptor.java:101) at org.apache.aurora.scheduler.storage.db.DbStorage$2.doWithGateClosed(DbStorage.java:162) at org.apache.aurora.scheduler.async.GatingDelayExecutor.closeDuring(GatingDelayExecutor.java:62) at org.apache.aurora.scheduler.storage.db.DbStorage.write(DbStorage.java:158) at org.apache.aurora.common.inject.TimedInterceptor.invoke(TimedInterceptor.java:84) at org.apache.aurora.scheduler.storage.log.LogStorage.doInTransaction(LogStorage.java:611) at org.apache.aurora.scheduler.storage.log.LogStorage.write(LogStorage.java:644) at org.apache.aurora.scheduler.storage.CallOrderEnforcingStorage.write(CallOrderEnforcingStorage.java:130) at org.apache.aurora.scheduler.thrift.SchedulerThriftInterface.killTasks(SchedulerThriftInterface.java:479) at org.apache.aurora.scheduler.thrift.aop.ThriftStatsExporterInterceptor.invoke(ThriftStatsExporterInterceptor.java:47) at org.apache.aurora.scheduler.thrift.aop.FeatureToggleInterceptor.invoke(FeatureToggleInterceptor.java:38) at org.apache.aurora.scheduler.thrift.aop.LoggingInterceptor.invoke(LoggingInterceptor.java:73) at 
org.apache.aurora.scheduler.thrift.aop.ServerInfoInterceptor.invoke(ServerInfoInterceptor.java:30) at org.apache.aurora.gen.AuroraSchedulerManager$Processor$killTasks.getResult(AuroraSchedulerManager.java:1348) at org.apache.aurora.gen.AuroraSchedulerManager$Processor$killTasks.getResult(AuroraSchedulerManager.java:1333) at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) at org.apache.thrift.server.TServlet.doPost(TServlet.java:83) at javax.servlet.http.HttpServlet.service(HttpServlet.java:727) at javax.servlet.http.HttpServlet.service(HttpServlet.java:820) at com.google.inject.servlet.ServletDefinition.doService(ServletDefinition.java:263) at com.google.inject.servlet.ServletDefinition.service(ServletDefinition.java:178) at com.google.inject.servlet.ManagedServletPipeline.service(ManagedServletPipeline.java:91) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:62) at com.google.inject.servlet.FilterDefinition.doFilter(FilterDefinition.java:168) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:58) at com.google.inject.servlet.FilterDefinition.doFilter(FilterDefinition.java:168) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:58) at com.google.inject.servlet.FilterDefinition.doFilter(FilterDefinition.java:168) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:58) at org.apache.aurora.scheduler.http.LeaderRedirectFilter.doFilter(LeaderRedirectFilter.java:49) at org.apache.aurora.scheduler.http.AbstractFilter.doFilter(AbstractFilter.java:44) at com.google.inject.servlet.FilterDefinition.doFilter(FilterDefinition.java:163) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:58) at org.apache.aurora.scheduler.http.HttpStatsFilter.doFilter(HttpStatsFilter.java:70) at 
org.apache.aurora.scheduler.http.AbstractFilter.doFilter(AbstractFilter.java:44) at com.google.inject.servlet.FilterDefinition.doFilter(FilterDefinition.java:163) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:58) at com.google.inject.servlet.FilterDefinition.doFilter(FilterDefinition.java:168) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:58) at com.google.inject.servlet.FilterDefinition.doFilter(FilterDefinition.java:168) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:58) at com.google.inject.servlet.FilterDefinition.doFilter(FilterDefinition.java:168) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:58) at com.google.inject.servlet.FilterDefinition.doFilter(FilterDefinition.java:168) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:58) at org.eclipse.jetty.servlets.UserAgentFilter.doFilter(UserAgentFilter.java:82) at org.eclipse.jetty.servlets.GzipFilter.doFilter(GzipFilter.java:294) at com.google.inject.servlet.FilterDefinition.doFilter(FilterDefinition.java:163) at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:58) at com.google.inject.servlet.ManagedFilterPipeline.dispatch(ManagedFilterPipeline.java:118) at com.google.inject.servlet.GuiceFilter.doFilter(GuiceFilter.java:113) at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1288) at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:443) at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1044) at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:372) at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:978) at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135) at 
org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154) at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116) at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:317) at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116) at org.eclipse.jetty.server.Server.handle(Server.java:369) at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:486) at org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:944) at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1005) at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:865) at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240) at org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:82) at org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:667) at org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:52) at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608) at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543) at java.lang.Thread.run(Thread.java:745) Caused by: org.apache.aurora.scheduler.log.Log$Stream$StreamAccessException: Timeout performing log append at org.apache.aurora.scheduler.log.mesos.MesosLog$LogStream.disableLog(MesosLog.java:352) at org.apache.aurora.scheduler.log.mesos.MesosLog$LogStream.mutate(MesosLog.java:367) at org.apache.aurora.scheduler.log.mesos.MesosLog$LogStream.append(MesosLog.java:315) at org.apache.aurora.scheduler.log.mesos.MesosLog$LogStream.append(MesosLog.java:145) at org.apache.aurora.scheduler.storage.log.StreamManagerImpl.appendAndGetPosition(StreamManagerImpl.java:238) at org.apache.aurora.common.inject.TimedInterceptor.invoke(TimedInterceptor.java:84) at 
org.apache.aurora.scheduler.storage.log.StreamManagerImpl$StreamTransactionImpl.commit(StreamManagerImpl.java:267) at org.apache.aurora.scheduler.storage.log.LogStorage$24.apply(LogStorage.java:616) ... 76 more Caused by: java.util.concurrent.TimeoutException: Timed out while attempting to append at org.apache.mesos.Log$Writer.append(Native Method) at org.apache.aurora.scheduler.log.mesos.MesosLogStreamModule$5.append(MesosLogStreamModule.java:188) at org.apache.aurora.scheduler.log.mesos.MesosLog$LogStream$3.apply(MesosLog.java:319) at org.apache.aurora.scheduler.log.mesos.MesosLog$LogStream$3.apply(MesosLog.java:315) at org.apache.aurora.scheduler.log.mesos.MesosLog$LogStream.mutate(MesosLog.java:365) ... 82 more