[ https://issues.apache.org/jira/browse/GEODE-8131?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17125333#comment-17125333 ]
Kirk Lund commented on GEODE-8131: ---------------------------------- Here's the thread stack with line feeds added back in: {noformat} "P2P message reader for 10.236.28.120(servername-removed)<v491>:56152 shared unordered uid=9 port=41204" tid=0xd49 (in native) java.lang.Thread.State: RUNNABLE at sun.nio.ch.FileDispatcherImpl.write0(Native Method) at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47) at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93) at sun.nio.ch.IOUtil.write(IOUtil.java:51) at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:471) - locked java.lang.Object@24528b9b at org.apache.geode.internal.tcp.Connection.nioWriteFully(Connection.java:3291) - locked java.lang.Object@42a1a79b at org.apache.geode.internal.tcp.Connection.sendPreserialized(Connection.java:2527) at org.apache.geode.internal.tcp.MsgStreamer.realFlush(MsgStreamer.java:319) at org.apache.geode.internal.tcp.MsgStreamer.writeMessage(MsgStreamer.java:244) at org.apache.geode.distributed.internal.direct.DirectChannel.sendToMany(DirectChannel.java:393) at org.apache.geode.distributed.internal.direct.DirectChannel.sendToOne(DirectChannel.java:250) at org.apache.geode.distributed.internal.direct.DirectChannel.send(DirectChannel.java:615) at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.directChannelSend(GMSMembershipManager.java:1717) at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.send(GMSMembershipManager.java:1898) at org.apache.geode.distributed.internal.ClusterDistributionManager.sendViaMembershipManager(ClusterDistributionManager.java:2878) at org.apache.geode.distributed.internal.ClusterDistributionManager.sendOutgoing(ClusterDistributionManager.java:2798) at org.apache.geode.distributed.internal.ClusterDistributionManager.sendMessage(ClusterDistributionManager.java:2837) at org.apache.geode.distributed.internal.ClusterDistributionManager.putOutgoing(ClusterDistributionManager.java:1531) at org.apache.geode.internal.alerting.AlertMessaging.sendAlert(AlertMessaging.java:75) at org.apache.geode.internal.logging.log4j.AlertAppender.sendAlertMessage(AlertAppender.java:188) at org.apache.geode.internal.logging.log4j.AlertAppender.doAppend(AlertAppender.java:163) at org.apache.geode.internal.logging.log4j.AlertAppender.lambda$append$0(AlertAppender.java:159) at org.apache.geode.internal.logging.log4j.AlertAppender$$Lambda$168/1102181662.run(Unknown Source) at org.apache.geode.internal.alerting.AlertingAction.execute(AlertingAction.java:29) at org.apache.geode.internal.logging.log4j.AlertAppender.append(AlertAppender.java:159) at org.apache.logging.log4j.core.config.AppenderControl.tryCallAppender(AppenderControl.java:156) at org.apache.logging.log4j.core.config.AppenderControl.callAppender0(AppenderControl.java:129) at org.apache.logging.log4j.core.config.AppenderControl.callAppenderPreventRecursion(AppenderControl.java:120) at org.apache.logging.log4j.core.config.AppenderControl.callAppender(AppenderControl.java:84) at org.apache.logging.log4j.core.config.LoggerConfig.callAppenders(LoggerConfig.java:464) at org.apache.logging.log4j.core.config.LoggerConfig.processLogEvent(LoggerConfig.java:448) at org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:431) at org.apache.logging.log4j.core.config.LoggerConfig.logParent(LoggerConfig.java:455) at org.apache.logging.log4j.core.config.LoggerConfig.processLogEvent(LoggerConfig.java:450) at org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:431) at org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:406) at org.apache.logging.log4j.core.config.AwaitCompletionReliabilityStrategy.log(AwaitCompletionReliabilityStrategy.java:63) at org.apache.logging.log4j.core.Logger.logMessage(Logger.java:146) at org.apache.logging.log4j.spi.ExtendedLoggerWrapper.logMessage(ExtendedLoggerWrapper.java:217) at org.apache.logging.log4j.spi.AbstractLogger.tryLogMessage(AbstractLogger.java:2170) at org.apache.logging.log4j.spi.AbstractLogger.logMessageTrackRecursion(AbstractLogger.java:2125) at org.apache.logging.log4j.spi.AbstractLogger.logMessageSafely(AbstractLogger.java:2108) at org.apache.logging.log4j.spi.AbstractLogger.logMessage(AbstractLogger.java:2002) at org.apache.logging.log4j.spi.AbstractLogger.logIfEnabled(AbstractLogger.java:1974) at org.apache.logging.log4j.spi.AbstractLogger.fatal(AbstractLogger.java:1054) at org.apache.geode.internal.tcp.Connection.processNIOBuffer(Connection.java:3610) at org.apache.geode.internal.tcp.Connection.runNioReader(Connection.java:1824) at org.apache.geode.internal.tcp.Connection.run(Connection.java:1686) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) {noformat} > alert service hangs, blocking cache operations > ---------------------------------------------- > > Key: GEODE-8131 > URL: https://issues.apache.org/jira/browse/GEODE-8131 > Project: Geode > Issue Type: Bug > Components: logging > Reporter: Bruce J Schuchardt > Assignee: Kirk Lund > Priority: Major > Labels: GeodeOperationAPI > > This v1.8 TcpConduit reader thread was blocked in a production system. It > had experienced a deserialization error and was trying to log the exception. > A Manager was present in the cluster and had registered as an alert listener. > Another thread was blocked sending something on the shared/unordered > connection that this alert should be sent on. This persisted for over 6 > hours and we never saw the serialization exception in the log file. > Consequently we had to recommend setting the alert level to None and have > them run into the serialization problem again. > This is a serious flaw in the alerting system and it's caused us grief many > times. The alerting system should not block other threads. Maybe a > background thread could consume and transmit alerts to alert-listeners? > > {noformat} > "P2P message reader for 10.236.28.120(servername-removed)<v491>:56152 shared > unordered uid=9 port=41204" tid=0xd49 (in native) java.lang.Thread.State: > RUNNABLE at sun.nio.ch.FileDispatcherImpl.write0(Native Method) at > sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47) at > sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93) at > sun.nio.ch.IOUtil.write(IOUtil.java:51) at > sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:471) - locked > java.lang.Object@24528b9b at > org.apache.geode.internal.tcp.Connection.nioWriteFully(Connection.java:3291) > - locked java.lang.Object@42a1a79b at > org.apache.geode.internal.tcp.Connection.sendPreserialized(Connection.java:2527) > at org.apache.geode.internal.tcp.MsgStreamer.realFlush(MsgStreamer.java:319) > at > org.apache.geode.internal.tcp.MsgStreamer.writeMessage(MsgStreamer.java:244) > at > org.apache.geode.distributed.internal.direct.DirectChannel.sendToMany(DirectChannel.java:393) > at > org.apache.geode.distributed.internal.direct.DirectChannel.sendToOne(DirectChannel.java:250) > at > org.apache.geode.distributed.internal.direct.DirectChannel.send(DirectChannel.java:615) > at > org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.directChannelSend(GMSMembershipManager.java:1717) > at > org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.send(GMSMembershipManager.java:1898) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.sendViaMembershipManager(ClusterDistributionManager.java:2878) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.sendOutgoing(ClusterDistributionManager.java:2798) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.sendMessage(ClusterDistributionManager.java:2837) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.putOutgoing(ClusterDistributionManager.java:1531) > at > org.apache.geode.internal.alerting.AlertMessaging.sendAlert(AlertMessaging.java:75) > at > org.apache.geode.internal.logging.log4j.AlertAppender.sendAlertMessage(AlertAppender.java:188) > at > org.apache.geode.internal.logging.log4j.AlertAppender.doAppend(AlertAppender.java:163) > at > org.apache.geode.internal.logging.log4j.AlertAppender.lambda$append$0(AlertAppender.java:159) > at > org.apache.geode.internal.logging.log4j.AlertAppender$$Lambda$168/1102181662.run(Unknown > Source) at > org.apache.geode.internal.alerting.AlertingAction.execute(AlertingAction.java:29) > at > org.apache.geode.internal.logging.log4j.AlertAppender.append(AlertAppender.java:159) > at > org.apache.logging.log4j.core.config.AppenderControl.tryCallAppender(AppenderControl.java:156) > at > org.apache.logging.log4j.core.config.AppenderControl.callAppender0(AppenderControl.java:129) > at > org.apache.logging.log4j.core.config.AppenderControl.callAppenderPreventRecursion(AppenderControl.java:120) > at > org.apache.logging.log4j.core.config.AppenderControl.callAppender(AppenderControl.java:84) > at > org.apache.logging.log4j.core.config.LoggerConfig.callAppenders(LoggerConfig.java:464) > at > org.apache.logging.log4j.core.config.LoggerConfig.processLogEvent(LoggerConfig.java:448) > at > org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:431) > at > org.apache.logging.log4j.core.config.LoggerConfig.logParent(LoggerConfig.java:455) > at > org.apache.logging.log4j.core.config.LoggerConfig.processLogEvent(LoggerConfig.java:450) > at > org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:431) > at > org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:406) > at > org.apache.logging.log4j.core.config.AwaitCompletionReliabilityStrategy.log(AwaitCompletionReliabilityStrategy.java:63) > at org.apache.logging.log4j.core.Logger.logMessage(Logger.java:146) at > org.apache.logging.log4j.spi.ExtendedLoggerWrapper.logMessage(ExtendedLoggerWrapper.java:217) > at > org.apache.logging.log4j.spi.AbstractLogger.tryLogMessage(AbstractLogger.java:2170) > at > org.apache.logging.log4j.spi.AbstractLogger.logMessageTrackRecursion(AbstractLogger.java:2125) > at > org.apache.logging.log4j.spi.AbstractLogger.logMessageSafely(AbstractLogger.java:2108) > at > org.apache.logging.log4j.spi.AbstractLogger.logMessage(AbstractLogger.java:2002) > at > org.apache.logging.log4j.spi.AbstractLogger.logIfEnabled(AbstractLogger.java:1974) > at > org.apache.logging.log4j.spi.AbstractLogger.fatal(AbstractLogger.java:1054) > at > org.apache.geode.internal.tcp.Connection.processNIOBuffer(Connection.java:3610) > at > org.apache.geode.internal.tcp.Connection.runNioReader(Connection.java:1824) > at org.apache.geode.internal.tcp.Connection.run(Connection.java:1686) at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) {noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005)