[ https://issues.apache.org/jira/browse/GEODE-8131?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17113560#comment-17113560 ]
ASF subversion and git services commented on GEODE-8131: -------------------------------------------------------- Commit dd500e8509150f1c753e777bac6ce3ec24f7adce in geode's branch refs/heads/support/1.12 from Bruce Schuchardt [ https://gitbox.apache.org/repos/asf?p=geode.git;h=dd500e8 ] GEODE-8131: reader thread blocked attempting to issue an alert (#5132) * GEODE-8131: reader thread blocked attempting to issue an alert This PR does not solve the problem of the alert system causing a P2P message reader to block, but instead ensures that the reader thread will write deserialization information to the log before issuing a fatal alert. If the alert level is set to "info" this won't help, but no one would set the alert level that low. Along the way I removed some duplicate strings. * removing duplicate code & logging toString of exceptions at fatal-level (cherry picked from commit 358fd7067cc56b1ceb8c3d7c271c3a5254d7ee93) > reader thread blocked attempting to issue an alert > -------------------------------------------------- > > Key: GEODE-8131 > URL: https://issues.apache.org/jira/browse/GEODE-8131 > Project: Geode > Issue Type: Bug > Components: logging, membership > Reporter: Bruce J Schuchardt > Priority: Major > > This v1.8 TcpConduit reader thread was blocked in a production system. It > had experienced a deserialization error and was trying to log the exception. > A Manager was present in the cluster and had registered as an alert listener. > Another thread was blocked sending something on the shared/unordered > connection that this alert should be sent on. This persisted for over 6 > hours and we never saw the serialization exception in the log file. > Consequently, we had to recommend setting the alert level to None and have > them run into the serialization problem again. > This is a serious flaw in the alerting system and it's caused us grief many > times. We should log alerts before attempting to send them to > alert-listeners. 
> > {noformat} > "P2P message reader for 10.236.28.120(servername-removed)<v491>:56152 shared > unordered uid=9 port=41204" tid=0xd49 (in native) java.lang.Thread.State: > RUNNABLE at sun.nio.ch.FileDispatcherImpl.write0(Native Method) at > sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47) at > sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93) at > sun.nio.ch.IOUtil.write(IOUtil.java:51) at > sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:471) - locked > java.lang.Object@24528b9b at > org.apache.geode.internal.tcp.Connection.nioWriteFully(Connection.java:3291) > - locked java.lang.Object@42a1a79b at > org.apache.geode.internal.tcp.Connection.sendPreserialized(Connection.java:2527) > at org.apache.geode.internal.tcp.MsgStreamer.realFlush(MsgStreamer.java:319) > at > org.apache.geode.internal.tcp.MsgStreamer.writeMessage(MsgStreamer.java:244) > at > org.apache.geode.distributed.internal.direct.DirectChannel.sendToMany(DirectChannel.java:393) > at > org.apache.geode.distributed.internal.direct.DirectChannel.sendToOne(DirectChannel.java:250) > at > org.apache.geode.distributed.internal.direct.DirectChannel.send(DirectChannel.java:615) > at > org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.directChannelSend(GMSMembershipManager.java:1717) > at > org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.send(GMSMembershipManager.java:1898) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.sendViaMembershipManager(ClusterDistributionManager.java:2878) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.sendOutgoing(ClusterDistributionManager.java:2798) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.sendMessage(ClusterDistributionManager.java:2837) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.putOutgoing(ClusterDistributionManager.java:1531) > at > 
org.apache.geode.internal.alerting.AlertMessaging.sendAlert(AlertMessaging.java:75) > at > org.apache.geode.internal.logging.log4j.AlertAppender.sendAlertMessage(AlertAppender.java:188) > at > org.apache.geode.internal.logging.log4j.AlertAppender.doAppend(AlertAppender.java:163) > at > org.apache.geode.internal.logging.log4j.AlertAppender.lambda$append$0(AlertAppender.java:159) > at > org.apache.geode.internal.logging.log4j.AlertAppender$$Lambda$168/1102181662.run(Unknown > Source) at > org.apache.geode.internal.alerting.AlertingAction.execute(AlertingAction.java:29) > at > org.apache.geode.internal.logging.log4j.AlertAppender.append(AlertAppender.java:159) > at > org.apache.logging.log4j.core.config.AppenderControl.tryCallAppender(AppenderControl.java:156) > at > org.apache.logging.log4j.core.config.AppenderControl.callAppender0(AppenderControl.java:129) > at > org.apache.logging.log4j.core.config.AppenderControl.callAppenderPreventRecursion(AppenderControl.java:120) > at > org.apache.logging.log4j.core.config.AppenderControl.callAppender(AppenderControl.java:84) > at > org.apache.logging.log4j.core.config.LoggerConfig.callAppenders(LoggerConfig.java:464) > at > org.apache.logging.log4j.core.config.LoggerConfig.processLogEvent(LoggerConfig.java:448) > at > org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:431) > at > org.apache.logging.log4j.core.config.LoggerConfig.logParent(LoggerConfig.java:455) > at > org.apache.logging.log4j.core.config.LoggerConfig.processLogEvent(LoggerConfig.java:450) > at > org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:431) > at > org.apache.logging.log4j.core.config.LoggerConfig.log(LoggerConfig.java:406) > at > org.apache.logging.log4j.core.config.AwaitCompletionReliabilityStrategy.log(AwaitCompletionReliabilityStrategy.java:63) > at org.apache.logging.log4j.core.Logger.logMessage(Logger.java:146) at > 
org.apache.logging.log4j.spi.ExtendedLoggerWrapper.logMessage(ExtendedLoggerWrapper.java:217) > at > org.apache.logging.log4j.spi.AbstractLogger.tryLogMessage(AbstractLogger.java:2170) > at > org.apache.logging.log4j.spi.AbstractLogger.logMessageTrackRecursion(AbstractLogger.java:2125) > at > org.apache.logging.log4j.spi.AbstractLogger.logMessageSafely(AbstractLogger.java:2108) > at > org.apache.logging.log4j.spi.AbstractLogger.logMessage(AbstractLogger.java:2002) > at > org.apache.logging.log4j.spi.AbstractLogger.logIfEnabled(AbstractLogger.java:1974) > at > org.apache.logging.log4j.spi.AbstractLogger.fatal(AbstractLogger.java:1054) > at > org.apache.geode.internal.tcp.Connection.processNIOBuffer(Connection.java:3610) > at > org.apache.geode.internal.tcp.Connection.runNioReader(Connection.java:1824) > at org.apache.geode.internal.tcp.Connection.run(Connection.java:1686) at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) {noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005)