[
https://issues.apache.org/jira/browse/IGNITE-20267?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17757851#comment-17757851
]
Sebastian Fabisz commented on IGNITE-20267:
-------------------------------------------
{{ERROR 2023-08-02 14:19:53,500
[grid-nio-worker-tcp-comm-0-#23%TcpCommunicationSpi%]
o.a.i.s.c.t.TcpCommunicationSpi traceId="" spanId="" - Failed to process
selector key [ses=GridSelectorNioSessionImpl [worker=DirectNioClientWorker
[super=AbstractNioClientWorker [idx=0, bytesRcvd=8197316, bytesSent=105099,
bytesRcvd0=0, bytesSent0=0, select=true, super=GridWorker
[name=grid-nio-worker-tcp-comm-0, igniteInstanceName=TcpCommunicationSpi,
finished=false, heartbeatTs=1690982392494, hashCode=744530904,
interrupted=false,
runner=grid-nio-worker-tcp-comm-0-#23%TcpCommunicationSpi%]]],
writeBuf=java.nio.DirectByteBuffer[pos=0 lim=32768 cap=32768],
readBuf=java.nio.DirectByteBuffer[pos=0 lim=32768 cap=32768], inRecovery=null,
outRecovery=null, closeSocket=true,
outboundMessagesQueueSizeMetric=o.a.i.i.processors.metric.impl.LongAdderMetric@69a257d1,
super=GridNioSessionImpl [locAddr=\{removed}, rmtAddr=\{removed},
createTime=1690978654235, closeTime=0, bytesSent=18, bytesRcvd=0, bytesSent0=0,
bytesRcvd0=0, sndSchedTime=1690982259458, lastSndTime=1690978654235,
lastRcvTime=1690982259458, readsPaused=false,
filterChain=FilterChain[filters=[GridNioCodecFilter
[parser=o.a.i.i.util.nio.GridDirectParser@707a270e, directMode=true],
GridConnectionBytesVerifyFilter], accepted=true, markedForClose=false]]]}}
{{java.net.SocketException: Connection reset}}
{{ at
java.base/sun.nio.ch.SocketChannelImpl.throwConnectionReset(SocketChannelImpl.java:394)}}
{{ at
java.base/sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:411)}}
{{ at
org.apache.ignite.internal.util.nio.GridNioServer$DirectNioClientWorker.processRead(GridNioServer.java:1351)}}
{{ at
org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.processSelectedKeys(GridNioServer.java:2575)}}
{{ at
org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.bodyInternal(GridNioServer.java:2271)}}
{{ at
org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.body(GridNioServer.java:1910)}}
{{ at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125)}}
{{ at java.base/java.lang.Thread.run(Thread.java:833)}}
{{TRACE 2023-08-02 14:19:53,500
[grid-nio-worker-tcp-comm-0-#23%TcpCommunicationSpi%]
o.a.i.s.c.t.TcpCommunicationSpi traceId="" spanId="" - Processing keys in
client worker: 1}}
{{DEBUG 2023-08-02 14:19:53,563 [grid-timeout-worker-#22%GRID_NAME_REPLACED%]
o.a.i.i.p.t.GridTimeoutProcessor traceId="" spanId="" - Timeout has occurred
[obj=CancelableTask [id=82140b0b981-8bdf698a-49e0-48e8-a6db-4901eb305943,
endTime=1690982393559, period=5000, cancel=false,
task=org.apache.ignite.internal.processors.cache.query.continuous.CacheContinuousQueryManager$BackupCleaner@327e8671],
process=true]}}
{{DEBUG 2023-08-02 14:19:53,563 [grid-timeout-worker-#22%GRID_NAME_REPLACED%]
o.a.i.i.p.t.GridTimeoutProcessor traceId="" spanId="" - Timeout has occurred
[obj=CancelableTask [id=92140b0b981-8bdf698a-49e0-48e8-a6db-4901eb305943,
endTime=1690982393559, period=5000, cancel=false,
task=org.apache.ignite.internal.processors.cache.query.continuous.CacheContinuousQueryManager$BackupCleaner@208b506],
process=true]}}
{{INFO 2023-08-02 14:19:53,824 [Thread-82414] o.a.i.s.d.t.TcpDiscoverySpi
traceId="" spanId="" - Pinging node: bf79c9d1-746b-4454-bad2-a74ce36fe6c1}}
{{INFO 2023-08-02 14:19:53,827 [Thread-82414] o.a.i.s.d.t.TcpDiscoverySpi
traceId="" spanId="" - Finished node ping
[nodeId=bf79c9d1-746b-4454-bad2-a74ce36fe6c1, res=true, time=5ms]}}
{{INFO 2023-08-02 14:19:53,827 [Thread-82414] o.a.i.s.d.t.TcpDiscoverySpi
traceId="" spanId="" - Pinging node: ad61ff66-d298-4311-8785-193c1dbe443b}}
{{INFO 2023-08-02 14:19:53,828 [Thread-82414] o.a.i.s.d.t.TcpDiscoverySpi
traceId="" spanId="" - Finished node ping
[nodeId=ad61ff66-d298-4311-8785-193c1dbe443b, res=true, time=6ms]}}
{{DEBUG 2023-08-02 14:19:53,829 [ClientMessageReceptor2]
o.j.o.i.ClientIIOPConnection traceId="" spanId="" - read 12 bytes from
\{removed}}}
{{DEBUG 2023-08-02 14:19:53,829 [ClientMessageReceptor2]
o.j.o.i.ClientIIOPConnection traceId="" spanId="" - read 23 bytes from
\{removed}}}
{{DEBUG 2023-08-02 14:19:53,829 [ClientMessageReceptor2] o.j.g.conn traceId=""
spanId="" - read GIOP message of size 35 from ClientGIOPConnection to
\{removed} (e8696fa)}}
{{TRACE 2023-08-02 14:19:53,924 [tcp-disco-msg-worker-[ad61ff66
\{removed}]-#2%GRID_NAME_REPLACED%-#42%GRID_NAME_REPLACED%]
o.a.i.s.d.t.TcpDiscoverySpi traceId="" spanId="" - Next node remains the same
[nextId=ad61ff66-d298-4311-8785-193c1dbe443b, nextOrder=3]}}
{{TRACE 2023-08-02 14:19:53,925 [tcp-disco-msg-worker-[ad61ff66
\{removed}]-#2%GRID_NAME_REPLACED%-#42%GRID_NAME_REPLACED%]
o.a.i.s.d.t.TcpDiscoverySpi traceId="" spanId="" - Message has been sent to
next node [msg=TcpDiscoveryConnectionCheckMessage
[super=TcpDiscoveryAbstractMessage [sndNodeId=null,
id=8837ab0b981-53df807d-0cc1-4e03-9a21-d8f670ef6f1b, verifierNodeId=null,
topVer=0, pendingIdx=0, failedNodes=null, isClient=false]],
next=ad61ff66-d298-4311-8785-193c1dbe443b, res=1]}}
{{TRACE 2023-08-02 14:19:53,927 [tcp-disco-sock-reader-[bf79c9d1
\{removed}]-#6%GRID_NAME_REPLACED%-#46%GRID_NAME_REPLACED%]
o.a.i.s.d.t.TcpDiscoverySpi traceId="" spanId="" - Message has been received:
TcpDiscoveryConnectionCheckMessage [super=TcpDiscoveryAbstractMessage
[sndNodeId=bf79c9d1-746b-4454-bad2-a74ce36fe6c1, id=null, verifierNodeId=null,
topVer=0, pendingIdx=0, failedNodes=null, isClient=false]]}}
{{DEBUG 2023-08-02 14:19:53,950 [grid-timeout-worker-#22%GRID_NAME_REPLACED%]
o.a.i.i.p.t.GridTimeoutProcessor traceId="" spanId="" - Timeout has occurred
[obj=CancelableTask [id=11140b0b981-8bdf698a-49e0-48e8-a6db-4901eb305943,
endTime=1690982393937, period=1000, cancel=false,
task=org.apache.ignite.internal.processors.cache.transactions.IgniteTxManager$$Lambda$1147/0x000000080156fb50@6f313905],
process=true]}}
{{TRACE 2023-08-02 14:19:54,426 [tcp-disco-msg-worker-[ad61ff66
\{removed}]-#2%GRID_NAME_REPLACED%-#42%GRID_NAME_REPLACED%]
o.a.i.s.d.t.TcpDiscoverySpi traceId="" spanId="" - Next node remains the same
[nextId=ad61ff66-d298-4311-8785-193c1dbe443b, nextOrder=3]}}
{{TRACE 2023-08-02 14:19:54,426 [tcp-disco-msg-worker-[ad61ff66
\{removed}]-#2%GRID_NAME_REPLACED%-#42%GRID_NAME_REPLACED%]
o.a.i.s.d.t.TcpDiscoverySpi traceId="" spanId="" - Message has been sent to
next node [msg=TcpDiscoveryConnectionCheckMessage
[super=TcpDiscoveryAbstractMessage [sndNodeId=null,
id=b837ab0b981-53df807d-0cc1-4e03-9a21-d8f670ef6f1b, verifierNodeId=null,
topVer=0, pendingIdx=0, failedNodes=null, isClient=false]],
next=ad61ff66-d298-4311-8785-193c1dbe443b, res=1]}}
{{TRACE 2023-08-02 14:19:54,433 [tcp-disco-sock-reader-[bf79c9d1
\{removed}]-#6%GRID_NAME_REPLACED%-#46%GRID_NAME_REPLACED%]
o.a.i.s.d.t.TcpDiscoverySpi traceId="" spanId="" - Message has been received:
TcpDiscoveryConnectionCheckMessage [super=TcpDiscoveryAbstractMessage
[sndNodeId=bf79c9d1-746b-4454-bad2-a74ce36fe6c1, id=null, verifierNodeId=null,
topVer=0, pendingIdx=0, failedNodes=null, isClient=false]]}}
{{DEBUG 2023-08-02 14:19:54,473 [scheduling-1]
o.a.h.i.c.PoolingHttpClientConnectionManager traceId="" spanId="" - Closing
expired connections}}
{{DEBUG 2023-08-02 14:19:54,474 [scheduling-1]
o.a.h.i.c.PoolingHttpClientConnectionManager traceId="" spanId="" - Closing
connections idle longer than 500 MILLISECONDS}}
{{ERROR 2023-08-02 14:19:54,500
[grid-nio-worker-tcp-comm-0-#23%TcpCommunicationSpi%]
o.a.i.s.c.t.TcpCommunicationSpi traceId="" spanId="" - Failed to process
selector key [ses=GridSelectorNioSessionImpl [worker=DirectNioClientWorker
[super=AbstractNioClientWorker [idx=0, bytesRcvd=8197316, bytesSent=105099,
bytesRcvd0=0, bytesSent0=0, select=true, super=GridWorker
[name=grid-nio-worker-tcp-comm-0, igniteInstanceName=TcpCommunicationSpi,
finished=false, heartbeatTs=1690982393490, hashCode=744530904,
interrupted=false,
runner=grid-nio-worker-tcp-comm-0-#23%TcpCommunicationSpi%]]],
writeBuf=java.nio.DirectByteBuffer[pos=0 lim=32768 cap=32768],
readBuf=java.nio.DirectByteBuffer[pos=0 lim=32768 cap=32768], inRecovery=null,
outRecovery=null, closeSocket=true,
outboundMessagesQueueSizeMetric=o.a.i.i.processors.metric.impl.LongAdderMetric@69a257d1,
super=GridNioSessionImpl [locAddr=\{removed}, rmtAddr=\{removed},
createTime=1690978654235, closeTime=0, bytesSent=18, bytesRcvd=0, bytesSent0=0,
bytesRcvd0=0, sndSchedTime=1690982259458, lastSndTime=1690978654235,
lastRcvTime=1690982259458, readsPaused=false,
filterChain=FilterChain[filters=[GridNioCodecFilter
[parser=o.a.i.i.util.nio.GridDirectParser@707a270e, directMode=true],
GridConnectionBytesVerifyFilter], accepted=true, markedForClose=false]]]}}
{{java.net.SocketException: Connection reset}}
{{ at
java.base/sun.nio.ch.SocketChannelImpl.throwConnectionReset(SocketChannelImpl.java:394)}}
{{ at
java.base/sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:411)}}
{{ at
org.apache.ignite.internal.util.nio.GridNioServer$DirectNioClientWorker.processRead(GridNioServer.java:1351)}}
{{ at
org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.processSelectedKeys(GridNioServer.java:2575)}}
{{ at
org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.bodyInternal(GridNioServer.java:2271)}}
{{ at
org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.body(GridNioServer.java:1910)}}
{{ at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125)}}
{{ at java.base/java.lang.Thread.run(Thread.java:833)}}
> Infinite loop of SockeException
> -------------------------------
>
> Key: IGNITE-20267
> URL: https://issues.apache.org/jira/browse/IGNITE-20267
> Project: Ignite
> Issue Type: Bug
> Components: thin client
> Affects Versions: 2.15
> Reporter: Sebastian Fabisz
> Priority: Major
>
> Some of our ignite instances are experiencing infinite loop of same error:
> {{ERROR 2023-07-27 08:26:44,876
> [grid-nio-worker-tcp-comm-2-#25%TcpCommunicationSpi%]
> o.a.i.s.c.t.TcpCommunicationSpi traceId="" spanId="" - Failed to process
> selector key [ses=GridSelectorNioSessionImpl [worker=DirectNioClientWorker
> [super=AbstractNioClientWorker [idx=2, bytesRcvd=21528, bytesSent=15345,
> bytesRcvd0=0, bytesSent0=0, select=true, super=GridWorker
> [name=grid-nio-worker-tcp-comm-2, igniteInstanceName=TcpCommunicationSpi,
> finished=false, heartbeatTs=1690442803865, hashCode=2102759141,
> interrupted=false,
> runner=grid-nio-worker-tcp-comm-2-#25%TcpCommunicationSpi%]]],
> writeBuf=java.nio.DirectByteBuffer[pos=0 lim=32768 cap=32768],
> readBuf=java.nio.DirectByteBuffer[pos=0 lim=32768 cap=32768],
> inRecovery=null, outRecovery=null, closeSocket=true,
> outboundMessagesQueueSizeMetric=o.a.i.i.processors.metric.impl.LongAdderMetric@69a257d1,
> super=GridNioSessionImpl [locAddr=\{removed}, rmtAddr=\{removed},
> createTime=1690249023154, closeTime=0, bytesSent=18, bytesRcvd=3,
> bytesSent0=0, bytesRcvd0=0, sndSchedTime=1690442567813,
> lastSndTime=1690249023154, lastRcvTime=1690442567813, readsPaused=false,
> filterChain=FilterChain[filters=[GridNioCodecFilter
> [parser=o.a.i.i.util.nio.GridDirectParser@1fff7116, directMode=true],
> GridConnectionBytesVerifyFilter], accepted=true, markedForClose=false]]]
> java.net.SocketException: Connection reset at
> java.base/sun.nio.ch.SocketChannelImpl.throwConnectionReset(SocketChannelImpl.java:394)
> at java.base/sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:411)
> at
> org.apache.ignite.internal.util.nio.GridNioServer$DirectNioClientWorker.processRead(GridNioServer.java:1351)
> at
> org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.processSelectedKeys(GridNioServer.java:2575)
> at
> org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.bodyInternal(GridNioServer.java:2271)
> at
> org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.body(GridNioServer.java:1910)
> at
> org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125) at
> java.base/java.lang.Thread.run(Thread.java:833)}}
> Each error contains same message except of heartbeatTs field.
> This error repeats approximately every second. Not all of Ignite instances
> are affected. We have figured out that problem is caused by Nessus security
> scanner. It walks over all boxes and runs some security checks. It looks like
> one of security checks (which can be http request) causes Ignite to fall into
> infinite loop of errors. We think that nessus opens a connection to Ignite,
> then connection is closed by nessus, but Iginite won't kill the socket.
>
> We have already updated Ignite to latest version.
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)