[ 
https://issues.apache.org/jira/browse/KAFKA-7057?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Moshe Lavi updated KAFKA-7057:
------------------------------
    Description: 
We set up 3 Kafka brokers (version 0.10.1.1) and use a Spring Cloud Stream 
consumer to poll messages.
 We received consumer lag alerts and found that some consumers were blocked and 
no longer polling messages. This requires us to restart the microservice 
where the affected consumer resides.

I wonder if this has to do with a lack of available threads or with the fact 
that the heartbeat daemon does not exist/work.

*The thread dump shows:*

"kafka-coordinator-heartbeat-thread | SiteAgreementItem" #4943 daemon prio=5 
os_prio=0 tid=0x00007f3abdd08000 nid=0x83ac waiting for monitor entry 
[0x00007f3a5dcdb000]

   java.lang.Thread.State: BLOCKED (on object monitor)

                at 
org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.disableWakeups(ConsumerNetworkClient.java:409)

                - waiting to lock <*0x00000005df800450*> (a 
org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient)

                at 
org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.pollNoWakeup(ConsumerNetworkClient.java:264)

                at 
org.apache.kafka.clients.consumer.internals.AbstractCoordinator$HeartbeatThread.run(AbstractCoordinator.java:865)

                - locked <0x00000005df800488> (a 
org.apache.kafka.clients.consumer.internals.ConsumerCoordinator)

 

-kafka-consumer-1" #4940 prio=5 os_prio=0 tid=0x00007f3a8d433800 nid=0x838e 
runnable [0x00007f3a5dedd000]

   java.lang.Thread.State: RUNNABLE

                at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)

                at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)

                at 
sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:79)

                at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)

                - locked <0x00000005df7705e0> (a sun.nio.ch.Util$2)

                - locked <0x00000005df7705d0> (a 
java.util.Collections$UnmodifiableSet)

                - locked <0x00000005df7705f0> (a sun.nio.ch.EPollSelectorImpl)

                at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:97)

                at 
org.apache.kafka.common.network.Selector.select(Selector.java:470)

                at 
org.apache.kafka.common.network.Selector.poll(Selector.java:286)

                at 
org.apache.kafka.clients.NetworkClient.poll(NetworkClient.java:260)

                at 
org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.poll(ConsumerNetworkClient.java:232)

                - locked <*0x00000005df800450*> (a 
org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient)

                at 
org.apache.kafka.clients.consumer.KafkaConsumer.pollOnce(KafkaConsumer.java:1031)

                at 
org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:979)

                at 
org.springframework.kafka.listener.KafkaMessageListenerContainer$ListenerConsumer.run(KafkaMessageListenerContainer.java:532)

                at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)

                at java.util.concurrent.FutureTask.run(FutureTask.java:266)

                at java.lang.Thread.run(Thread.java:745)

  was:
We build 3 Kafka brokers (0.10.1.1) version using Spring Cloud Stream consumer 
to poll messages.
We encountered consumer lags alerted and found some consumers were blocked and 
not polling anymore messages. This requires us to restart the microservice 
where that consumer resides.

I wonder if this has to do with lack of available threads or to the fact there 
heartbeat daemon does not exist/work.


*The thread dump shows:*

kafka-coordinator-heartbeat-thread | SiteAgreementItem" #4943 daemon prio=5 
os_prio=0 tid=0x00007f3abdd08000 nid=0x83ac waiting for monitor entry 
[0x00007f3a5dcdb000]

   java.lang.Thread.State: BLOCKED (on object monitor)

                at 
org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.disableWakeups(ConsumerNetworkClient.java:409)

                - waiting to lock <*0x00000005df800450*> (a 
org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient)

                at 
org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.pollNoWakeup(ConsumerNetworkClient.java:264)

                at 
org.apache.kafka.clients.consumer.internals.AbstractCoordinator$HeartbeatThread.run(AbstractCoordinator.java:865)

                - locked <0x00000005df800488> (a 
org.apache.kafka.clients.consumer.internals.ConsumerCoordinator)

 

-kafka-consumer-1" #4940 prio=5 os_prio=0 tid=0x00007f3a8d433800 nid=0x838e 
runnable [0x00007f3a5dedd000]

   java.lang.Thread.State: RUNNABLE

                at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)

                at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)

                at 
sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:79)

                at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)

                - locked <0x00000005df7705e0> (a sun.nio.ch.Util$2)

                - locked <0x00000005df7705d0> (a 
java.util.Collections$UnmodifiableSet)

                - locked <0x00000005df7705f0> (a sun.nio.ch.EPollSelectorImpl)

                at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:97)

                at 
org.apache.kafka.common.network.Selector.select(Selector.java:470)

                at 
org.apache.kafka.common.network.Selector.poll(Selector.java:286)

                at 
org.apache.kafka.clients.NetworkClient.poll(NetworkClient.java:260)

                at 
org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.poll(ConsumerNetworkClient.java:232)

                - locked <*0x00000005df800450*> (a 
org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient)

                at 
org.apache.kafka.clients.consumer.KafkaConsumer.pollOnce(KafkaConsumer.java:1031)

                at 
org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:979)

                at 
org.springframework.kafka.listener.KafkaMessageListenerContainer$ListenerConsumer.run(KafkaMessageListenerContainer.java:532)

                at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)

                at java.util.concurrent.FutureTask.run(FutureTask.java:266)

                at java.lang.Thread.run(Thread.java:745)


> Consumer stop polling
> ---------------------
>
>                 Key: KAFKA-7057
>                 URL: https://issues.apache.org/jira/browse/KAFKA-7057
>             Project: Kafka
>          Issue Type: Bug
>          Components: consumer, controller
>    Affects Versions: 0.10.1.1
>            Reporter: Moshe Lavi
>            Priority: Major
>
> We set up 3 Kafka brokers (version 0.10.1.1) and use a Spring Cloud Stream 
> consumer to poll messages.
>  We received consumer lag alerts and found that some consumers were blocked 
> and no longer polling messages. This requires us to restart the 
> microservice where the affected consumer resides.
> I wonder if this has to do with a lack of available threads or with the fact 
> that the heartbeat daemon does not exist/work.
> *The thread dump shows:*
> "kafka-coordinator-heartbeat-thread | SiteAgreementItem" #4943 daemon prio=5 
> os_prio=0 tid=0x00007f3abdd08000 nid=0x83ac waiting for monitor entry 
> [0x00007f3a5dcdb000]
>    java.lang.Thread.State: BLOCKED (on object monitor)
>                 at 
> org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.disableWakeups(ConsumerNetworkClient.java:409)
>                 - waiting to lock <*0x00000005df800450*> (a 
> org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient)
>                 at 
> org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.pollNoWakeup(ConsumerNetworkClient.java:264)
>                 at 
> org.apache.kafka.clients.consumer.internals.AbstractCoordinator$HeartbeatThread.run(AbstractCoordinator.java:865)
>                 - locked <0x00000005df800488> (a 
> org.apache.kafka.clients.consumer.internals.ConsumerCoordinator)
>  
> -kafka-consumer-1" #4940 prio=5 os_prio=0 tid=0x00007f3a8d433800 nid=0x838e 
> runnable [0x00007f3a5dedd000]
>    java.lang.Thread.State: RUNNABLE
>                 at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
>                 at 
> sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
>                 at 
> sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:79)
>                 at 
> sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
>                 - locked <0x00000005df7705e0> (a sun.nio.ch.Util$2)
>                 - locked <0x00000005df7705d0> (a 
> java.util.Collections$UnmodifiableSet)
>                 - locked <0x00000005df7705f0> (a sun.nio.ch.EPollSelectorImpl)
>                 at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:97)
>                 at 
> org.apache.kafka.common.network.Selector.select(Selector.java:470)
>                 at 
> org.apache.kafka.common.network.Selector.poll(Selector.java:286)
>                 at 
> org.apache.kafka.clients.NetworkClient.poll(NetworkClient.java:260)
>                 at 
> org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.poll(ConsumerNetworkClient.java:232)
>                 - locked <*0x00000005df800450*> (a 
> org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient)
>                 at 
> org.apache.kafka.clients.consumer.KafkaConsumer.pollOnce(KafkaConsumer.java:1031)
>                 at 
> org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:979)
>                 at 
> org.springframework.kafka.listener.KafkaMessageListenerContainer$ListenerConsumer.run(KafkaMessageListenerContainer.java:532)
>                 at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>                 at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>                 at java.lang.Thread.run(Thread.java:745)



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to