rdhabalia opened a new pull request #9499:
URL: https://github.com/apache/pulsar/pull/9499
### Motivation
Standalone pulsar broker and function service can have deadlock which also
sometime causes failures in function unit-test case. Pulsar function tries to
create a subscription which blocks the zk thread and can be cause of possible
deadlock. Below is the thread-dump when the deadlock happens and function
service start up fails.
So, remove blocking call while creating subscription using admin-api.
```
"pulsar-load-manager-139-1" #717 prio=5 os_prio=31 tid=0x00007f8a68698000
nid=0x3610f waiting on condition [0x00007000156e9000]
java.lang.Thread.State: TIMED_WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x0000000792a07020> (a
java.util.concurrent.CompletableFuture$Signaller)
at
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
at
java.util.concurrent.CompletableFuture$Signaller.block(CompletableFuture.java:1709)
at
java.util.concurrent.ForkJoinPool.managedBlock(ForkJoinPool.java:3323)
at
java.util.concurrent.CompletableFuture.timedGet(CompletableFuture.java:1788)
at
java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1928)
at
org.apache.pulsar.zookeeper.ZooKeeperDataCache.get(ZooKeeperDataCache.java:97)
at
org.apache.pulsar.broker.loadbalance.impl.SimpleLoadManagerImpl.getDynamicConfigurationFromZK(SimpleLoadManagerImpl.java:391)
at
org.apache.pulsar.broker.loadbalance.impl.SimpleLoadManagerImpl.getDynamicConfigurationDouble(SimpleLoadManagerImpl.java:401)
at
org.apache.pulsar.broker.loadbalance.impl.SimpleLoadManagerImpl.getLoadBalancerBrokerOverloadedThresholdPercentage(SimpleLoadManagerImpl.java:450)
at
org.apache.pulsar.broker.loadbalance.impl.SimpleLoadManagerImpl.generateLoadReportForcefully(SimpleLoadManagerImpl.java:1139)
- locked <0x000000078e53f2d0> (a java.util.HashSet)
at
org.apache.pulsar.broker.loadbalance.impl.SimpleLoadManagerImpl.writeLoadReportOnZookeeper(SimpleLoadManagerImpl.java:1295)
at
org.apache.pulsar.broker.loadbalance.LoadReportUpdaterTask.run(LoadReportUpdaterTask.java:39)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:748)
"pulsar-ordered-OrderedExecutor-1-0-EventThread" #621 daemon prio=5
os_prio=31 tid=0x00007f89fd8e1800 nid=0x2fe07 waiting on condition
[0x000070000f7ce000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x0000000792ecb8b0> (a
java.util.concurrent.CompletableFuture$Signaller)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at
java.util.concurrent.CompletableFuture$Signaller.block(CompletableFuture.java:1707)
at
java.util.concurrent.ForkJoinPool.managedBlock(ForkJoinPool.java:3323)
at
java.util.concurrent.CompletableFuture.waitingGet(CompletableFuture.java:1742)
at
java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
at
org.apache.pulsar.broker.admin.impl.PersistentTopicsBase.internalCreateSubscriptionForNonPartitionedTopic(PersistentTopicsBase.java:2060)
at
org.apache.pulsar.broker.admin.impl.PersistentTopicsBase.lambda$69(PersistentTopicsBase.java:2034)
at
org.apache.pulsar.broker.admin.impl.PersistentTopicsBase$$Lambda$676/1709042625.accept(Unknown
Source)
at
java.util.concurrent.CompletableFuture.uniAccept(CompletableFuture.java:670)
at
java.util.concurrent.CompletableFuture$UniAccept.tryFire(CompletableFuture.java:646)
at
java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at
java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
at
org.apache.pulsar.zookeeper.ZooKeeperCache.lambda$5(ZooKeeperCache.java:249)
at
org.apache.pulsar.zookeeper.ZooKeeperCache$$Lambda$154/5998675.processResult(Unknown
Source)
at
org.apache.bookkeeper.zookeeper.ZooKeeperClient$15$1.processResult(ZooKeeperClient.java:879)
at
org.apache.zookeeper.ClientCnxn$EventThread.processEvent(ClientCnxn.java:583)
at
org.apache.zookeeper.ClientCnxn$EventThread.run(ClientCnxn.java:510)
main" #1 prio=5 os_prio=31 tid=0x00007f8aa0013000 nid=0x1603 waiting on
condition [0x0000700004160000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000007930efe68> (a
java.util.concurrent.CompletableFuture$Signaller)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at
java.util.concurrent.CompletableFuture$Signaller.block(CompletableFuture.java:1707)
at
java.util.concurrent.ForkJoinPool.managedBlock(ForkJoinPool.java:3323)
at
java.util.concurrent.CompletableFuture.waitingGet(CompletableFuture.java:1742)
at
java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
at
org.apache.pulsar.client.admin.internal.http.AsyncHttpConnector.apply(AsyncHttpConnector.java:178)
at
org.glassfish.jersey.client.ClientRuntime.invoke(ClientRuntime.java:297)
at
org.glassfish.jersey.client.JerseyInvocation.lambda$invoke$1(JerseyInvocation.java:632)
at
org.glassfish.jersey.client.JerseyInvocation$$Lambda$672/237755480.call(Unknown
Source)
at
org.glassfish.jersey.client.JerseyInvocation.call(JerseyInvocation.java:654)
at
org.glassfish.jersey.client.JerseyInvocation.lambda$runInScope$3(JerseyInvocation.java:648)
at
org.glassfish.jersey.client.JerseyInvocation$$Lambda$673/197134325.call(Unknown
Source)
at org.glassfish.jersey.internal.Errors.process(Errors.java:292)
at org.glassfish.jersey.internal.Errors.process(Errors.java:274)
at org.glassfish.jersey.internal.Errors.process(Errors.java:205)
at
org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:390)
at
org.glassfish.jersey.client.JerseyInvocation.runInScope(JerseyInvocation.java:648)
at
org.glassfish.jersey.client.JerseyInvocation.invoke(JerseyInvocation.java:631)
at
org.glassfish.jersey.client.JerseyInvocation$Builder.method(JerseyInvocation.java:434)
at
org.glassfish.jersey.client.JerseyInvocation$Builder.put(JerseyInvocation.java:318)
at
org.apache.pulsar.client.admin.internal.TopicsImpl.createSubscription(TopicsImpl.java:1143)
at
org.apache.pulsar.functions.worker.PulsarWorkerService.start(PulsarWorkerService.java:454)
at
org.apache.pulsar.broker.PulsarService.startWorkerService(PulsarService.java:1343)
at
org.apache.pulsar.broker.PulsarService.start(PulsarService.java:671)
at
org.apache.pulsar.io.PulsarFunctionE2ETest.setup(PulsarFunctionE2ETest.java:209)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]