lhotari commented on code in PR #21498: URL: https://github.com/apache/pulsar/pull/21498#discussion_r1380049462
########## pulsar-broker/src/test/java/org/apache/pulsar/client/impl/InjectedClientCnxClientBuilder.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pulsar.client.impl; + +import static org.apache.pulsar.common.api.proto.BaseCommand.Type; +import com.google.common.annotations.VisibleForTesting; +import io.netty.buffer.ByteBuf; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.EventLoopGroup; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.ThreadFactory; +import org.apache.pulsar.client.impl.conf.ClientConfigurationData; +import org.apache.pulsar.client.util.ExecutorProvider; +import org.apache.pulsar.common.api.proto.BaseCommand; +import org.apache.pulsar.common.api.proto.CommandAckResponse; +import org.apache.pulsar.common.api.proto.CommandActiveConsumerChange; +import org.apache.pulsar.common.api.proto.CommandAddPartitionToTxnResponse; +import org.apache.pulsar.common.api.proto.CommandAddSubscriptionToTxnResponse; +import org.apache.pulsar.common.api.proto.CommandAuthChallenge; +import 
org.apache.pulsar.common.api.proto.CommandCloseConsumer; +import org.apache.pulsar.common.api.proto.CommandCloseProducer; +import org.apache.pulsar.common.api.proto.CommandConnected; +import org.apache.pulsar.common.api.proto.CommandEndTxnOnPartitionResponse; +import org.apache.pulsar.common.api.proto.CommandEndTxnOnSubscriptionResponse; +import org.apache.pulsar.common.api.proto.CommandEndTxnResponse; +import org.apache.pulsar.common.api.proto.CommandError; +import org.apache.pulsar.common.api.proto.CommandGetLastMessageIdResponse; +import org.apache.pulsar.common.api.proto.CommandGetOrCreateSchemaResponse; +import org.apache.pulsar.common.api.proto.CommandGetSchemaResponse; +import org.apache.pulsar.common.api.proto.CommandGetTopicsOfNamespaceResponse; +import org.apache.pulsar.common.api.proto.CommandLookupTopicResponse; +import org.apache.pulsar.common.api.proto.CommandMessage; +import org.apache.pulsar.common.api.proto.CommandNewTxnResponse; +import org.apache.pulsar.common.api.proto.CommandPartitionedTopicMetadataResponse; +import org.apache.pulsar.common.api.proto.CommandPing; +import org.apache.pulsar.common.api.proto.CommandProducerSuccess; +import org.apache.pulsar.common.api.proto.CommandReachedEndOfTopic; +import org.apache.pulsar.common.api.proto.CommandSendError; +import org.apache.pulsar.common.api.proto.CommandSendReceipt; +import org.apache.pulsar.common.api.proto.CommandSuccess; +import org.apache.pulsar.common.api.proto.CommandTcClientConnectResponse; +import org.apache.pulsar.common.api.proto.CommandTopicMigrated; +import org.apache.pulsar.common.api.proto.CommandWatchTopicListSuccess; +import org.apache.pulsar.common.api.proto.CommandWatchTopicUpdate; +import org.apache.pulsar.common.util.netty.EventLoopUtil; + +public class InjectedClientCnxClientBuilder { Review Comment: Great idea to have a separate class for this. 
########## pulsar-broker/src/test/java/org/apache/pulsar/client/api/DispatcherLockTest.java: ########## @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pulsar.client.api; + +import static org.testng.Assert.assertEquals; +import static org.apache.pulsar.client.impl.InjectedClientCnxClientBuilder.ClientCnxCustomizer; +import static org.apache.pulsar.client.api.KeySharedPolicy.KeySharedPolicySticky; +import static org.apache.pulsar.common.api.proto.BaseCommand.Type; +import java.util.Collections; +import java.util.List; +import lombok.extern.slf4j.Slf4j; +import org.apache.pulsar.broker.BrokerTestUtil; +import org.apache.pulsar.broker.service.persistent.PersistentTopic; +import org.apache.pulsar.client.impl.ClientBuilderImpl; +import org.apache.pulsar.client.impl.InjectedClientCnxClientBuilder; +import org.apache.pulsar.client.impl.PulsarClientImpl; +import org.apache.pulsar.common.api.proto.CommandPing; +import org.awaitility.Awaitility; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +@Slf4j +@Test(groups = "broker-api") +public class DispatcherLockTest extends ProducerConsumerBase { + + 
@BeforeMethod + @Override + protected void setup() throws Exception { + super.internalSetup(); + super.producerBaseSetup(); + } + + @AfterMethod(alwaysRun = true) + @Override + protected void cleanup() throws Exception { + super.internalCleanup(); + } + + /** + * The method is used to verify that the Broker will not leave an orphan consumer in the scenario below: + * 1. Register "consumer-1" + * - "consumer-1" will be maintained by the Subscription. + * - "consumer-1" will be maintained by the Dispatcher. + * 2. The connection of "consumer-1" has something wrong. We call this connection "connection-1" + * 3. Try to register "consumer-2" + * - "consumer-2" will be maintained by the Subscription. At this time, there are two consumers under this + * subscription. + * - This will trigger a connection check task for connection-1, we call this task "CheckConnectionLiveness". + * This task will be executed in another thread, which means it will release the lock `Synchronized(dispatcher)` + * - "consumer-2" was not maintained by the Dispatcher yet. + * 4. "CheckConnectionLiveness" will kick out "consumer-1" after 5 seconds, then "consumer-2" will be maintained + * by the Dispatcher. + * (Highlight) Race condition: if the connection of "consumer-2" went to a wrong state before step 4, + * "consumer-2" maintained by the Subscription and not maintained by the Dispatcher. Would the scenario below + * will happen? + * 1. "connection-2" closed. + * 2. Remove "consumer-2" from the Subscription. + * 3. Try to remove "consumer-2" from the Dispatcher, but there are no consumers under this Dispatcher. To remove + * nothing. + * 4. "CheckConnectionLiveness" is finished; put "consumer-2" into the Dispatcher. + * 5. At this moment, the consumer's state of Subscription and Dispatcher are not consistent. There is an orphan + * consumer under the Dispatcher. 
+ */ + @Test + public void testNoOrphanConsumerIfLostDispatcherLock() throws Exception { + final String tpName = BrokerTestUtil.newUniqueName("persistent://public/default/tp"); + final String subscription = "s1"; + admin.topics().createNonPartitionedTopic(tpName); + admin.topics().createSubscription(tpName, subscription, MessageId.earliest); + List<Range> ranges = Collections.singletonList(new Range(0, 65535)); + KeySharedPolicySticky sharedPolicySticky = new KeySharedPolicySticky.KeySharedPolicySticky().ranges(ranges); + final String consumerName1 = "c1"; + final String consumerName2 = "c2"; + + // Create a client that injected logic: do not answer for the command Ping + ClientBuilderImpl clientBuilder = (ClientBuilderImpl) PulsarClient.builder().serviceUrl(lookupUrl.toString()); + PulsarClient skipHealthCheckClient = InjectedClientCnxClientBuilder.create(clientBuilder, + new ClientCnxCustomizer(Type.PING){ + @Override + public void handleCommand(Object command) { + if (command instanceof CommandPing) { + // do not response anything. + } + } + }); Review Comment: Is this ClientCnxCustomizer any easier than providing an actual anonymous ClientCnx class? Passing a `BiFunction<ClientConfigurationData, EventLoopGroup, ClientCnx>` as a parameter in the `InjectedClientCnxClientBuilder` should be sufficient to support anything that is needed. ########## pulsar-broker/src/test/java/org/apache/pulsar/client/api/DispatcherLockTest.java: ########## @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pulsar.client.api; + +import static org.testng.Assert.assertEquals; +import static org.apache.pulsar.client.impl.InjectedClientCnxClientBuilder.ClientCnxCustomizer; +import static org.apache.pulsar.client.api.KeySharedPolicy.KeySharedPolicySticky; +import static org.apache.pulsar.common.api.proto.BaseCommand.Type; +import java.util.Collections; +import java.util.List; +import lombok.extern.slf4j.Slf4j; +import org.apache.pulsar.broker.BrokerTestUtil; +import org.apache.pulsar.broker.service.persistent.PersistentTopic; +import org.apache.pulsar.client.impl.ClientBuilderImpl; +import org.apache.pulsar.client.impl.InjectedClientCnxClientBuilder; +import org.apache.pulsar.client.impl.PulsarClientImpl; +import org.apache.pulsar.common.api.proto.CommandPing; +import org.awaitility.Awaitility; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +@Slf4j +@Test(groups = "broker-api") +public class DispatcherLockTest extends ProducerConsumerBase { + + @BeforeMethod + @Override + protected void setup() throws Exception { + super.internalSetup(); + super.producerBaseSetup(); + } + + @AfterMethod(alwaysRun = true) + @Override + protected void cleanup() throws Exception { + super.internalCleanup(); + } + + /** + * The method is used to verify that the Broker will not leave an orphan consumer in the scenario below: + * 1. Register "consumer-1" + * - "consumer-1" will be maintained by the Subscription. + * - "consumer-1" will be maintained by the Dispatcher. + * 2. 
The connection of "consumer-1" has something wrong. We call this connection "connection-1" + * 3. Try to register "consumer-2" + * - "consumer-2" will be maintained by the Subscription. At this time, there are two consumers under this + * subscription. + * - This will trigger a connection check task for connection-1, we call this task "CheckConnectionLiveness". + * This task will be executed in another thread, which means it will release the lock `Synchronized(dispatcher)` + * - "consumer-2" was not maintained by the Dispatcher yet. + * 4. "CheckConnectionLiveness" will kick out "consumer-1" after 5 seconds, then "consumer-2" will be maintained + * by the Dispatcher. + * (Highlight) Race condition: if the connection of "consumer-2" went to a wrong state before step 4, + * "consumer-2" maintained by the Subscription and not maintained by the Dispatcher. Would the scenario below + * will happen? + * 1. "connection-2" closed. + * 2. Remove "consumer-2" from the Subscription. + * 3. Try to remove "consumer-2" from the Dispatcher, but there are no consumers under this Dispatcher. To remove + * nothing. + * 4. "CheckConnectionLiveness" is finished; put "consumer-2" into the Dispatcher. + * 5. At this moment, the consumer's state of Subscription and Dispatcher are not consistent. There is an orphan + * consumer under the Dispatcher. 
+ */ + @Test + public void testNoOrphanConsumerIfLostDispatcherLock() throws Exception { + final String tpName = BrokerTestUtil.newUniqueName("persistent://public/default/tp"); + final String subscription = "s1"; + admin.topics().createNonPartitionedTopic(tpName); + admin.topics().createSubscription(tpName, subscription, MessageId.earliest); + List<Range> ranges = Collections.singletonList(new Range(0, 65535)); + KeySharedPolicySticky sharedPolicySticky = new KeySharedPolicySticky.KeySharedPolicySticky().ranges(ranges); + final String consumerName1 = "c1"; + final String consumerName2 = "c2"; + + // Create a client that injected logic: do not answer for the command Ping + ClientBuilderImpl clientBuilder = (ClientBuilderImpl) PulsarClient.builder().serviceUrl(lookupUrl.toString()); + PulsarClient skipHealthCheckClient = InjectedClientCnxClientBuilder.create(clientBuilder, + new ClientCnxCustomizer(Type.PING){ + @Override + public void handleCommand(Object command) { + if (command instanceof CommandPing) { + // do not response anything. + } + } + }); + PulsarClientImpl normalClient = (PulsarClientImpl) newPulsarClient(lookupUrl.toString(), 0); + + // 1. Register "consumer-1" + skipHealthCheckClient.newConsumer().topic(tpName).subscriptionName(subscription) + .consumerName(consumerName1).keySharedPolicy(sharedPolicySticky) + .subscriptionType(SubscriptionType.Key_Shared).subscribe(); + // Wait for all commands of the consumer c1 have been handled. To avoid the Broker mark the connection is active + // after it receive anything. + Thread.sleep(1000); Review Comment: how do we know that 1000ms is sufficient? ########## pulsar-broker/src/test/java/org/apache/pulsar/client/impl/InjectedClientCnxClientBuilder.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pulsar.client.impl; + +import static org.apache.pulsar.common.api.proto.BaseCommand.Type; +import com.google.common.annotations.VisibleForTesting; +import io.netty.buffer.ByteBuf; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.EventLoopGroup; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.ThreadFactory; +import org.apache.pulsar.client.impl.conf.ClientConfigurationData; +import org.apache.pulsar.client.util.ExecutorProvider; +import org.apache.pulsar.common.api.proto.BaseCommand; +import org.apache.pulsar.common.api.proto.CommandAckResponse; +import org.apache.pulsar.common.api.proto.CommandActiveConsumerChange; +import org.apache.pulsar.common.api.proto.CommandAddPartitionToTxnResponse; +import org.apache.pulsar.common.api.proto.CommandAddSubscriptionToTxnResponse; +import org.apache.pulsar.common.api.proto.CommandAuthChallenge; +import org.apache.pulsar.common.api.proto.CommandCloseConsumer; +import org.apache.pulsar.common.api.proto.CommandCloseProducer; +import org.apache.pulsar.common.api.proto.CommandConnected; +import org.apache.pulsar.common.api.proto.CommandEndTxnOnPartitionResponse; +import org.apache.pulsar.common.api.proto.CommandEndTxnOnSubscriptionResponse; +import 
org.apache.pulsar.common.api.proto.CommandEndTxnResponse; +import org.apache.pulsar.common.api.proto.CommandError; +import org.apache.pulsar.common.api.proto.CommandGetLastMessageIdResponse; +import org.apache.pulsar.common.api.proto.CommandGetOrCreateSchemaResponse; +import org.apache.pulsar.common.api.proto.CommandGetSchemaResponse; +import org.apache.pulsar.common.api.proto.CommandGetTopicsOfNamespaceResponse; +import org.apache.pulsar.common.api.proto.CommandLookupTopicResponse; +import org.apache.pulsar.common.api.proto.CommandMessage; +import org.apache.pulsar.common.api.proto.CommandNewTxnResponse; +import org.apache.pulsar.common.api.proto.CommandPartitionedTopicMetadataResponse; +import org.apache.pulsar.common.api.proto.CommandPing; +import org.apache.pulsar.common.api.proto.CommandProducerSuccess; +import org.apache.pulsar.common.api.proto.CommandReachedEndOfTopic; +import org.apache.pulsar.common.api.proto.CommandSendError; +import org.apache.pulsar.common.api.proto.CommandSendReceipt; +import org.apache.pulsar.common.api.proto.CommandSuccess; +import org.apache.pulsar.common.api.proto.CommandTcClientConnectResponse; +import org.apache.pulsar.common.api.proto.CommandTopicMigrated; +import org.apache.pulsar.common.api.proto.CommandWatchTopicListSuccess; +import org.apache.pulsar.common.api.proto.CommandWatchTopicUpdate; +import org.apache.pulsar.common.util.netty.EventLoopUtil; + +public class InjectedClientCnxClientBuilder { + + public static PulsarClientImpl create(final ClientBuilderImpl clientBuilder, final ClientCnxCustomizer customizer) + throws Exception { + ClientConfigurationData conf = clientBuilder.getClientConfigurationData(); + ThreadFactory threadFactory = new ExecutorProvider + .ExtendedThreadFactory("pulsar-client-io", Thread.currentThread().isDaemon()); + EventLoopGroup eventLoopGroup = Review Comment: This would currently leak threads since it doesn't get closed. 
One possibility would be to override the closeAsync and shutdown methods of PulsarClientImpl like it was done in #21468 for PulsarTestClient. ########## pulsar-broker/src/test/java/org/apache/pulsar/client/impl/InjectedClientCnxClientBuilder.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pulsar.client.impl; + +import static org.apache.pulsar.common.api.proto.BaseCommand.Type; +import com.google.common.annotations.VisibleForTesting; +import io.netty.buffer.ByteBuf; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.EventLoopGroup; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.ThreadFactory; +import org.apache.pulsar.client.impl.conf.ClientConfigurationData; +import org.apache.pulsar.client.util.ExecutorProvider; +import org.apache.pulsar.common.api.proto.BaseCommand; +import org.apache.pulsar.common.api.proto.CommandAckResponse; +import org.apache.pulsar.common.api.proto.CommandActiveConsumerChange; +import org.apache.pulsar.common.api.proto.CommandAddPartitionToTxnResponse; +import org.apache.pulsar.common.api.proto.CommandAddSubscriptionToTxnResponse; +import org.apache.pulsar.common.api.proto.CommandAuthChallenge; +import org.apache.pulsar.common.api.proto.CommandCloseConsumer; +import org.apache.pulsar.common.api.proto.CommandCloseProducer; +import org.apache.pulsar.common.api.proto.CommandConnected; +import org.apache.pulsar.common.api.proto.CommandEndTxnOnPartitionResponse; +import org.apache.pulsar.common.api.proto.CommandEndTxnOnSubscriptionResponse; +import org.apache.pulsar.common.api.proto.CommandEndTxnResponse; +import org.apache.pulsar.common.api.proto.CommandError; +import org.apache.pulsar.common.api.proto.CommandGetLastMessageIdResponse; +import org.apache.pulsar.common.api.proto.CommandGetOrCreateSchemaResponse; +import org.apache.pulsar.common.api.proto.CommandGetSchemaResponse; +import org.apache.pulsar.common.api.proto.CommandGetTopicsOfNamespaceResponse; +import org.apache.pulsar.common.api.proto.CommandLookupTopicResponse; +import org.apache.pulsar.common.api.proto.CommandMessage; +import org.apache.pulsar.common.api.proto.CommandNewTxnResponse; +import 
org.apache.pulsar.common.api.proto.CommandPartitionedTopicMetadataResponse; +import org.apache.pulsar.common.api.proto.CommandPing; +import org.apache.pulsar.common.api.proto.CommandProducerSuccess; +import org.apache.pulsar.common.api.proto.CommandReachedEndOfTopic; +import org.apache.pulsar.common.api.proto.CommandSendError; +import org.apache.pulsar.common.api.proto.CommandSendReceipt; +import org.apache.pulsar.common.api.proto.CommandSuccess; +import org.apache.pulsar.common.api.proto.CommandTcClientConnectResponse; +import org.apache.pulsar.common.api.proto.CommandTopicMigrated; +import org.apache.pulsar.common.api.proto.CommandWatchTopicListSuccess; +import org.apache.pulsar.common.api.proto.CommandWatchTopicUpdate; +import org.apache.pulsar.common.util.netty.EventLoopUtil; + +public class InjectedClientCnxClientBuilder { + + public static PulsarClientImpl create(final ClientBuilderImpl clientBuilder, final ClientCnxCustomizer customizer) + throws Exception { + ClientConfigurationData conf = clientBuilder.getClientConfigurationData(); + ThreadFactory threadFactory = new ExecutorProvider + .ExtendedThreadFactory("pulsar-client-io", Thread.currentThread().isDaemon()); + EventLoopGroup eventLoopGroup = + EventLoopUtil.newEventLoopGroup(conf.getNumIoThreads(), conf.isEnableBusyWait(), threadFactory); + + // Inject into ClientCnx. + ConnectionPool pool = new ConnectionPool(conf, eventLoopGroup, + () -> new InjectedClientCnx(conf, eventLoopGroup, customizer)); + + return new PulsarClientImpl(conf, eventLoopGroup, pool); + } + + public static abstract class ClientCnxCustomizer { Review Comment: I don't see a reason to have ClientCnxCustomizer and InjectedClientCnx. Simply providing a `BiFunction<ClientConfigurationData, EventLoopGroup, ClientCnx>` parameter for creating the ClientCnx instance should be sufficient. 
For example, you'd use it somewhat like this:
```
.clientCnxFunction((conf, eventLoopGroup) -> {
    return new ClientCnx(conf, eventLoopGroup) {
        @Override
        protected void handlePing(CommandPing ping) {
            // do not respond to CommandPing
        }
    };
});
```
The PulsarTestClient shows how it could be implemented. PulsarTestClient is specialized for a certain test, but it contains some useful ideas for how clientCnxFunction could be used to instantiate the ClientCnx when it's needed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
