This is an automated email from the ASF dual-hosted git repository. lhotari pushed a commit to branch branch-4.17 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit e199e8dfcc9e1235b23f49332c43fde8d2ec0285 Author: xiaolong ran <[email protected]> AuthorDate: Mon Nov 24 11:33:06 2025 +0800 Supports configuring TCP Keepalive related parameters in Bookie Client. (#4683) ### Motivation In a private environment, network connectivity between data centers is limited by firewall configuration. When the Broker accesses Bookkeeper infrequently, exceeding the firewall's default deactivation time (20 minutes), the firewall will actively disconnect the Broker → Bookkeeper connection. At this point, if a new production or consumption request is made, the Broker cannot safely access the Bookkeeper node again, causing production/consumption failures that cannot be automatically recovered from. The error message primarily indicates connection-level anomalies: <img width="2944" height="738" alt="Clipboard_Screenshot_1763091013" src="https://github.com/user-attachments/assets/e58b53b9-1c79-49cd-a4d7-1040f52569e0" /> The current Broker → Bookkeeper access chain is based on the Netty communication framework, and the keepalive function at the TCP connection layer reuses the system default SO_KEEPALIVE. The code is `PerChannelBookieClient` → `connect()`, as follows: <img width="1982" height="434" alt="Clipboard_Screenshot_1763091110" src="https://github.com/user-attachments/assets/979716b6-d99a-4dfd-9829-cd724c597c20" /> If no policy is explicitly set, the "System Default" option in the table follow will be used directly: TCP_KEEPIDLE | 7200s -- | -- TCP_KEEPINTVL | 75s TCP_KEEPCNT | 9 ### Changes Include the following three configuration items in ClientConfiguration: ``` public static final String TCP_KEEPIDLE = "tcpKeepIdle"; public static final String TCP_KEEPINTVL = "tcpKeepIntvl"; public static final String TCP_KEEPCNT = "tcpKeepCnt"; ``` To maintain compatibility, the system default configuration will still be used by default. (cherry picked from commit ea7884a82d9f877773157f8756a08a91d95ed5be) --- .../bookkeeper/conf/ClientConfiguration.java | 59 +++++++++ .../bookkeeper/proto/PerChannelBookieClient.java | 13 +- .../TestPerChannelBookieClientTcpKeepalive.java | 141 +++++++++++++++++++++ 3 files changed, 212 insertions(+), 1 deletion(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ClientConfiguration.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ClientConfiguration.java index 85445e49a7..f3dc75f553 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ClientConfiguration.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ClientConfiguration.java @@ -206,6 +206,11 @@ public class ClientConfiguration extends AbstractConfiguration<ClientConfigurati //For batch read api, it the batch read is not stable, we can fail back to single read by this config. protected static final String BATCH_READ_ENABLED = "batchReadEnabled"; + // SO_KEEPALIVE related configurations + public static final String TCP_KEEPIDLE = "tcpKeepIdle"; + public static final String TCP_KEEPINTVL = "tcpKeepIntvl"; + public static final String TCP_KEEPCNT = "tcpKeepCnt"; + /** * Construct a default client-side configuration. */ @@ -2107,6 +2112,60 @@ public class ClientConfiguration extends AbstractConfiguration<ClientConfigurati return getBoolean(BATCH_READ_ENABLED, true); } + /** + * Set TCP_KEEPIDLE value for SO_KEEPALIVE. + * @param keepIdle time in seconds + * @return client configuration + */ + public ClientConfiguration setTcpKeepIdle(int keepIdle) { + setProperty(TCP_KEEPIDLE, keepIdle); + return this; + } + + /** + * Get TCP_KEEPIDLE value for SO_KEEPALIVE. + * @return time in seconds, -1 means use system default + */ + public int getTcpKeepIdle() { + return getInt(TCP_KEEPIDLE, -1); + } + + /** + * Set TCP_KEEPINTVL value for SO_KEEPALIVE. + * @param keepIntvl time in seconds + * @return client configuration + */ + public ClientConfiguration setTcpKeepIntvl(int keepIntvl) { + setProperty(TCP_KEEPINTVL, keepIntvl); + return this; + } + + /** + * Get TCP_KEEPINTVL value for SO_KEEPALIVE. + * @return time in seconds, -1 means use system default + */ + public int getTcpKeepIntvl() { + return getInt(TCP_KEEPINTVL, -1); + } + + /** + * Set TCP_KEEPCNT value for SO_KEEPALIVE. + * @param keepCnt count + * @return client configuration + */ + public ClientConfiguration setTcpKeepCnt(int keepCnt) { + setProperty(TCP_KEEPCNT, keepCnt); + return this; + } + + /** + * Get TCP_KEEPCNT value for SO_KEEPALIVE. + * @return count, -1 means use system default + */ + public int getTcpKeepCnt() { + return getInt(TCP_KEEPCNT, -1); + } + @Override protected ClientConfiguration getThis() { return this; diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClient.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClient.java index fbef35be39..a8c95ca5ec 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClient.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClient.java @@ -561,7 +561,18 @@ public class PerChannelBookieClient extends ChannelInboundHandlerAdapter { if (!(eventLoopGroup instanceof DefaultEventLoopGroup)) { bootstrap.option(ChannelOption.TCP_NODELAY, conf.getClientTcpNoDelay()); bootstrap.option(ChannelOption.SO_KEEPALIVE, conf.getClientSockKeepalive()); - + if (eventLoopGroup instanceof EpollEventLoopGroup) { + // Set TCP keepalive parameters if configured + if (conf.getTcpKeepIdle() > 0) { + bootstrap.option(EpollChannelOption.TCP_KEEPIDLE, conf.getTcpKeepIdle()); + } + if (conf.getTcpKeepIntvl() > 0) { + bootstrap.option(EpollChannelOption.TCP_KEEPINTVL, conf.getTcpKeepIntvl()); + } + if (conf.getTcpKeepCnt() > 0) { + bootstrap.option(EpollChannelOption.TCP_KEEPCNT, conf.getTcpKeepCnt()); + } + } // if buffer sizes are 0, let OS auto-tune it if (conf.getClientSendBufferSize() > 0) { bootstrap.option(ChannelOption.SO_SNDBUF, conf.getClientSendBufferSize()); diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestPerChannelBookieClientTcpKeepalive.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestPerChannelBookieClientTcpKeepalive.java new file mode 100644 index 0000000000..6ecf2d71d9 --- /dev/null +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/proto/TestPerChannelBookieClientTcpKeepalive.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bookkeeper.proto; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import io.netty.channel.nio.NioEventLoopGroup; +import java.util.concurrent.TimeUnit; +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.junit.Test; + +/** + * Unit tests for TCP keepalive configuration in PerChannelBookieClient. + * This is a standalone test class that does not inherit from BookKeeperClusterTestCase + * to avoid NativeIO assertion errors during cluster startup. + */ +public class TestPerChannelBookieClientTcpKeepalive { + + /** + * Test ClientConfiguration TCP keepalive getter methods. + */ + @Test + public void testClientConfigurationGetters() { + ClientConfiguration conf = new ClientConfiguration(); + + // Test default values (should be -1 as per implementation) + assertEquals("Default TCP keep idle should be -1", -1, conf.getTcpKeepIdle()); + assertEquals("Default TCP keep interval should be -1", -1, conf.getTcpKeepIntvl()); + assertEquals("Default TCP keep count should be -1", -1, conf.getTcpKeepCnt()); + + // Test setter and getter methods + conf.setTcpKeepIdle(60); + conf.setTcpKeepIntvl(30); + conf.setTcpKeepCnt(5); + + assertEquals("TCP keep idle should be 60", 60, conf.getTcpKeepIdle()); + assertEquals("TCP keep interval should be 30", 30, conf.getTcpKeepIntvl()); + assertEquals("TCP keep count should be 5", 5, conf.getTcpKeepCnt()); + + // Test that -1 means use system default (as per implementation comments) + conf.setTcpKeepIdle(-1); + conf.setTcpKeepIntvl(-1); + conf.setTcpKeepCnt(-1); + + assertEquals("TCP keep idle should be -1 for system default", -1, conf.getTcpKeepIdle()); + assertEquals("TCP keep interval should be -1 for system default", -1, conf.getTcpKeepIntvl()); + assertEquals("TCP keep count should be -1 for system default", -1, conf.getTcpKeepCnt()); + } + + /** + * Test TCP keepalive configuration conditional logic. + * This test focuses on the conditional logic in PerChannelBookieClient + * without actually creating network connections. + */ + @Test + public void testTcpKeepaliveConditionalLogic() { + // Test various configuration scenarios + ClientConfiguration conf = new ClientConfiguration(); + + // Test with positive values + conf.setTcpKeepIdle(60); + conf.setTcpKeepIntvl(30); + conf.setTcpKeepCnt(5); + + assertTrue("TCP keep idle should be > 0", conf.getTcpKeepIdle() > 0); + assertTrue("TCP keep interval should be > 0", conf.getTcpKeepIntvl() > 0); + assertTrue("TCP keep count should be > 0", conf.getTcpKeepCnt() > 0); + + // Test with zero values + conf.setTcpKeepIdle(0); + conf.setTcpKeepIntvl(0); + conf.setTcpKeepCnt(0); + + assertFalse("TCP keep idle should not be configured for zero", conf.getTcpKeepIdle() > 0); + assertFalse("TCP keep interval should not be configured for zero", conf.getTcpKeepIntvl() > 0); + assertFalse("TCP keep count should not be configured for zero", conf.getTcpKeepCnt() > 0); + + // Test with negative values (system default) + conf.setTcpKeepIdle(-1); + conf.setTcpKeepIntvl(-1); + conf.setTcpKeepCnt(-1); + + assertFalse("TCP keep idle should not be configured for negative", conf.getTcpKeepIdle() > 0); + assertFalse("TCP keep interval should not be configured for negative", conf.getTcpKeepIntvl() > 0); + assertFalse("TCP keep count should not be configured for negative", conf.getTcpKeepCnt() > 0); + + // Test partial configuration + conf.setTcpKeepIdle(60); + conf.setTcpKeepIntvl(0); + conf.setTcpKeepCnt(5); + + assertTrue("TCP keep idle should be configured", conf.getTcpKeepIdle() > 0); + assertFalse("TCP keep interval should not be configured", conf.getTcpKeepIntvl() > 0); + assertTrue("TCP keep count should be configured", conf.getTcpKeepCnt() > 0); + } + + /** + * Test Epoll support detection logic. + */ + @Test + public void testEpollSupportDetection() { + // Test Epoll support detection + boolean isEpollSupported = false; + try { + Class.forName("io.netty.channel.epoll.EpollEventLoopGroup"); + isEpollSupported = true; + } catch (ClassNotFoundException e) { + // Epoll not supported on this platform + } + + // Test NIO event loop group detection + NioEventLoopGroup nioGroup = new NioEventLoopGroup(1); + assertFalse("NIO event loop group should not be detected as Epoll", + nioGroup.getClass().getName().contains("Epoll")); + + nioGroup.shutdownGracefully(0, 0, TimeUnit.MILLISECONDS); + + // The important thing is that the detection logic works correctly + // The actual result depends on the platform + System.out.println("Epoll support detected: " + isEpollSupported); + System.out.println("Operating system: " + System.getProperty("os.name")); + } +}
