[ https://issues.apache.org/jira/browse/ARTEMIS-4251?focusedWorklogId=860327&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-860327 ]
ASF GitHub Bot logged work on ARTEMIS-4251: ------------------------------------------- Author: ASF GitHub Bot Created on: 03/May/23 12:56 Start Date: 03/May/23 12:56 Worklog Time Spent: 10m Work Description: brusdev commented on code in PR #4447: URL: https://github.com/apache/activemq-artemis/pull/4447#discussion_r1183650110 ########## artemis-core-client/src/main/java/org/apache/activemq/artemis/core/client/impl/ClientSessionFactoryImpl.java: ########## @@ -664,33 +669,94 @@ private void failoverOrReconnect(final Object connectionID, sessionsToFailover = new HashSet<>(sessions); } + // Notify sessions before failover. for (ClientSessionInternal session : sessionsToFailover) { session.preHandleFailover(connection); } - boolean allSessionReconnected = false; - int failedReconnectSessionsCounter = 0; - do { - allSessionReconnected = reconnectSessions(sessionsToFailover, oldConnection, reconnectAttempts, me); - if (oldConnection != null) { - oldConnection.destroy(); + + // Try to reconnect to the current connector pair. + // Before ARTEMIS-4251 ClientSessionFactoryImpl only tries to reconnect to the current connector pair. + int reconnectRetries = 0; + boolean sessionsReconnected = false; + BiPredicate<Boolean, Integer> reconnectRetryPredicate = + (reconnected, retries) -> clientProtocolManager.isAlive() && + !reconnected && (reconnectAttempts == -1 || retries < reconnectAttempts); + while (reconnectRetryPredicate.test(sessionsReconnected, reconnectRetries)) { + + int remainingReconnectRetries = reconnectAttempts == -1 ? 
-1 : reconnectAttempts - reconnectRetries; + reconnectRetries += getConnectionWithRetry(remainingReconnectRetries, oldConnection); + + if (connection != null) { + sessionsReconnected = reconnectSessions(sessionsToFailover, oldConnection, me); + + if (!sessionsReconnected) { + if (oldConnection != null) { + oldConnection.destroy(); + } + + oldConnection = connection; + connection = null; + } + } + + reconnectRetries++; + if (reconnectRetryPredicate.test(sessionsReconnected, reconnectRetries)) { + waitForRetry(retryInterval); } + } - if (!allSessionReconnected) { - failedReconnectSessionsCounter++; - oldConnection = connection; - connection = null; - // Wait for retry when the connection is established but not all session are reconnected. - if ((reconnectAttempts == -1 || failedReconnectSessionsCounter < reconnectAttempts) && oldConnection != null) { + // Try to connect to other connector pairs. + // After ARTEMIS-4251 ClientSessionFactoryImpl tries to connect to + // other connector pairs when reconnection o the current connector pair fails. 
+ int failoverReties = 0; + int connectorsCount = 0; + Pair<TransportConfiguration, TransportConfiguration> connectorPair; + BiPredicate<Boolean, Integer> failoverRetryPredicate = + (reconnected, retries) -> clientProtocolManager.isAlive() && + !reconnected && (failoverAttempts == -1 || retries < failoverAttempts); + while (failoverRetryPredicate.test(sessionsReconnected, failoverReties)) { + + connectorsCount++; + connectorPair = serverLocator.selectNextConnectorPair(); + + if (connectorPair != null) { + connectorConfig = connectorPair.getA(); + currentConnectorConfig = connectorPair.getA(); + if (connectorPair.getB() != null) { + backupConnectorConfig = connectorPair.getB(); + } + + getConnection(); + } + + if (connection != null) { + sessionsReconnected = reconnectSessions(sessionsToFailover, oldConnection, me); + + if (!sessionsReconnected) { + if (oldConnection != null) { + oldConnection.destroy(); + } + + oldConnection = connection; + connection = null; + } + } + + if (connectorsCount >= serverLocator.getConnectorsSize()) { + connectorsCount = 0; + failoverReties++; + if (failoverRetryPredicate.test(false, failoverReties)) { Review Comment: Using the exponential back-off algorithm for the `failover connect block` as well sounds good to me. Issue Time Tracking ------------------- Worklog Id: (was: 860327) Time Spent: 1h 40m (was: 1.5h) > Support CORE client failover to other live servers > -------------------------------------------------- > > Key: ARTEMIS-4251 > URL: https://issues.apache.org/jira/browse/ARTEMIS-4251 > Project: ActiveMQ Artemis > Issue Type: Improvement > Reporter: Domenico Francesco Bruscino > Assignee: Domenico Francesco Bruscino > Priority: Major > Time Spent: 1h 40m > Remaining Estimate: 0h > > The CORE clients support failover only by reconnecting to the current > live/backup pair. Improve the CORE client failover by connecting to other live > servers when all reconnect attempts fail, i.e. 
in a cluster composed of 2 > live servers, when the server to which the CORE client is connected goes down, > the CORE client should reconnect its sessions to the other live broker. -- This message was sent by Atlassian Jira (v8.20.10#820010)