[ 
https://issues.apache.org/jira/browse/TINKERPOP-2569?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17422829#comment-17422829
 ] 

ASF GitHub Bot commented on TINKERPOP-2569:
-------------------------------------------

divijvaidya commented on a change in pull request #1476:
URL: https://github.com/apache/tinkerpop/pull/1476#discussion_r719472426



##########
File path: 
gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/Client.java
##########
@@ -526,18 +527,24 @@ protected void initializeImplementation() {
 
             try {
                 CompletableFuture.allOf(cluster.allHosts().stream()
-                        .map(host -> CompletableFuture.runAsync(() -> 
initializeConnectionSetupForHost.accept(host), hostExecutor))
-                        .toArray(CompletableFuture[]::new))
+                                .map(host -> CompletableFuture.runAsync(() -> 
initializeConnectionSetupForHost.accept(host), hostExecutor))
+                                .toArray(CompletableFuture[]::new))
                         .join();
             } catch (CompletionException ex) {
-                Throwable cause;
-                Throwable result = ex;
-                if (null != (cause = ex.getCause())) {
-                    result = cause;
-                }
+                logger.error("", (ex.getCause() == null) ? ex : ex.getCause());
+            }
 
-                logger.error("", result);
-            } finally {
+            // throw an error if there is no host available after initializing 
connection pool.
+            if (cluster.availableHosts().isEmpty()) {
+                throw new NoHostAvailableException();

Review comment:
       we should shut down the hostExecutor when this exception is thrown; 
otherwise it would lead to a resource leak

##########
File path: 
gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/Client.java
##########
@@ -561,16 +568,56 @@ protected void initializeImplementation() {
 
         private Consumer<Host> initializeConnectionSetupForHost = host -> {
             try {
-                // hosts that don't initialize connection pools will come up 
as a dead host
+                // hosts that don't initialize connection pools will come up 
as a dead host.
                 hostConnectionPools.put(host, new ConnectionPool(host, 
ClusteredClient.this));
 
-                // added a new host to the cluster so let the load-balancer 
know
+                // hosts are not marked as available at cluster 
initialization, and are made available here instead.
+                host.makeAvailable();
+
+                // added a new host to the cluster so let the load-balancer 
know.
                 
ClusteredClient.this.cluster.loadBalancingStrategy().onNew(host);
             } catch (RuntimeException ex) {
-                final String errMsg = "Could not initialize client for " + 
host;
-                throw new RuntimeException(errMsg, ex);
+                throw new RuntimeException(String.format("Could not initialize 
client for %s.", host), ex);
             }
         };
+
+        private void handleUnavailableHosts(List<Host> unavailableHosts, 
ExecutorService hostExecutor) {
+            // start the re-initialization attempt for each of the unavailable 
hosts through Host.makeUnavailable().
+            try {
+                CompletableFuture.allOf(unavailableHosts.stream()
+                                .map(host -> CompletableFuture.runAsync(() -> 
host.makeUnavailable(this::tryReInitializeHost), hostExecutor))
+                                .toArray(CompletableFuture[]::new))
+                        .join();
+            } catch (CompletionException ex) {
+                logger.error("", (ex.getCause() == null) ? ex : ex.getCause());
+            } finally {
+                hostExecutor.shutdown();
+            }
+        }
+
+        /**
+         * Attempt to re-initialize the {@link Host} that was previously 
marked as unavailable.  This method gets called
+         * as part of a schedule in {@link Host} to periodically try to 
re-initialize.
+         */
+        public boolean tryReInitializeHost(final Host host) {
+            logger.debug("Trying to re-initiate host connection pool on {}", 
host);
+
+            try {
+                // hosts that don't initialize connection pools will come up 
as a dead host
+                hostConnectionPools.put(host, new ConnectionPool(host, 
ClusteredClient.this));

Review comment:
       re-use `initializeConnectionSetupForHost()` ?

##########
File path: 
gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/Client.java
##########
@@ -561,16 +568,56 @@ protected void initializeImplementation() {
 
         private Consumer<Host> initializeConnectionSetupForHost = host -> {
             try {
-                // hosts that don't initialize connection pools will come up 
as a dead host
+                // hosts that don't initialize connection pools will come up 
as a dead host.
                 hostConnectionPools.put(host, new ConnectionPool(host, 
ClusteredClient.this));
 
-                // added a new host to the cluster so let the load-balancer 
know
+                // hosts are not marked as available at cluster 
initialization, and are made available here instead.
+                host.makeAvailable();
+
+                // added a new host to the cluster so let the load-balancer 
know.
                 
ClusteredClient.this.cluster.loadBalancingStrategy().onNew(host);
             } catch (RuntimeException ex) {
-                final String errMsg = "Could not initialize client for " + 
host;
-                throw new RuntimeException(errMsg, ex);
+                throw new RuntimeException(String.format("Could not initialize 
client for %s.", host), ex);
             }
         };
+
+        private void handleUnavailableHosts(List<Host> unavailableHosts, 
ExecutorService hostExecutor) {
+            // start the re-initialization attempt for each of the unavailable 
hosts through Host.makeUnavailable().
+            try {
+                CompletableFuture.allOf(unavailableHosts.stream()
+                                .map(host -> CompletableFuture.runAsync(() -> 
host.makeUnavailable(this::tryReInitializeHost), hostExecutor))
+                                .toArray(CompletableFuture[]::new))
+                        .join();
+            } catch (CompletionException ex) {
+                logger.error("", (ex.getCause() == null) ? ex : ex.getCause());
+            } finally {
+                hostExecutor.shutdown();
+            }
+        }
+
+        /**
+         * Attempt to re-initialize the {@link Host} that was previously 
marked as unavailable.  This method gets called
+         * as part of a schedule in {@link Host} to periodically try to 
re-initialize.
+         */
+        public boolean tryReInitializeHost(final Host host) {
+            logger.debug("Trying to re-initiate host connection pool on {}", 
host);
+
+            try {
+                // hosts that don't initialize connection pools will come up 
as a dead host
+                hostConnectionPools.put(host, new ConnectionPool(host, 
ClusteredClient.this));
+
+                // added a new host to the cluster so let the load-balancer 
know
+                
ClusteredClient.this.cluster.loadBalancingStrategy().onNew(host);
+
+                // mark this host as available
+                host.makeAvailable();

Review comment:
       this is already done in the `reconnected()` function, which is called 
from `makeUnavailable()`

##########
File path: 
gremlin-driver/src/main/java/org/apache/tinkerpop/gremlin/driver/Client.java
##########
@@ -561,16 +568,56 @@ protected void initializeImplementation() {
 
         private Consumer<Host> initializeConnectionSetupForHost = host -> {
             try {
-                // hosts that don't initialize connection pools will come up 
as a dead host
+                // hosts that don't initialize connection pools will come up 
as a dead host.
                 hostConnectionPools.put(host, new ConnectionPool(host, 
ClusteredClient.this));
 
-                // added a new host to the cluster so let the load-balancer 
know
+                // hosts are not marked as available at cluster 
initialization, and are made available here instead.
+                host.makeAvailable();
+
+                // added a new host to the cluster so let the load-balancer 
know.
                 
ClusteredClient.this.cluster.loadBalancingStrategy().onNew(host);
             } catch (RuntimeException ex) {
-                final String errMsg = "Could not initialize client for " + 
host;
-                throw new RuntimeException(errMsg, ex);
+                throw new RuntimeException(String.format("Could not initialize 
client for %s.", host), ex);
             }
         };
+
+        private void handleUnavailableHosts(List<Host> unavailableHosts, 
ExecutorService hostExecutor) {

Review comment:
       why do we need the hostExecutor to re-initialize the host? Asking 
because makeUnavailable() already uses the cluster's scheduler pool 
asynchronously to run the lambda. We can hand over the reconnect/re-initialize 
responsibility to the scheduler from here.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tinkerpop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Reconnect to server if Java driver fails to initialize
> ------------------------------------------------------
>
>                 Key: TINKERPOP-2569
>                 URL: https://issues.apache.org/jira/browse/TINKERPOP-2569
>             Project: TinkerPop
>          Issue Type: Bug
>          Components: driver
>    Affects Versions: 3.4.11
>            Reporter: Stephen Mallette
>            Priority: Minor
>
> As reported here on SO: 
> https://stackoverflow.com/questions/67586427/how-to-recover-with-a-retry-from-gremlin-nohostavailableexception
> If the host is unavailable at {{Client}} initialization then the host is not 
> put in a state where reconnect is possible. Essentially, this test for 
> {{GremlinServerIntegrateTest}} should pass:
> {code}
> @Test
>     public void shouldFailOnInitiallyDeadHost() throws Exception {
>         // start test with no server
>         this.stopServer();
>         final Cluster cluster = TestClientFactory.build().create();
>         final Client client = cluster.connect();
>         try {
>             // try to re-issue a request now that the server is down
>             client.submit("g").all().get(3000, TimeUnit.MILLISECONDS);
>             fail("Should throw an exception.");
>         } catch (RuntimeException re) {
>             // Client would have no active connections to the host, hence it 
> would encounter a timeout
>             // trying to find an alive connection to the host.
>             assertThat(re.getCause(), 
> instanceOf(NoHostAvailableException.class));
>             //
>             // should recover when the server comes back
>             //
>             // restart server
>             this.startServer();
>             // try a bunch of times to reconnect. on slower systems this may 
> simply take longer...looking at you travis
>             for (int ix = 1; ix < 11; ix++) {
>                 // the retry interval is 1 second, wait a bit longer
>                 TimeUnit.SECONDS.sleep(5);
>                 try {
>                     final List<Result> results = 
> client.submit("1+1").all().get(3000, TimeUnit.MILLISECONDS);
>                     assertEquals(1, results.size());
>                     assertEquals(2, results.get(0).getInt());
>                 } catch (Exception ex) {
>                     if (ix == 10)
>                         fail("Should have eventually succeeded");
>                 }
>             }
>         } finally {
>             cluster.close();
>         }
>     }
> {code}
> Note that there is a similar test, {{shouldFailOnDeadHost()}}, which first 
> allows a connection to a host, then kills the host, and then restarts it 
> again; it demonstrates that reconnection works in that situation.
> I thought it might be an easy fix to simply call 
> {{considerHostUnavailable()}} in the {{ConnectionPool}} constructor in the 
> event of a {{CompletionException}}, which should kickstart the reconnect 
> process. The reconnects started firing, but they all failed for some reason. I 
> didn't have time to investigate further than that. 
> Currently the only workaround is to recreate the `Client` if this sort of 
> situation occurs.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to