[
https://issues.apache.org/jira/browse/NIFI-2360?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15390294#comment-15390294
]
ASF GitHub Bot commented on NIFI-2360:
--------------------------------------
Github user JPercivall commented on the issue:
https://github.com/apache/nifi/pull/705
Reproducible error:
1. Start a 3-node cluster.
2. Stop 2 of the nodes.
3. Restart all three nodes of the cluster.
None of them can start up; each one hits "Unexpected Error".
In nifi-app.log I see a lot of:
2016-07-22 18:13:37,643 WARN [Replicate Request Thread-8]
o.a.n.c.c.h.r.ThreadPoolRequestReplicator
com.sun.jersey.api.client.ClientHandlerException:
java.net.SocketTimeoutException: Read timed out
at
com.sun.jersey.client.urlconnection.URLConnectionClientHandler.handle(URLConnectionClientHandler.java:155)
~[jersey-client-1.19.jar:1.19]
at com.sun.jersey.api.client.Client.handle(Client.java:652)
~[jersey-client-1.19.jar:1.19]
at
com.sun.jersey.api.client.filter.GZIPContentEncodingFilter.handle(GZIPContentEncodingFilter.java:123)
~[jersey-client-1.19.jar:1.19]
at
com.sun.jersey.api.client.WebResource.handle(WebResource.java:682)
~[jersey-client-1.19.jar:1.19]
at
com.sun.jersey.api.client.WebResource.access$200(WebResource.java:74)
~[jersey-client-1.19.jar:1.19]
at
com.sun.jersey.api.client.WebResource$Builder.get(WebResource.java:509)
~[jersey-client-1.19.jar:1.19]
at
org.apache.nifi.cluster.coordination.http.replication.ThreadPoolRequestReplicator.replicateRequest(ThreadPoolRequestReplicator.java:493)
~[nifi-framework-cluster-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
org.apache.nifi.cluster.coordination.http.replication.ThreadPoolRequestReplicator$NodeHttpRequest.run(ThreadPoolRequestReplicator.java:687)
~[nifi-framework-cluster-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
[na:1.8.0_74]
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
[na:1.8.0_74]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
[na:1.8.0_74]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
[na:1.8.0_74]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_74]
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
~[na:1.8.0_74]
at
java.net.SocketInputStream.socketRead(SocketInputStream.java:116) ~[na:1.8.0_74]
at java.net.SocketInputStream.read(SocketInputStream.java:170)
~[na:1.8.0_74]
at java.net.SocketInputStream.read(SocketInputStream.java:141)
~[na:1.8.0_74]
at sun.security.ssl.InputRecord.readFully(InputRecord.java:465)
~[na:1.8.0_74]
at sun.security.ssl.InputRecord.read(InputRecord.java:503)
~[na:1.8.0_74]
at
sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:973) ~[na:1.8.0_74]
at
sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:930)
~[na:1.8.0_74]
at sun.security.ssl.AppInputStream.read(AppInputStream.java:105)
~[na:1.8.0_74]
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
~[na:1.8.0_74]
at java.io.BufferedInputStream.read1(BufferedInputStream.java:286)
~[na:1.8.0_74]
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
~[na:1.8.0_74]
at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:704)
~[na:1.8.0_74]
at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:647)
~[na:1.8.0_74]
at
sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1536)
~[na:1.8.0_74]
at
sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1441)
~[na:1.8.0_74]
at
java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:480)
~[na:1.8.0_74]
at
sun.net.www.protocol.https.HttpsURLConnectionImpl.getResponseCode(HttpsURLConnectionImpl.java:338)
~[na:1.8.0_74]
at
com.sun.jersey.client.urlconnection.URLConnectionClientHandler._invoke(URLConnectionClientHandler.java:253)
~[jersey-client-1.19.jar:1.19]
at
com.sun.jersey.client.urlconnection.URLConnectionClientHandler.handle(URLConnectionClientHandler.java:153)
~[jersey-client-1.19.jar:1.19]
... 12 common frames omitted
2016-07-22 18:13:37,652 WARN [Replicate Request Thread-7]
o.a.n.c.c.h.r.ThreadPoolRequestReplicator Failed to replicate request GET
/nifi-api/flow/controller/bulletins to localhost:8481 due to {}
com.sun.jersey.api.client.ClientHandlerException:
java.net.SocketTimeoutException: Read timed out
at
com.sun.jersey.client.urlconnection.URLConnectionClientHandler.handle(URLConnectionClientHandler.java:155)
~[jersey-client-1.19.jar:1.19]
at com.sun.jersey.api.client.Client.handle(Client.java:652)
~[jersey-client-1.19.jar:1.19]
at
com.sun.jersey.api.client.filter.GZIPContentEncodingFilter.handle(GZIPContentEncodingFilter.java:123)
~[jersey-client-1.19.jar:1.19]
at
com.sun.jersey.api.client.WebResource.handle(WebResource.java:682)
~[jersey-client-1.19.jar:1.19]
at
com.sun.jersey.api.client.WebResource.access$200(WebResource.java:74)
~[jersey-client-1.19.jar:1.19]
at
com.sun.jersey.api.client.WebResource$Builder.get(WebResource.java:509)
~[jersey-client-1.19.jar:1.19]
at
org.apache.nifi.cluster.coordination.http.replication.ThreadPoolRequestReplicator.replicateRequest(ThreadPoolRequestReplicator.java:493)
~[nifi-framework-cluster-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
org.apache.nifi.cluster.coordination.http.replication.ThreadPoolRequestReplicator$NodeHttpRequest.run(ThreadPoolRequestReplicator.java:687)
~[nifi-framework-cluster-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
[na:1.8.0_74]
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
[na:1.8.0_74]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
[na:1.8.0_74]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
[na:1.8.0_74]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_74]
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
~[na:1.8.0_74]
at
java.net.SocketInputStream.socketRead(SocketInputStream.java:116) ~[na:1.8.0_74]
at java.net.SocketInputStream.read(SocketInputStream.java:170)
~[na:1.8.0_74]
at java.net.SocketInputStream.read(SocketInputStream.java:141)
~[na:1.8.0_74]
at sun.security.ssl.InputRecord.readFully(InputRecord.java:465)
~[na:1.8.0_74]
at sun.security.ssl.InputRecord.read(InputRecord.java:503)
~[na:1.8.0_74]
at
sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:973) ~[na:1.8.0_74]
at
sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:930)
~[na:1.8.0_74]
at sun.security.ssl.AppInputStream.read(AppInputStream.java:105)
~[na:1.8.0_74]
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
~[na:1.8.0_74]
at java.io.BufferedInputStream.read1(BufferedInputStream.java:286)
~[na:1.8.0_74]
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
~[na:1.8.0_74]
at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:704)
~[na:1.8.0_74]
at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:647)
~[na:1.8.0_74]
at
sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1536)
~[na:1.8.0_74]
at
sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1441)
~[na:1.8.0_74]
at
java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:480)
~[na:1.8.0_74]
at
sun.net.www.protocol.https.HttpsURLConnectionImpl.getResponseCode(HttpsURLConnectionImpl.java:338)
~[na:1.8.0_74]
at
com.sun.jersey.client.urlconnection.URLConnectionClientHandler._invoke(URLConnectionClientHandler.java:253)
~[jersey-client-1.19.jar:1.19]
at
com.sun.jersey.client.urlconnection.URLConnectionClientHandler.handle(URLConnectionClientHandler.java:153)
~[jersey-client-1.19.jar:1.19]
... 12 common frames omitted
> Improve robustness of cluster when relying on embedded zookeeper
> ----------------------------------------------------------------
>
> Key: NIFI-2360
> URL: https://issues.apache.org/jira/browse/NIFI-2360
> Project: Apache NiFi
> Issue Type: Bug
> Components: Core Framework
> Affects Versions: 1.0.0
> Reporter: Mark Payne
> Assignee: Mark Payne
> Fix For: 1.0.0
>
>
> I have a 3 node cluster running an embedded zookeeper. If 2 nodes disconnect,
> I no longer have a ZooKeeper quorum. As a result, the third node is also
> disconnected. If I had more nodes, they would also all be disconnected. As a
> result, I also cannot then go to the cluster page to re-connect them, since
> there is no cluster coordinator.
> We should make this more robust, predominantly by not starting & stopping the
> embedded zookeeper server when a node connects & disconnects from cluster but
> instead just leave the embedded zookeeper server running, even when the node
> is disconnected from the cluster.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)