symat commented on a change in pull request #1048: ZOOKEEPER-3188: Improve resilience to network
URL: https://github.com/apache/zookeeper/pull/1048#discussion_r331403263
########## File path: zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Leader.java ########## @@ -418,66 +426,108 @@ public boolean isQuorumSynced(QuorumVerifier qv) { class LearnerCnxAcceptor extends ZooKeeperCriticalThread { - private volatile boolean stop = false; + private final AtomicBoolean stop = new AtomicBoolean(false); + private final AtomicBoolean fail = new AtomicBoolean(false); - public LearnerCnxAcceptor() { - super("LearnerCnxAcceptor-" + ss.getLocalSocketAddress(), zk.getZooKeeperServerListener()); + LearnerCnxAcceptor() { + super("LearnerCnxAcceptor-" + serverSockets.stream() + .map(ServerSocket::getLocalSocketAddress) + .map(Objects::toString) + .collect(Collectors.joining(",")), + zk.getZooKeeperServerListener()); } @Override public void run() { - try { - while (!stop) { - Socket s = null; - boolean error = false; - try { - s = ss.accept(); - - // start with the initLimit, once the ack is processed - // in LearnerHandler switch to the syncLimit - s.setSoTimeout(self.tickTime * self.initLimit); - s.setTcpNoDelay(nodelay); - - BufferedInputStream is = new BufferedInputStream(s.getInputStream()); - LearnerHandler fh = new LearnerHandler(s, is, Leader.this); - fh.start(); - } catch (SocketException e) { - error = true; - if (stop) { - LOG.info("exception while shutting down acceptor: " + e); - - // When Leader.shutdown() calls ss.close(), - // the call to accept throws an exception. - // We catch and set stop to true. 
- stop = true; - } else { - throw e; - } - } catch (SaslException e) { - LOG.error("Exception while connecting to quorum learner", e); - error = true; - } catch (Exception e) { - error = true; + if (!stop.get() && !serverSockets.isEmpty()) { + ExecutorService executor = Executors.newFixedThreadPool(serverSockets.size()); + CountDownLatch latch = new CountDownLatch(serverSockets.size()); + + serverSockets.forEach(serverSocket -> + executor.submit(new LearnerCnxAcceptorHandler(serverSocket, latch))); + + try { + latch.await(); + } catch (InterruptedException ie) { + LOG.error("Interrupted while sleeping. Ignoring exception", ie); + } finally { + closeSockets(); + } + } + } + + public void halt() { + stop.set(true); + closeSockets(); + } + + class LearnerCnxAcceptorHandler implements Runnable { + private ServerSocket serverSocket; + private CountDownLatch latch; + + LearnerCnxAcceptorHandler(ServerSocket serverSocket, CountDownLatch latch) { + this.serverSocket = serverSocket; + this.latch = latch; + } + + @Override + public void run() { + try { + Thread.currentThread().setName("LearnerCnxAcceptorHandler-" + serverSocket.getLocalSocketAddress()); + + while (!stop.get()) { + acceptConnections(); + } + } catch (Exception e) { + LOG.warn("Exception while accepting follower", e); + if (fail.compareAndSet(false, true)) { + handleException(getName(), e); + halt(); + } + } finally { + latch.countDown(); + } + } + + private void acceptConnections() throws IOException { + Socket socket = null; + boolean error = false; + try { + socket = serverSocket.accept(); + + // start with the initLimit, once the ack is processed + // in LearnerHandler switch to the syncLimit + socket.setSoTimeout(self.tickTime * self.initLimit); + socket.setTcpNoDelay(nodelay); + + BufferedInputStream is = new BufferedInputStream(socket.getInputStream()); + LearnerHandler fh = new LearnerHandler(socket, is, Leader.this); + fh.start(); + } catch (SocketException e) { + error = true; + if (stop.get()) { + 
 LOG.info("Exception while shutting down acceptor", e); + } else { throw e; - } finally { - // Don't leak sockets on errors - if (error && s != null && !s.isClosed()) { - try { - s.close(); - } catch (IOException e) { - LOG.warn("Error closing socket", e); - } + } + } catch (SaslException e) { + LOG.error("Exception while connecting to quorum learner", e); + error = true; + } catch (Exception e) { + error = true; + throw e; + } finally { + // Don't leak sockets on errors + if (error && socket != null && !socket.isClosed()) { + try { + socket.close(); + } catch (IOException e) { + LOG.warn("Error closing socket", e);

Review comment: thanks, I will do it

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at: us...@infra.apache.org

With regards,
Apache Git Services