This is an automated email from the ASF dual-hosted git repository.

eolivelli pushed a commit to branch branch-3.6
in repository https://gitbox.apache.org/repos/asf/zookeeper.git


The following commit(s) were added to refs/heads/branch-3.6 by this push:
     new 68466c8  ZOOKEEPER-3769: handling malformed Leader Election 
notification messages
68466c8 is described below

commit 68466c8767fa1b3ab0e355278921752392e2ea8b
Author: Mate Szalay-Beko <[email protected]>
AuthorDate: Tue Apr 7 09:07:50 2020 +0200

    ZOOKEEPER-3769: handling malformed Leader Election notification messages
    
    Using ZooKeeper with JDK 12.0.2 on CentOS 7, we saw a few times that when 
the current leader was killed, some partial Leader Election notification 
(vote) messages were delivered to the other ZooKeeper servers. These malformed / 
partial messages caused various exceptions in the WorkerReceiver thread 
of FastLeaderElection that were not handled before. This led to the 
death of the WorkerReceiver thread, leaving the given ZooKeeper 
server unable to receive leader [...]
    
    In the proposed fix I created unit tests to simulate certain error cases 
with regard to partial leader election messages, and fixed the error handling 
in FastLeaderElection.
    
    Author: Mate Szalay-Beko <[email protected]>
    
    Reviewers: Enrico Olivelli <[email protected]>, Norbert Kalmar 
<[email protected]>
    
    Closes #1300 from symat/ZOOKEEPER-3769-master
    
    (cherry picked from commit a548253408384d9e943b151c942d795017dca70c)
    Signed-off-by: Enrico Olivelli <[email protected]>
---
 .../server/quorum/FastLeaderElection.java          | 115 +++++-----
 .../FLEMalformedNotificationMessageTest.java       | 249 +++++++++++++++++++++
 2 files changed, 313 insertions(+), 51 deletions(-)

diff --git 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/FastLeaderElection.java
 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/FastLeaderElection.java
index 3ab1007..967adf0 100644
--- 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/FastLeaderElection.java
+++ 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/FastLeaderElection.java
@@ -19,6 +19,7 @@
 package org.apache.zookeeper.server.quorum;
 
 import java.io.IOException;
+import java.nio.BufferUnderflowException;
 import java.nio.ByteBuffer;
 import java.util.HashMap;
 import java.util.Map;
@@ -237,19 +238,21 @@ public class FastLeaderElection implements Election {
                             continue;
                         }
 
+                        final int capacity = response.buffer.capacity();
+
                         // The current protocol and two previous generations 
all send at least 28 bytes
-                        if (response.buffer.capacity() < 28) {
-                            LOG.error("Got a short response: {}", 
response.buffer.capacity());
+                        if (capacity < 28) {
+                            LOG.error("Got a short response from server {}: 
{}", response.sid, capacity);
                             continue;
                         }
 
                         // this is the backwardCompatibility mode in place 
before ZK-107
                         // It is for a version of the protocol in which we 
didn't send peer epoch
                         // With peer epoch and version the message became 40 
bytes
-                        boolean backCompatibility28 = 
(response.buffer.capacity() == 28);
+                        boolean backCompatibility28 = (capacity == 28);
 
                         // this is the backwardCompatibility mode for no 
version information
-                        boolean backCompatibility40 = 
(response.buffer.capacity() == 40);
+                        boolean backCompatibility40 = (capacity == 40);
 
                         response.buffer.clear();
 
@@ -263,64 +266,74 @@ public class FastLeaderElection implements Election {
                         long rpeerepoch;
 
                         int version = 0x0;
-                        if (!backCompatibility28) {
-                            rpeerepoch = response.buffer.getLong();
-                            if (!backCompatibility40) {
-                                /*
-                                 * Version added in 3.4.6
-                                 */
+                        QuorumVerifier rqv = null;
 
-                                version = response.buffer.getInt();
+                        try {
+                            if (!backCompatibility28) {
+                                rpeerepoch = response.buffer.getLong();
+                                if (!backCompatibility40) {
+                                    /*
+                                     * Version added in 3.4.6
+                                     */
+
+                                    version = response.buffer.getInt();
+                                } else {
+                                    LOG.info("Backward compatibility mode (36 
bits), server id: {}", response.sid);
+                                }
                             } else {
-                                LOG.info("Backward compatibility mode (36 
bits), server id: {}", response.sid);
+                                LOG.info("Backward compatibility mode (28 
bits), server id: {}", response.sid);
+                                rpeerepoch = ZxidUtils.getEpochFromZxid(rzxid);
                             }
-                        } else {
-                            LOG.info("Backward compatibility mode (28 bits), 
server id: {}", response.sid);
-                            rpeerepoch = ZxidUtils.getEpochFromZxid(rzxid);
-                        }
 
-                        QuorumVerifier rqv = null;
+                            // check if we have a version that includes 
config. If so extract config info from message.
+                            if (version > 0x1) {
+                                int configLength = response.buffer.getInt();
+
+                                // we want to avoid errors caused by the 
allocation of a byte array with negative length
+                                // (causing NegativeArraySizeException) or 
huge length (causing e.g. OutOfMemoryError)
+                                if (configLength < 0 || configLength > 
capacity) {
+                                    throw new 
IOException(String.format("Invalid configLength in notification message! 
sid=%d, capacity=%d, version=%d, configLength=%d",
+                                                                        
response.sid, capacity, version, configLength));
+                                }
 
-                        // check if we have a version that includes config. If 
so extract config info from message.
-                        if (version > 0x1) {
-                            int configLength = response.buffer.getInt();
-                            byte[] b = new byte[configLength];
-
-                            response.buffer.get(b);
-
-                            synchronized (self) {
-                                try {
-                                    rqv = self.configFromString(new String(b));
-                                    QuorumVerifier curQV = 
self.getQuorumVerifier();
-                                    if (rqv.getVersion() > curQV.getVersion()) 
{
-                                        LOG.info("{} Received version: {} my 
version: {}",
-                                                 self.getId(),
-                                                 
Long.toHexString(rqv.getVersion()),
-                                                 
Long.toHexString(self.getQuorumVerifier().getVersion()));
-                                        if (self.getPeerState() == 
ServerState.LOOKING) {
-                                            LOG.debug("Invoking 
processReconfig(), state: {}", self.getServerState());
-                                            self.processReconfig(rqv, null, 
null, false);
-                                            if (!rqv.equals(curQV)) {
-                                                LOG.info("restarting leader 
election");
-                                                self.shuttingDownLE = true;
-                                                
self.getElectionAlg().shutdown();
-
-                                                break;
+                                byte[] b = new byte[configLength];
+                                response.buffer.get(b);
+
+                                synchronized (self) {
+                                    try {
+                                        rqv = self.configFromString(new 
String(b));
+                                        QuorumVerifier curQV = 
self.getQuorumVerifier();
+                                        if (rqv.getVersion() > 
curQV.getVersion()) {
+                                            LOG.info("{} Received version: {} 
my version: {}",
+                                                     self.getId(),
+                                                     
Long.toHexString(rqv.getVersion()),
+                                                     
Long.toHexString(self.getQuorumVerifier().getVersion()));
+                                            if (self.getPeerState() == 
ServerState.LOOKING) {
+                                                LOG.debug("Invoking 
processReconfig(), state: {}", self.getServerState());
+                                                self.processReconfig(rqv, 
null, null, false);
+                                                if (!rqv.equals(curQV)) {
+                                                    LOG.info("restarting 
leader election");
+                                                    self.shuttingDownLE = true;
+                                                    
self.getElectionAlg().shutdown();
+
+                                                    break;
+                                                }
+                                            } else {
+                                                LOG.debug("Skip 
processReconfig(), state: {}", self.getServerState());
                                             }
-                                        } else {
-                                            LOG.debug("Skip processReconfig(), 
state: {}", self.getServerState());
                                         }
+                                    } catch (IOException | ConfigException e) {
+                                        LOG.error("Something went wrong while 
processing config received from {}", response.sid);
                                     }
-                                } catch (IOException e) {
-                                    LOG.error("Something went wrong while 
processing config received from {}", response.sid);
-                                } catch (ConfigException e) {
-                                    LOG.error("Something went wrong while 
processing config received from {}", response.sid);
                                 }
+                            } else {
+                                LOG.info("Backward compatibility mode (before 
reconfig), server id: {}", response.sid);
                             }
-                        } else {
-                            LOG.info("Backward compatibility mode (before 
reconfig), server id: {}", response.sid);
+                        } catch (BufferUnderflowException | IOException e) {
+                            LOG.warn("Skipping the processing of a partial / 
malformed response message sent by sid={} (message length: {})",
+                                     response.sid, capacity, e);
+                            continue;
                         }
-
                         /*
                          * If it is from a non-voting server (such as an 
observer or
                          * a non-voting follower), respond right away.
diff --git 
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FLEMalformedNotificationMessageTest.java
 
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FLEMalformedNotificationMessageTest.java
new file mode 100644
index 0000000..8465c9e
--- /dev/null
+++ 
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FLEMalformedNotificationMessageTest.java
@@ -0,0 +1,249 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.quorum;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import org.apache.zookeeper.PortAssignment;
+import org.apache.zookeeper.ZKTestCase;
+import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer;
+import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState;
+import org.apache.zookeeper.test.ClientBase;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class FLEMalformedNotificationMessageTest extends ZKTestCase {
+    private static final Logger LOG = 
LoggerFactory.getLogger(FLEMalformedNotificationMessageTest.class);
+    private static final byte[] CONFIG_BYTES = "my very invalid config 
string".getBytes();
+    private static final int CONFIG_BYTES_LENGTH = CONFIG_BYTES.length;
+
+    int count;
+    HashMap<Long, QuorumServer> peers;
+    File tmpdir[];
+    int port[];
+
+    QuorumCnxManager mockCnxManager;
+    FLETestUtils.LEThread leaderElectionThread;
+    QuorumPeer peerRunningLeaderElection;
+
+
+    @Before
+    public void setUp() throws Exception {
+        count = 3;
+
+        peers = new HashMap<>(count);
+        tmpdir = new File[count];
+        port = new int[count];
+
+        LOG.info("FLEMalformedNotificationMessageTest: {}, {}", getTestName(), 
count);
+        for (int i = 0; i < count; i++) {
+            int clientport = PortAssignment.unique();
+            peers.put((long) i,
+                      new QuorumServer(i,
+                                       new InetSocketAddress(clientport),
+                                       new 
InetSocketAddress(PortAssignment.unique())));
+            tmpdir[i] = ClientBase.createTmpDir();
+            port[i] = clientport;
+        }
+
+        /*
+         * Start server 0
+         */
+        peerRunningLeaderElection = new QuorumPeer(peers, tmpdir[0], 
tmpdir[0], port[0], 3, 0, 1000, 2, 2, 2);
+        peerRunningLeaderElection.startLeaderElection();
+        leaderElectionThread = new 
FLETestUtils.LEThread(peerRunningLeaderElection, 0);
+        leaderElectionThread.start();
+    }
+
+
+    @After
+    public void tearDown() throws Exception {
+        peerRunningLeaderElection.shutdown();
+        mockCnxManager.halt();
+    }
+
+
+    @Test
+    public void testTooShortPartialNotificationMessage() throws Exception {
+
+        /*
+         * Start mock server 1, send a message too short to be compatible with 
any protocol version
+         * This simulates the case when only some parts of the whole message 
are received.
+         */
+        startMockServer(1);
+        byte requestBytes[] = new byte[12];
+        ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes);
+        requestBuffer.clear();
+        requestBuffer.putInt(ServerState.LOOKING.ordinal());   // state
+        requestBuffer.putLong(0);                              // leader
+        mockCnxManager.toSend(0L, requestBuffer);
+
+        /*
+         * Assert that the message receiver thread in leader election is still 
healthy:
+         * we are sending valid votes and waiting for the leader election to 
be finished.
+         */
+        sendValidNotifications(1, 0);
+        leaderElectionThread.join(5000);
+        if (leaderElectionThread.isAlive()) {
+            Assert.fail("Leader election thread didn't join, something went 
wrong.");
+        }
+    }
+
+
+    @Test
+    public void testNotificationMessageWithNegativeConfigLength() throws 
Exception {
+
+        /*
+         * Start mock server 1, send a message with negative configLength field
+         */
+        startMockServer(1);
+        byte requestBytes[] = new byte[48];
+        ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes);
+        requestBuffer.clear();
+        requestBuffer.putInt(ServerState.LOOKING.ordinal());   // state
+        requestBuffer.putLong(0);                              // leader
+        requestBuffer.putLong(0);                              // zxid
+        requestBuffer.putLong(0);                              // electionEpoch
+        requestBuffer.putLong(0);                              // epoch
+        requestBuffer.putInt(FastLeaderElection.Notification.CURRENTVERSION);  
 // version
+        requestBuffer.putInt(-123);                            // 
configData.length
+        mockCnxManager.toSend(0L, requestBuffer);
+
+        /*
+         * Assert that the message receiver thread in leader election is still 
healthy:
+         * we are sending valid votes and waiting for the leader election to 
be finished.
+         */
+        sendValidNotifications(1, 0);
+        leaderElectionThread.join(5000);
+        if (leaderElectionThread.isAlive()) {
+            Assert.fail("Leader election thread didn't join, something went 
wrong.");
+        }
+    }
+
+
+    @Test
+    public void testNotificationMessageWithInvalidConfigLength() throws 
Exception {
+
+        /*
+         * Start mock server 1, send a message with an invalid configLength 
field
+         * (instead of sending CONFIG_BYTES_LENGTH, we send 10000)
+         */
+        startMockServer(1);
+        byte requestBytes[] = new byte[48 + CONFIG_BYTES_LENGTH];
+        ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes);
+        requestBuffer.clear();
+        requestBuffer.putInt(ServerState.LOOKING.ordinal());   // state
+        requestBuffer.putLong(0);                              // leader
+        requestBuffer.putLong(0);                              // zxid
+        requestBuffer.putLong(0);                              // electionEpoch
+        requestBuffer.putLong(0);                              // epoch
+        requestBuffer.putInt(FastLeaderElection.Notification.CURRENTVERSION);  
 // version
+        requestBuffer.putInt(10000);                           // 
configData.length
+        requestBuffer.put(CONFIG_BYTES);                       // configData
+        mockCnxManager.toSend(0L, requestBuffer);
+
+        /*
+         * Assert that the message receiver thread in leader election is still 
healthy:
+         * we are sending valid votes and waiting for the leader election to 
be finished.
+         */
+        sendValidNotifications(1, 0);
+        leaderElectionThread.join(5000);
+        if (leaderElectionThread.isAlive()) {
+            Assert.fail("Leader election thread didn't join, something went 
wrong.");
+        }
+    }
+
+
+    @Test
+    public void testNotificationMessageWithInvalidConfig() throws Exception {
+
+        /*
+         * Start mock server 1, send a message with an invalid config field
+         * (the receiver should not be able to parse the config part of the 
message)
+         */
+        startMockServer(1);
+        ByteBuffer requestBuffer = 
FastLeaderElection.buildMsg(ServerState.LOOKING.ordinal(), 1, 0, 0, 0, 
CONFIG_BYTES);
+        mockCnxManager.toSend(0L, requestBuffer);
+
+        /*
+         * Assert that the message receiver thread in leader election is still 
healthy:
+         * we are sending valid votes and waiting for the leader election to 
be finished.
+         */
+        sendValidNotifications(1, 0);
+        leaderElectionThread.join(5000);
+        if (leaderElectionThread.isAlive()) {
+            Assert.fail("Leader election thread didn't join, something went 
wrong.");
+        }
+    }
+
+
+    @Test
+    public void testNotificationMessageWithBadProtocol() throws Exception {
+
+        /*
+         * Start mock server 1, send an invalid 30 bytes long message
+         * (the receiver should not be able to parse the message and should 
skip it)
+         * This simulates the case when only some parts of the whole message 
are received.
+         */
+        startMockServer(1);
+        byte requestBytes[] = new byte[30];
+        ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes);
+        requestBuffer.clear();
+        requestBuffer.putInt(ServerState.LOOKING.ordinal());   // state
+        requestBuffer.putLong(1);                              // leader
+        requestBuffer.putLong(0);                              // zxid
+        requestBuffer.putLong(0);                              // electionEpoch
+        requestBuffer.putShort((short) 0);                      // this is the 
first two bytes of a proper
+                                                               // 8 bytes Long 
we should send here
+        mockCnxManager.toSend(0L, requestBuffer);
+
+        /*
+         * Assert that the message receiver thread in leader election is still 
healthy:
+         * we are sending valid votes and waiting for the leader election to 
be finished.
+         */
+        sendValidNotifications(1, 0);
+        leaderElectionThread.join(5000);
+        if (leaderElectionThread.isAlive()) {
+            Assert.fail("Leader election thread didn't join, something went 
wrong.");
+        }
+    }
+
+
+    void startMockServer(int sid) throws IOException {
+        QuorumPeer peer = new QuorumPeer(peers, tmpdir[sid], tmpdir[sid], 
port[sid], 3, sid, 1000, 2, 2, 2);
+        mockCnxManager = peer.createCnxnManager();
+        mockCnxManager.listener.start();
+    }
+
+
+    void sendValidNotifications(int fromSid, int toSid) throws 
InterruptedException {
+        mockCnxManager.toSend((long) toSid, 
FLETestUtils.createMsg(ServerState.LOOKING.ordinal(), fromSid, 0, 0));
+        mockCnxManager.recvQueue.take();
+        mockCnxManager.toSend((long) toSid, 
FLETestUtils.createMsg(ServerState.FOLLOWING.ordinal(), toSid, 0, 0));
+    }
+
+}

Reply via email to