Author: aconway
Date: Wed Feb 25 15:49:53 2015
New Revision: 1662247

URL: http://svn.apache.org/r1662247
Log:
QPID-6413: Sporadic failure of HA tests causd by maxNegotiateTimeout

Increased maxNegotiateTimeout to the default (10 seconds). A smaller value 
speeds up
detection of non-running brokers on remote hosts, but this is not necessary for 
the tests.

Increased some other test timeouts and added some improved error reporting.

The occasional long (> 1 second) connection delays are caused by Cyrus SASL 
authentication.
Not clear why this takes so long, but that is a separate issue. Here's a client 
log excerpt
showing the delay.

2015-02-25 08:29:37.461299895 [Network] trace RECV 
[[127.0.0.1:34247-127.0.0.1:45983]]: Frame[BEbe; channel=0; 
{ConnectionStartBody: 
server-properties={qpid.federation_tag:V2:36:str16(77800bff-a176-46c1-917a-32f136dee650)};
 mechanisms=str16{V2:9:str16(ANONYMOUS), V2:5:str16(PLAIN)}; 
locales=str16{V2:5:str16(en_US)}; }]
2015-02-25 08:29:37.463116303 [Security] debug CyrusSasl::start(ANONYMOUS PLAIN)
(Note delay > 1 sec here)
2015-02-25 08:29:38.839793753 [Security] debug min_ssf: 0, max_ssf: 256
2015-02-25 08:29:38.839851781 [Security] debug CyrusSasl::start(ANONYMOUS 
PLAIN): selected ANONYMOUS response: 'anonymous@wallace'
2015-02-25 08:29:38.839963162 [Client] warning Connection 
[127.0.0.1:34247-127.0.0.1:45983] closed

Modified:
    qpid/trunk/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp
    qpid/trunk/qpid/cpp/src/tests/ha_test.py

Modified: qpid/trunk/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp
URL: 
http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp?rev=1662247&r1=1662246&r2=1662247&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp (original)
+++ qpid/trunk/qpid/cpp/src/qpid/sys/AsynchIOHandler.cpp Wed Feb 25 15:49:53 
2015
@@ -36,17 +36,20 @@ namespace sys {
 struct ProtocolTimeoutTask : public sys::TimerTask {
     AsynchIOHandler& handler;
     std::string id;
+    Duration timeout;
 
-    ProtocolTimeoutTask(const std::string& i, const Duration& timeout, 
AsynchIOHandler& h) :
-        TimerTask(timeout, "ProtocolTimeout"),
+    ProtocolTimeoutTask(const std::string& i, const Duration& timeout_, 
AsynchIOHandler& h) :
+        TimerTask(timeout_, "ProtocolTimeout"),
         handler(h),
-        id(i)
+        id(i),
+        timeout(timeout_)
     {}
 
     void fire() {
         // If this fires it means that we didn't negotiate the connection in 
the timeout period
         // Schedule closing the connection for the io thread
-        QPID_LOG(error, "Connection " << id << " No protocol received 
closing");
+        QPID_LOG(error, "Connection " << id << " No protocol received after " 
<< timeout
+                 << ", closing");
         handler.abort();
     }
 };

Modified: qpid/trunk/qpid/cpp/src/tests/ha_test.py
URL: 
http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/tests/ha_test.py?rev=1662247&r1=1662246&r2=1662247&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/tests/ha_test.py (original)
+++ qpid/trunk/qpid/cpp/src/tests/ha_test.py Wed Feb 25 15:49:53 2015
@@ -129,9 +129,6 @@ class HaBroker(Broker):
         args += ["--load-module", BrokerTest.ha_lib,
                  # Non-standard settings for faster tests.
                  "--link-maintenance-interval=0.1",
-                 # Heartbeat and negotiate time are needed so that a broker 
wont
-                 # stall on an address that doesn't currently have a broker 
running.
-                 "--max-negotiate-time=1000",
                  "--ha-cluster=%s"%ha_cluster]
         # Add default --log-enable arguments unless args already has --log 
arguments.
         if not [l for l in args if l.startswith("--log")]:
@@ -195,23 +192,26 @@ acl allow all all
 
     def ha_status(self): return self.qmf().status
 
-    def wait_status(self, status, timeout=5):
+    def wait_status(self, status, timeout=10):
+
         def try_get_status():
             self._status = "<unknown>"
-            # Ignore ConnectionError, the broker may not be up yet.
             try:
                 self._status = self.ha_status()
-                return self._status == status;
-            except qm.ConnectionError: return False
+            except qm.ConnectionError, e:
+                # Record the error but don't raise, the broker may not be up 
yet.
+                self._status = "%s: %s" % (type(e).__name__, e)
+            return self._status == status;
         assert retry(try_get_status, timeout=timeout), "%s expected=%r, 
actual=%r"%(
             self, status, self._status)
 
-    def wait_queue(self, queue, timeout=1, msg="wait_queue"):
+    def wait_queue(self, queue, timeout=10, msg="wait_queue"):
         """ Wait for queue to be visible via QMF"""
         agent = self.agent
-        assert retry(lambda: agent.getQueue(queue) is not None, 
timeout=timeout), msg+"queue %s not present"%queue
+        assert retry(lambda: agent.getQueue(queue) is not None, 
timeout=timeout), \
+            "%s queue %s not present" % (msg, queue)
 
-    def wait_no_queue(self, queue, timeout=1, msg="wait_no_queue"):
+    def wait_no_queue(self, queue, timeout=10, msg="wait_no_queue"):
         """ Wait for queue to be invisible via QMF"""
         agent = self.agent
         assert retry(lambda: agent.getQueue(queue) is None, timeout=timeout), 
"%s: queue %s still present"%(msg,queue)
@@ -325,7 +325,7 @@ class HaCluster(object):
         ha_port = self._ports[i]
         b = HaBroker(ha_port.test, ha_port, brokers_url=self.url, name=name,
                      args=args, **self.kwargs)
-        b.ready(timeout=5)
+        b.ready(timeout=10)
         return b
 
     def start(self):



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to