Ema has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/403677 )
Change subject: Use up-and-enabled servers in can-depool logic
......................................................................
Use up-and-enabled servers in can-depool logic
The number of pooled servers should not be computed as `total-servers -
servers-down`. We need to consider hosts which are serving traffic, that
is: servers administratively enabled and up according to monitoring.
Change can-depool logic accordingly.
Bug: T184715
Change-Id: Ic83382938810331dd4292f45cbb85f6aaa7b7707
---
M pybal/coordinator.py
A pybal/test/test_coordinator.py
2 files changed, 113 insertions(+), 4 deletions(-)
Approvals:
Ema: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pybal/coordinator.py b/pybal/coordinator.py
index a526b5b..a4b4e16 100755
--- a/pybal/coordinator.py
+++ b/pybal/coordinator.py
@@ -442,11 +442,21 @@
def canDepool(self):
"""Returns a boolean denoting whether another server can be depooled"""
- # Construct a list of servers that have status 'down'
- downServers = [server for server in self.servers.itervalues() if not
server.up]
+ # Total number of servers
+ totalServerCount = len(self.servers)
- # The total amount of pooled servers may never drop below a configured
threshold
- return len(self.servers) - len(downServers) >= len(self.servers) *
self.lvsservice.getDepoolThreshold()
+ # Number of hosts considered to be up by PyBal's monitoring and
+ # administratively enabled. Under normal circumstances, they would be
+ # the hosts serving traffic.
+ # However, a host can go down after PyBal has reached the depool
+ # threshold for the service the host belongs to. In that case, the
+ # misbehaving server is kept pooled. This count does not include such
+ # hosts.
+ upServerCount = len([server for server in self.servers.itervalues() if
server.up and server.enabled])
+
+ # The total amount of hosts serving traffic may never drop below a
+ # configured threshold
+ return upServerCount >= totalServerCount *
self.lvsservice.getDepoolThreshold()
def onConfigUpdate(self, config):
"""Parses the server list and changes the state accordingly."""
diff --git a/pybal/test/test_coordinator.py b/pybal/test/test_coordinator.py
new file mode 100644
index 0000000..35cede6
--- /dev/null
+++ b/pybal/test/test_coordinator.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+"""
+ PyBal unit tests
+ ~~~~~~~~~~~~~~~~
+
+ This module contains tests for `pybal.coordinator`.
+
+"""
+import mock
+
+import pybal.coordinator
+import pybal.util
+
+from twisted.internet.reactor import getDelayedCalls
+
+from .fixtures import PyBalTestCase
+
+
+class CoordinatorTestCase(PyBalTestCase):
+ """Test case for `pybal.coordinator.Coordinator`."""
+
+ def setUp(self):
+ super(CoordinatorTestCase, self).setUp()
+
+ configUrl = "file:///dev/null"
+
+ self.coordinator = pybal.coordinator.Coordinator(
+ mock.MagicMock(), configUrl)
+
+ self.coordinator.lvsservice.getDepoolThreshold = mock.MagicMock(
+ return_value=0.5)
+
+ pybal.coordinator.Server.initialize = mock.MagicMock()
+
+ def tearDown(self):
+ self.coordinator.configObserver.reloadTask.stop()
+
+ for call in getDelayedCalls():
+ if call.func.func_name == 'maybeParseConfig':
+ call.cancel()
+
+ def setServers(self, servers):
+ self.coordinator.onConfigUpdate(config=servers)
+
+ for server in self.coordinator.servers.itervalues():
+ server.up = True
+ server.enabled = True
+
+ def test2serversCanDepool(self):
+ servers = {
+ 'cp1045.eqiad.wmnet': {},
+ 'cp1046.eqiad.wmnet': {},
+ }
+ self.setServers(servers)
+
+ # 2/2 hosts serving traffic. We can depool.
+ self.assertTrue(self.coordinator.canDepool())
+
+ # 1 host goes down
+ self.coordinator.servers['cp1045.eqiad.wmnet'].up = False
+
+ # By depooling, we would end up with 1/2 hosts serving traffic. We can
+ # depool.
+ self.assertTrue(self.coordinator.canDepool())
+
+ # The other host goes down too
+ self.coordinator.servers['cp1046.eqiad.wmnet'].up = False
+
+ # By depooling, we would end up with 0/2 hosts serving traffic. We
+ # cannot depool.
+ self.assertFalse(self.coordinator.canDepool())
+
+ def test4serversCanDepool(self):
+ servers = {
+ 'cp1045.eqiad.wmnet': {},
+ 'cp1046.eqiad.wmnet': {},
+ 'cp1047.eqiad.wmnet': {},
+ 'cp1048.eqiad.wmnet': {},
+ }
+ self.setServers(servers)
+
+ self.coordinator.servers['cp1045.eqiad.wmnet'].enabled = False
+
+ # 3/4 hosts serving traffic. We can depool.
+ self.assertTrue(self.coordinator.canDepool())
+
+ # 1 host goes down
+ self.coordinator.servers['cp1046.eqiad.wmnet'].up = False
+
+ # By depooling, we would end up with 2/4 hosts serving traffic. We can
+ # depool.
+ self.assertTrue(self.coordinator.canDepool())
+
+ # Another host goes down
+ self.coordinator.servers['cp1047.eqiad.wmnet'].up = False
+
+ # By depooling, we would end up with 1/4 hosts serving traffic. We
+ # cannot depool.
+ self.assertFalse(self.coordinator.canDepool())
--
To view, visit https://gerrit.wikimedia.org/r/403677
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ic83382938810331dd4292f45cbb85f6aaa7b7707
Gerrit-PatchSet: 5
Gerrit-Project: operations/debs/pybal
Gerrit-Branch: master
Gerrit-Owner: Ema <[email protected]>
Gerrit-Reviewer: BBlack <[email protected]>
Gerrit-Reviewer: Ema <[email protected]>
Gerrit-Reviewer: Giuseppe Lavagetto <[email protected]>
Gerrit-Reviewer: Mark Bergsma <[email protected]>
Gerrit-Reviewer: Volans <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits