Ema has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/404680 )
Change subject: Use up-and-enabled servers in can-depool logic ...................................................................... Use up-and-enabled servers in can-depool logic The number of pooled servers should not be computed as `total-servers - servers-down`. We need to consider hosts which are serving traffic, that is: servers administratively enabled and up according to monitoring. Change can-depool logic accordingly. Bug: T184715 Change-Id: Ic83382938810331dd4292f45cbb85f6aaa7b7707 (cherry picked from commit 1264a784bbcbedee37466102246775e2ee26367a) --- M pybal/coordinator.py A pybal/test/test_coordinator.py 2 files changed, 113 insertions(+), 4 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/debs/pybal refs/changes/80/404680/1 diff --git a/pybal/coordinator.py b/pybal/coordinator.py index a526b5b..a4b4e16 100755 --- a/pybal/coordinator.py +++ b/pybal/coordinator.py @@ -442,11 +442,21 @@ def canDepool(self): """Returns a boolean denoting whether another server can be depooled""" - # Construct a list of servers that have status 'down' - downServers = [server for server in self.servers.itervalues() if not server.up] + # Total number of servers + totalServerCount = len(self.servers) - # The total amount of pooled servers may never drop below a configured threshold - return len(self.servers) - len(downServers) >= len(self.servers) * self.lvsservice.getDepoolThreshold() + # Number of hosts considered to be up by PyBal's monitoring and + # administratively enabled. Under normal circumstances, they would be + # the hosts serving traffic. + # However, a host can go down after PyBal has reached the depool + # threshold for the service the host belongs to. In that case, the + # misbehaving server is kept pooled. This count does not include such + # hosts. + upServerCount = len([server for server in self.servers.itervalues() if server.up and server.enabled]) + + # The total amount of hosts serving traffic may never drop below a + # configured threshold + return upServerCount >= totalServerCount * self.lvsservice.getDepoolThreshold() def onConfigUpdate(self, config): """Parses the server list and changes the state accordingly.""" diff --git a/pybal/test/test_coordinator.py b/pybal/test/test_coordinator.py new file mode 100644 index 0000000..35cede6 --- /dev/null +++ b/pybal/test/test_coordinator.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +""" + PyBal unit tests + ~~~~~~~~~~~~~~~~ + + This module contains tests for `pybal.coordinator`. + +""" +import mock + +import pybal.coordinator +import pybal.util + +from twisted.internet.reactor import getDelayedCalls + +from .fixtures import PyBalTestCase + + +class CoordinatorTestCase(PyBalTestCase): + """Test case for `pybal.coordinator.Coordinator`.""" + + def setUp(self): + super(CoordinatorTestCase, self).setUp() + + configUrl = "file:///dev/null" + + self.coordinator = pybal.coordinator.Coordinator( + mock.MagicMock(), configUrl) + + self.coordinator.lvsservice.getDepoolThreshold = mock.MagicMock( + return_value=0.5) + + pybal.coordinator.Server.initialize = mock.MagicMock() + + def tearDown(self): + self.coordinator.configObserver.reloadTask.stop() + + for call in getDelayedCalls(): + if call.func.func_name == 'maybeParseConfig': + call.cancel() + + def setServers(self, servers): + self.coordinator.onConfigUpdate(config=servers) + + for server in self.coordinator.servers.itervalues(): + server.up = True + server.enabled = True + + def test2serversCanDepool(self): + servers = { + 'cp1045.eqiad.wmnet': {}, + 'cp1046.eqiad.wmnet': {}, + } + self.setServers(servers) + + # 2/2 hosts serving traffic. We can depool. + self.assertTrue(self.coordinator.canDepool()) + + # 1 host goes down + self.coordinator.servers['cp1045.eqiad.wmnet'].up = False + + # By depooling, we would end up with 1/2 hosts serving traffic. We can + # depool. + self.assertTrue(self.coordinator.canDepool()) + + # The other host goes down too + self.coordinator.servers['cp1046.eqiad.wmnet'].up = False + + # By depooling, we would end up with 0/2 hosts serving traffic. We + # cannot depool. + self.assertFalse(self.coordinator.canDepool()) + + def test4serversCanDepool(self): + servers = { + 'cp1045.eqiad.wmnet': {}, + 'cp1046.eqiad.wmnet': {}, + 'cp1047.eqiad.wmnet': {}, + 'cp1048.eqiad.wmnet': {}, + } + self.setServers(servers) + + self.coordinator.servers['cp1045.eqiad.wmnet'].enabled = False + + # 3/4 hosts serving traffic. We can depool. + self.assertTrue(self.coordinator.canDepool()) + + # 1 host goes down + self.coordinator.servers['cp1046.eqiad.wmnet'].up = False + + # By depooling, we would end up with 2/4 hosts serving traffic. We can + # depool. + self.assertTrue(self.coordinator.canDepool()) + + # Another host goes down + self.coordinator.servers['cp1047.eqiad.wmnet'].up = False + + # By depooling, we would end up with 1/4 hosts serving traffic. We + # cannot depool. + self.assertFalse(self.coordinator.canDepool()) -- To view, visit https://gerrit.wikimedia.org/r/404680 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ic83382938810331dd4292f45cbb85f6aaa7b7707 Gerrit-PatchSet: 1 Gerrit-Project: operations/debs/pybal Gerrit-Branch: 1.14 Gerrit-Owner: Ema <e...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits