Martin Sivák has uploaded a new change for review. Change subject: Fix the way we compute cpu_load average and add tests for it ......................................................................
Fix the way we compute cpu_load average and add tests for it Change-Id: Id9e05c753f41dcd94d5a4734ac39f90581ddc0c0 Signed-off-by: Martin Sivak <[email protected]> --- M ovirt_hosted_engine_ha/agent/Makefile.am M ovirt_hosted_engine_ha/agent/constants.py.in M ovirt_hosted_engine_ha/agent/state_data.py A ovirt_hosted_engine_ha/agent/state_data_test.py M ovirt_hosted_engine_ha/agent/state_machine.py M ovirt_hosted_engine_ha/agent/states.py 6 files changed, 137 insertions(+), 10 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/19/24819/1 diff --git a/ovirt_hosted_engine_ha/agent/Makefile.am b/ovirt_hosted_engine_ha/agent/Makefile.am index 36c140e..2aafa9f 100644 --- a/ovirt_hosted_engine_ha/agent/Makefile.am +++ b/ovirt_hosted_engine_ha/agent/Makefile.am @@ -41,6 +41,7 @@ agent.py \ hosted_engine.py \ state_data.py \ + state_data_test.py \ state_decorators.py \ state_machine.py \ states.py \ diff --git a/ovirt_hosted_engine_ha/agent/constants.py.in b/ovirt_hosted_engine_ha/agent/constants.py.in index 5769597..6e8166c 100644 --- a/ovirt_hosted_engine_ha/agent/constants.py.in +++ b/ovirt_hosted_engine_ha/agent/constants.py.in @@ -84,3 +84,6 @@ # Maximum request error count MAX_ERROR_COUNT = 3 + +# The length of history +STATS_HISTORY_SECS = 15 * 60 diff --git a/ovirt_hosted_engine_ha/agent/state_data.py b/ovirt_hosted_engine_ha/agent/state_data.py index c876048..1b7c915 100644 --- a/ovirt_hosted_engine_ha/agent/state_data.py +++ b/ovirt_hosted_engine_ha/agent/state_data.py @@ -75,3 +75,34 @@ :type he_data: HostedEngineData """ return he_data.stats.collect_finish + + +def load_factor(he_data): + """ + Computes the average CPU load over all known history. Uses weighted average + to account for different time spans between data points. The load intervals + are integrated using the trapezoidal method: + + area = (b - a)(f(a) + f(b))/2 + where a,b are timestamps and f(a) and f(b) are values + """ + + def trapezoid(acc, point): + area, time, last_load, last_time = acc + cur_load = point.local["cpu-load"] + cur_time = point.collect_start + if cur_load is not None and last_load is not None: + seg_time = last_time - cur_time + seg_area = (seg_time * + (last_load + cur_load) / 2.0) + return area+seg_area, time+seg_time, cur_load, cur_time + else: + return area, time, cur_load, cur_time + + load_area, load_time = reduce(trapezoid, + he_data.history, + (0, 0, None, None))[:2] + if load_time == 0: + return 0.0 + else: + return load_area / load_time diff --git a/ovirt_hosted_engine_ha/agent/state_data_test.py b/ovirt_hosted_engine_ha/agent/state_data_test.py new file mode 100644 index 0000000..f722c84 --- /dev/null +++ b/ovirt_hosted_engine_ha/agent/state_data_test.py @@ -0,0 +1,90 @@ +__author__ = 'msivak' +import state_data +import unittest + + +class TestCases(unittest.TestCase): + @staticmethod + def cpu_load(time, load): + """ + Prepares stats structure with timestamp and cpu-load info + """ + return state_data.StatsData( + collect_start=time, + collect_finish=time, + metadata_too_new=False, + cluster={}, + host_id=1, + hosts={}, + local={ + "cpu-load": load + }, + maintenance=False + ) + + @staticmethod + def he_historic_data(history): + """ + Prepares dummy HostedEngineData with provided history + """ + sorted_history = history[:] + sorted_history.sort(reverse=True, key=lambda s: s.collect_start) + s = {"history": sorted_history, + "stats": sorted_history[0], + "host_id": sorted_history[0].host_id} + for k in state_data.HostedEngineData._fields: + s.setdefault(k, None) + return state_data.HostedEngineData(**s) + + def test_cpu_load_zero(self): + history = [] + history.append(self.cpu_load(0, 0)) + he_data = self.he_historic_data(history) + self.assertEqual(state_data.load_factor(he_data), 0.0) + + def test_cpu_load_const(self): + history = [] + history.append(self.cpu_load(0, 5)) + history.append(self.cpu_load(1, 5)) + he_data = self.he_historic_data(history) + self.assertEqual(state_data.load_factor(he_data), 5.0) + + def test_cpu_load_grow(self): + history = [] + history.append(self.cpu_load(0, 0)) + history.append(self.cpu_load(1, 10)) + he_data = self.he_historic_data(history) + self.assertEqual(state_data.load_factor(he_data), 5.0) + + def test_cpu_load_grow_fall(self): + history = [] + history.append(self.cpu_load(0, 0)) + history.append(self.cpu_load(1, 10)) + history.append(self.cpu_load(2, 0)) + he_data = self.he_historic_data(history) + self.assertEqual(state_data.load_factor(he_data), 5.0) + + def test_cpu_load_grow_fall_nonlinear(self): + history = [] + history.append(self.cpu_load(0, 0)) + history.append(self.cpu_load(1, 30)) + history.append(self.cpu_load(3, 0)) + he_data = self.he_historic_data(history) + self.assertEqual(state_data.load_factor(he_data), 15.0) + + def test_const_nonlinear(self): + history = [] + history.append(self.cpu_load(0, 5)) + history.append(self.cpu_load(1, 5)) + history.append(self.cpu_load(3, 5)) + he_data = self.he_historic_data(history) + self.assertEqual(state_data.load_factor(he_data), 5.0) + + def test_const_nonlinear_with_hole(self): + history = [] + history.append(self.cpu_load(0, 5)) + history.append(self.cpu_load(1, 5)) + history.append(self.cpu_load(2, None)) + history.append(self.cpu_load(5, 5)) + he_data = self.he_historic_data(history) + self.assertEqual(state_data.load_factor(he_data), 5.0) diff --git a/ovirt_hosted_engine_ha/agent/state_machine.py b/ovirt_hosted_engine_ha/agent/state_machine.py index 9d06833..debb7fb 100644 --- a/ovirt_hosted_engine_ha/agent/state_machine.py +++ b/ovirt_hosted_engine_ha/agent/state_machine.py @@ -161,8 +161,9 @@ # beginning, we are not interested in more than # 15 second history snapshots # namedtuple._replace makes a copy with updated values + hlimit = stats.collect_start - constants.STATS_HISTORY_SECS new_data = old_data._replace( - history=(stats,) + old_data.history[:14], + history=(stats,) + self.trim_history(old_data.history, hlimit), stats=stats, host_id=self.hosted_engine.host_id, @@ -170,3 +171,8 @@ **new_data) return new_data + + @staticmethod + def trim_history(history, time_limit): + return tuple(stat for stat in history + if stat.collect_start > time_limit) diff --git a/ovirt_hosted_engine_ha/agent/states.py b/ovirt_hosted_engine_ha/agent/states.py index 4a6e187..08723a6 100644 --- a/ovirt_hosted_engine_ha/agent/states.py +++ b/ovirt_hosted_engine_ha/agent/states.py @@ -5,7 +5,7 @@ from . import constants from .state_decorators import check_local_maintenance, check_timeout from .state_decorators import check_local_vm_unknown, check_global_maintenance -from .state_data import time as dtime +from .state_data import time as dtime, load_factor import time @@ -124,29 +124,25 @@ self.LF_PENALTY_INT)) score -= score_cfg['mgmt-bridge-score-penalty'] - # Record 15 minute cpu load history (not counting load caused by + # Compute cpu load average (not counting load caused by # the engine vm. The default load penalty is: # load 0-40% : 0 # load 40-90%: 0 to 1000 (rising linearly with increasing load) # load 90%+ : 1000 # Thus, a load of 80% causes an 800 point penalty - load_factor = reduce(lambda acc, point: - acc + point.local["cpu-load"] - if point.local["cpu-load"] is not None else acc, - self.data.history, - 0) / len(self.data.history) + load_average = load_factor(self.data) if score_cfg['cpu-load-penalty-max'] \ == score_cfg['cpu-load-penalty-min']: # Avoid divide by 0 in penalty calculation below - if load_factor < score_cfg['cpu-load-penalty-min']: + if load_average < score_cfg['cpu-load-penalty-min']: penalty = 0 else: penalty = score_cfg['cpu-load-score-penalty'] else: # Penalty is normalized to [0, max penalty] and is linear based on # (magnitude of value within penalty range) / (size of range) - penalty = int((load_factor + penalty = int((load_average - score_cfg['cpu-load-penalty-min']) / (score_cfg['cpu-load-penalty-max'] - score_cfg['cpu-load-penalty-min']) -- To view, visit http://gerrit.ovirt.org/24819 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id9e05c753f41dcd94d5a4734ac39f90581ddc0c0 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-hosted-engine-ha Gerrit-Branch: ovirt-hosted-engine-ha-1.1 Gerrit-Owner: Martin Sivák <[email protected]> _______________________________________________ Engine-patches mailing list [email protected] http://lists.ovirt.org/mailman/listinfo/engine-patches
