Martin Sivák has uploaded a new change for review.

Change subject: Fix the way we compute cpu_load average and add tests for it
......................................................................

Fix the way we compute cpu_load average and add tests for it

Change-Id: Id9e05c753f41dcd94d5a4734ac39f90581ddc0c0
Signed-off-by: Martin Sivak <[email protected]>
---
M ovirt_hosted_engine_ha/agent/Makefile.am
M ovirt_hosted_engine_ha/agent/constants.py.in
M ovirt_hosted_engine_ha/agent/state_data.py
A ovirt_hosted_engine_ha/agent/state_data_test.py
M ovirt_hosted_engine_ha/agent/state_machine.py
M ovirt_hosted_engine_ha/agent/states.py
6 files changed, 137 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha 
refs/changes/19/24819/1

diff --git a/ovirt_hosted_engine_ha/agent/Makefile.am 
b/ovirt_hosted_engine_ha/agent/Makefile.am
index 36c140e..2aafa9f 100644
--- a/ovirt_hosted_engine_ha/agent/Makefile.am
+++ b/ovirt_hosted_engine_ha/agent/Makefile.am
@@ -41,6 +41,7 @@
        agent.py \
        hosted_engine.py \
        state_data.py \
+       state_data_test.py \
        state_decorators.py \
        state_machine.py \
        states.py \
diff --git a/ovirt_hosted_engine_ha/agent/constants.py.in 
b/ovirt_hosted_engine_ha/agent/constants.py.in
index 5769597..6e8166c 100644
--- a/ovirt_hosted_engine_ha/agent/constants.py.in
+++ b/ovirt_hosted_engine_ha/agent/constants.py.in
@@ -84,3 +84,6 @@
 
 # Maximum request error count
 MAX_ERROR_COUNT = 3
+
+# The length of history
+STATS_HISTORY_SECS = 15 * 60
diff --git a/ovirt_hosted_engine_ha/agent/state_data.py 
b/ovirt_hosted_engine_ha/agent/state_data.py
index c876048..1b7c915 100644
--- a/ovirt_hosted_engine_ha/agent/state_data.py
+++ b/ovirt_hosted_engine_ha/agent/state_data.py
@@ -75,3 +75,34 @@
     :type he_data: HostedEngineData
     """
     return he_data.stats.collect_finish
+
+
+def load_factor(he_data):
+    """
+    Computes the average CPU load over all known history. Uses weighted average
+    to account for different time spans between data points. The load intervals
+    are integrated using the trapezoidal method:
+
+    area = (b - a)(f(a) + f(b))/2
+    where a,b are timestamps and f(a) and f(b) are values
+    """
+
+    def trapezoid(acc, point):
+        area, time, last_load, last_time = acc
+        cur_load = point.local["cpu-load"]
+        cur_time = point.collect_start
+        if cur_load is not None and last_load is not None:
+            seg_time = last_time - cur_time
+            seg_area = (seg_time *
+                        (last_load + cur_load) / 2.0)
+            return area+seg_area, time+seg_time, cur_load, cur_time
+        else:
+            return area, time, cur_load, cur_time
+
+    load_area, load_time = reduce(trapezoid,
+                                  he_data.history,
+                                  (0, 0, None, None))[:2]
+    if load_time == 0:
+        return 0.0
+    else:
+        return load_area / load_time
diff --git a/ovirt_hosted_engine_ha/agent/state_data_test.py 
b/ovirt_hosted_engine_ha/agent/state_data_test.py
new file mode 100644
index 0000000..f722c84
--- /dev/null
+++ b/ovirt_hosted_engine_ha/agent/state_data_test.py
@@ -0,0 +1,90 @@
+__author__ = 'msivak'
+import state_data
+import unittest
+
+
+class TestCases(unittest.TestCase):
+    @staticmethod
+    def cpu_load(time, load):
+        """
+        Prepares stats structure with timestamp and cpu-load info
+        """
+        return state_data.StatsData(
+            collect_start=time,
+            collect_finish=time,
+            metadata_too_new=False,
+            cluster={},
+            host_id=1,
+            hosts={},
+            local={
+                "cpu-load": load
+            },
+            maintenance=False
+        )
+
+    @staticmethod
+    def he_historic_data(history):
+        """
+        Prepares dummy HostedEngineData with provided history
+        """
+        sorted_history = history[:]
+        sorted_history.sort(reverse=True, key=lambda s: s.collect_start)
+        s = {"history": sorted_history,
+             "stats": sorted_history[0],
+             "host_id": sorted_history[0].host_id}
+        for k in state_data.HostedEngineData._fields:
+            s.setdefault(k, None)
+        return state_data.HostedEngineData(**s)
+
+    def test_cpu_load_zero(self):
+        history = []
+        history.append(self.cpu_load(0, 0))
+        he_data = self.he_historic_data(history)
+        self.assertEqual(state_data.load_factor(he_data), 0.0)
+
+    def test_cpu_load_const(self):
+        history = []
+        history.append(self.cpu_load(0, 5))
+        history.append(self.cpu_load(1, 5))
+        he_data = self.he_historic_data(history)
+        self.assertEqual(state_data.load_factor(he_data), 5.0)
+
+    def test_cpu_load_grow(self):
+        history = []
+        history.append(self.cpu_load(0, 0))
+        history.append(self.cpu_load(1, 10))
+        he_data = self.he_historic_data(history)
+        self.assertEqual(state_data.load_factor(he_data), 5.0)
+
+    def test_cpu_load_grow_fall(self):
+        history = []
+        history.append(self.cpu_load(0, 0))
+        history.append(self.cpu_load(1, 10))
+        history.append(self.cpu_load(2, 0))
+        he_data = self.he_historic_data(history)
+        self.assertEqual(state_data.load_factor(he_data), 5.0)
+
+    def test_cpu_load_grow_fall_nonlinear(self):
+        history = []
+        history.append(self.cpu_load(0, 0))
+        history.append(self.cpu_load(1, 30))
+        history.append(self.cpu_load(3, 0))
+        he_data = self.he_historic_data(history)
+        self.assertEqual(state_data.load_factor(he_data), 15.0)
+
+    def test_const_nonlinear(self):
+        history = []
+        history.append(self.cpu_load(0, 5))
+        history.append(self.cpu_load(1, 5))
+        history.append(self.cpu_load(3, 5))
+        he_data = self.he_historic_data(history)
+        self.assertEqual(state_data.load_factor(he_data), 5.0)
+
+    def test_const_nonlinear_with_hole(self):
+        history = []
+        history.append(self.cpu_load(0, 5))
+        history.append(self.cpu_load(1, 5))
+        history.append(self.cpu_load(2, None))
+        history.append(self.cpu_load(5, 5))
+        he_data = self.he_historic_data(history)
+        self.assertEqual(state_data.load_factor(he_data), 5.0)
diff --git a/ovirt_hosted_engine_ha/agent/state_machine.py 
b/ovirt_hosted_engine_ha/agent/state_machine.py
index 9d06833..debb7fb 100644
--- a/ovirt_hosted_engine_ha/agent/state_machine.py
+++ b/ovirt_hosted_engine_ha/agent/state_machine.py
@@ -161,8 +161,9 @@
         # beginning, we are not interested in more than
         # 15 second history snapshots
         # namedtuple._replace makes a copy with updated values
+        hlimit = stats.collect_start - constants.STATS_HISTORY_SECS
         new_data = old_data._replace(
-            history=(stats,) + old_data.history[:14],
+            history=(stats,) + self.trim_history(old_data.history, hlimit),
             stats=stats,
             host_id=self.hosted_engine.host_id,
 
@@ -170,3 +171,8 @@
             **new_data)
 
         return new_data
+
+    @staticmethod
+    def trim_history(history, time_limit):
+        return tuple(stat for stat in history
+                     if stat.collect_start > time_limit)
diff --git a/ovirt_hosted_engine_ha/agent/states.py 
b/ovirt_hosted_engine_ha/agent/states.py
index 4a6e187..08723a6 100644
--- a/ovirt_hosted_engine_ha/agent/states.py
+++ b/ovirt_hosted_engine_ha/agent/states.py
@@ -5,7 +5,7 @@
 from . import constants
 from .state_decorators import check_local_maintenance, check_timeout
 from .state_decorators import check_local_vm_unknown, check_global_maintenance
-from .state_data import time as dtime
+from .state_data import time as dtime, load_factor
 import time
 
 
@@ -124,29 +124,25 @@
                                                  self.LF_PENALTY_INT))
             score -= score_cfg['mgmt-bridge-score-penalty']
 
-        # Record 15 minute cpu load history (not counting load caused by
+        # Compute cpu load average (not counting load caused by
         # the engine vm.  The default load penalty is:
         #   load 0-40% : 0
         #   load 40-90%: 0 to 1000 (rising linearly with increasing load)
         #   load 90%+  : 1000
         # Thus, a load of 80% causes an 800 point penalty
-        load_factor = reduce(lambda acc, point:
-                             acc + point.local["cpu-load"]
-                             if point.local["cpu-load"] is not None else acc,
-                             self.data.history,
-                             0) / len(self.data.history)
+        load_average = load_factor(self.data)
 
         if score_cfg['cpu-load-penalty-max'] \
                 == score_cfg['cpu-load-penalty-min']:
             # Avoid divide by 0 in penalty calculation below
-            if load_factor < score_cfg['cpu-load-penalty-min']:
+            if load_average < score_cfg['cpu-load-penalty-min']:
                 penalty = 0
             else:
                 penalty = score_cfg['cpu-load-score-penalty']
         else:
             # Penalty is normalized to [0, max penalty] and is linear based on
             # (magnitude of value within penalty range) / (size of range)
-            penalty = int((load_factor
+            penalty = int((load_average
                            - score_cfg['cpu-load-penalty-min'])
                           / (score_cfg['cpu-load-penalty-max']
                              - score_cfg['cpu-load-penalty-min'])


-- 
To view, visit http://gerrit.ovirt.org/24819
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id9e05c753f41dcd94d5a4734ac39f90581ddc0c0
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-hosted-engine-ha
Gerrit-Branch: ovirt-hosted-engine-ha-1.1
Gerrit-Owner: Martin Sivák <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to