Greg Padgett has uploaded a new change for review.

Change subject: WIP agent, lib: agent compatibility with metadata updates
......................................................................

WIP agent, lib: agent compatibility with metadata updates

When the metadata version changes, both old and new agents should
recognize this and behave appropriately.  The desired behavior is
described in this comment from constants.py.in:

The range of valid metadata versions for this host is from
METADATA_PARSE_VERSION <= version <= METADATA_FEATURE_VERSION.  If an
incompatible change is made to the metadata, both the parse and feature
versions should be raised beyond the current feature version.  This will
ensure that a) the old agent will recognize that it has been superseded
and shut down the vm, and b) the newer agents will ignore the older
agents' reported scores, thus assuming control of the vm.

Change-Id: I2357ce26aeb20439dec161c7b8ee46d94a4b3e3d
Bug-Url: https://bugzilla.redhat.com/??????
Signed-off-by: Greg Padgett <[email protected]>
---
M ovirt_hosted_engine_ha/agent/constants.py.in
M ovirt_hosted_engine_ha/agent/hosted_engine.py
M ovirt_hosted_engine_ha/lib/exceptions.py
M ovirt_hosted_engine_ha/lib/metadata.py
4 files changed, 55 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/12/21912/1

diff --git a/ovirt_hosted_engine_ha/agent/constants.py.in b/ovirt_hosted_engine_ha/agent/constants.py.in
index 0adf30e..dca5eb0 100644
--- a/ovirt_hosted_engine_ha/agent/constants.py.in
+++ b/ovirt_hosted_engine_ha/agent/constants.py.in
@@ -28,6 +28,14 @@
 # Metadata can be parsed by agents as old as METADATA_PARSE_VERSION, but may
 # contain new/additional information as new as METADATA_FEATURE_VERSION.  This
 # allows detection of enhancements without breaking backwards compatibility.
+#
+# Note: The range of valid metadata versions for this host is from
+# METADATA_PARSE_VERSION <= version <= METADATA_FEATURE_VERSION.  If an
+# incompatible change is made to the metadata, both the parse and feature
+# versions should be raised beyond the current feature version.  This will
+# ensure that a) the old agent will recognize that it has been superseded
+# and shut down the vm, and b) the newer agents will ignore the older agents'
+# reported scores, thus assuming control of the vm.
 METADATA_FEATURE_VERSION = 1
 METADATA_PARSE_VERSION = 1
 
diff --git a/ovirt_hosted_engine_ha/agent/hosted_engine.py b/ovirt_hosted_engine_ha/agent/hosted_engine.py
index fa3d20d..22765e4 100644
--- a/ovirt_hosted_engine_ha/agent/hosted_engine.py
+++ b/ovirt_hosted_engine_ha/agent/hosted_engine.py
@@ -739,6 +739,10 @@
             constants.SERVICE_TYPE)
         local_ts = int(time.time())
 
+        # Flag is set if the local agent discovers metadata too new for it
+        # to parse, in which case the agent will shut down the engine VM.
+        self._rinfo['superseded-agent'] = False
+
         # host_id 0 is a special case, representing global metadata
         data = all_stats.pop(0, None)
         md = {}
@@ -758,6 +762,12 @@
         for host_id, data in all_stats.iteritems():
             try:
                 md = metadata.parse_metadata_to_dict(host_id, data)
+            except ex.FatalMetadataError as e:
+                self._log.error(
+                    str(e),
+                    extra=self._get_lf_args(self.LF_MD_ERROR))
+                self._rinfo['superseded-agent'] = True
+                continue
             except ex.MetadataError as e:
                 self._log.error(
                     str(e),
@@ -978,6 +988,11 @@
             # TODO local maintenance should have its own state
             self._log.info("Local HA maintenance enabled")
             return self.States.OFF, True
+        elif self._rinfo['superseded-agent']:
+            # TODO superseded agent should have its own state
+            self._log.error("Local agent has been superseded by newer"
+                            " agents running in this cluster")
+            return self.States.OFF, True
 
         if self._rinfo['best-score-host-id'] != local_host_id:
             self._log.info("Engine down, local host does not have best score",
@@ -1102,8 +1117,13 @@
             self._log.info("Global HA maintenance enabled")
             return self.States.MAINTENANCE, True
         elif self._rinfo['maintenance'] == self.MaintenanceMode.LOCAL:
+            # TODO local maintenance should have its own state
             self._log.info("Local HA maintenance enabled")
-            # TODO local maintenance should have its own state as well
+            return self.States.STOP, False
+        elif self._rinfo['superseded-agent']:
+            # TODO superseded agent should have its own state
+            self._log.error("Local agent has been superseded by newer"
+                            " agents running in this cluster")
             return self.States.STOP, False
 
         best_host_id = self._rinfo['best-score-host-id']
diff --git a/ovirt_hosted_engine_ha/lib/exceptions.py b/ovirt_hosted_engine_ha/lib/exceptions.py
index c5563c0..e3872d4 100644
--- a/ovirt_hosted_engine_ha/lib/exceptions.py
+++ b/ovirt_hosted_engine_ha/lib/exceptions.py
@@ -41,3 +41,6 @@
 
 class MetadataError(Exception):
     pass
+
+class FatalMetadataError(Exception):
+    pass
diff --git a/ovirt_hosted_engine_ha/lib/metadata.py b/ovirt_hosted_engine_ha/lib/metadata.py
index 54224c3..fb78fec 100644
--- a/ovirt_hosted_engine_ha/lib/metadata.py
+++ b/ovirt_hosted_engine_ha/lib/metadata.py
@@ -136,10 +136,29 @@
                             .format(host_id, tokens[0]))
 
     if md_parse_vers > constants.METADATA_FEATURE_VERSION:
-        raise MetadataError("Metadata version {0} for host {1}"
-                            " too new for this agent ({2})"
-                            .format(md_parse_vers, host_id,
-                                    constants.METADATA_FEATURE_VERSION))
+        # Another agent in the cluster is writing newer metadata.  Raise a
+        # fatal error so the caller knows to stop processing.
+        raise FatalMetadataError("Metadata version {0} from host {1}"
+                                 " too new for this agent"
+                                 " (highest compatible version: {2})"
+                                 .format(md_parse_vers, host_id,
+                                         constants.METADATA_FEATURE_VERSION))
+
+    try:
+        md_feature_vers = int(tokens[1])
+    except ValueError:
+        raise MetadataError("Malformed metadata for host {0}:"
+                            " non-parsable metadata version {1}"
+                            .format(host_id, tokens[1]))
+
+    if md_feature_vers < constants.METADATA_PARSE_VERSION:
+        # Our metadata is incompatible; we'll ignore the old agent's metadata
+        # and it will ignore ours.
+        raise MetadataError("Metadata version {0} from host {1}"
+                            " too old for this agent"
+                            " (lowest compatible version: {2})"
+                            .format(md_feature_vers, host_id,
+                                    constants.METADATA_PARSE_VERSION))
 
     ret = {
         'host-id': host_id,


-- 
To view, visit http://gerrit.ovirt.org/21912
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2357ce26aeb20439dec161c7b8ee46d94a4b3e3d
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-hosted-engine-ha
Gerrit-Branch: master
Gerrit-Owner: Greg Padgett <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to