All the errors encountered during an opcode execution are reported via exceptions. Intercept all the exceptions and execute POST global hooks with ERROR status.
Signed-off-by: Oleg Ponomarev <[email protected]> --- lib/hooksmaster.py | 18 ++++++++++-- lib/mcpu.py | 83 +++++++++++++++++++++++++++++++++++------------------- 2 files changed, 70 insertions(+), 31 deletions(-) diff --git a/lib/hooksmaster.py b/lib/hooksmaster.py index 1326dc0..ca76ee8 100644 --- a/lib/hooksmaster.py +++ b/lib/hooksmaster.py @@ -39,7 +39,7 @@ from ganeti import compat from ganeti import pathutils -def _RpcResultsToHooksResults(rpc_results): +def RpcResultsToHooksResults(rpc_results): """Function to convert RPC results to the format expected by HooksMaster. @type rpc_results: dict(node: L{rpc.RpcResult}) @@ -324,6 +324,20 @@ class HooksMaster(object): cluster_name = lu.cfg.GetClusterName() return HooksMaster(lu.op.OP_ID, lu.HPATH, nodes, hooks_execution_fn, - _RpcResultsToHooksResults, lu.BuildHooksEnv, + RpcResultsToHooksResults, lu.BuildHooksEnv, lu.PreparePostHookNodes, lu.LogWarning, lu.HTYPE, cluster_name, master_name, master_uuid, job_id) + + +def ExecGlobalPostHooks(opcode, master_name, rpc_runner, log_fn, + cluster_name, master_uuid, job_id, status): + """ Build hooks manager and executes global post hooks just on the master + """ + hm = HooksMaster(opcode, hooks_path=None, nodes=([], [master_name]), + hooks_execution_fn=rpc_runner, + hooks_results_adapt_fn=RpcResultsToHooksResults, + build_env_fn=None, prepare_post_nodes_fn=None, + log_fn=log_fn, htype=None, cluster_name=cluster_name, + master_name=master_name, master_uuid=master_uuid, + job_id=job_id) + hm.RunPhase(constants.HOOKS_PHASE_POST, is_global=True, post_status=status) diff --git a/lib/mcpu.py b/lib/mcpu.py index 8bb7969..63cc8ee 100644 --- a/lib/mcpu.py +++ b/lib/mcpu.py @@ -662,40 +662,24 @@ class Processor(object): raise errors.OpResultError("Opcode result does not match %s: %s" % (resultcheck_fn, utils.Truncate(result, 80))) - def ExecOpCode(self, op, cbs, timeout=None): + def _PrepareLockListsAndExecLU(self, op, lu_class, calc_timeout): """Execute an opcode. - @type op: an OpCode instance @param op: the opcode to be executed - @type cbs: L{OpExecCbBase} - @param cbs: Runtime callbacks - @type timeout: float or None - @param timeout: Maximum time to acquire all locks, None for no timeout + @param lu_class: the LU class implementing the current opcode + @param calc_timeout: The function calculating the time remaining + to acquire all locks, None for no timeout @raise LockAcquireTimeout: In case locks couldn't be acquired in specified amount of time """ - if not isinstance(op, opcodes.OpCode): - raise errors.ProgrammerError("Non-opcode instance passed" - " to ExecOpcode (%s)" % type(op)) - - lu_class = self.DISPATCH_TABLE.get(op.__class__, None) - if lu_class is None: - raise errors.OpCodeUnknown("Unknown opcode") - - if timeout is None: - calc_timeout = lambda: None - else: - calc_timeout = utils.RunningTimeout(timeout, False).Remaining - - self._cbs = cbs try: if self._enable_locks: # Acquire the Big Ganeti Lock exclusively if this LU requires it, # and in a shared fashion otherwise (to prevent concurrent run with # an exclusive LU. self._AcquireLocks(locking.LEVEL_CLUSTER, locking.BGL, - not lu_class.REQ_BGL, False, calc_timeout()) + not lu_class.REQ_BGL, False, calc_timeout()) elif lu_class.REQ_BGL: raise errors.ProgrammerError("Opcode '%s' requires BGL, but locks are" " disabled" % op.OP_ID) @@ -718,15 +702,56 @@ class Processor(object): self._wconfdcontext, locking.LEVEL_NAMES[locking.LEVEL_CLUSTER]) self._cbs = None - self._CheckLUResult(op, result) + return result + + def ExecOpCode(self, op, cbs, timeout=None): + """Execute an opcode. + + @type op: an OpCode instance + @param op: the opcode to be executed + @type cbs: L{OpExecCbBase} + @param cbs: Runtime callbacks + @type timeout: float or None + @param timeout: Maximum time to acquire all locks, None for no timeout + @raise LockAcquireTimeout: In case locks couldn't be acquired in specified + amount of time + + """ + if not isinstance(op, opcodes.OpCode): + raise errors.ProgrammerError("Non-opcode instance passed" + " to ExecOpcode (%s)" % type(op)) - # The post hooks below are always executed with a SUCCESS status because - # all the possible errors during pre hooks and LU execution cause - # exception and therefore the statement below will be skipped. - if self._hm is not None: - self._hm.RunPhase(constants.HOOKS_PHASE_POST, node_names=None, - is_global=True, - post_status=constants.POST_HOOKS_STATUS_SUCCESS) + lu_class = self.DISPATCH_TABLE.get(op.__class__, None) + if lu_class is None: + raise errors.OpCodeUnknown("Unknown opcode") + + if timeout is None: + calc_timeout = lambda: None + else: + calc_timeout = utils.RunningTimeout(timeout, False).Remaining + + self._cbs = cbs + try: + result = self._PrepareLockListsAndExecLU(op, lu_class, calc_timeout) + + # The post hooks below are always executed with a SUCCESS status because + # all the possible errors during pre hooks and LU execution cause + # exception and therefore the statement below will be skipped. + if self._hm is not None: + self._hm.RunPhase(constants.HOOKS_PHASE_POST, node_names=None, + is_global=True, + post_status=constants.POST_HOOKS_STATUS_SUCCESS) + except: + # execute global post hooks with the failed status on any exception + hooksmaster.ExecGlobalPostHooks(op.OP_ID, self.cfg.GetMasterNodeName(), + self.rpc.call_hooks_runner, + logging.warning, + self.cfg.GetClusterName(), + self.cfg.GetMasterNode(), self.GetECId(), + constants.POST_HOOKS_STATUS_ERROR) + raise + + self._CheckLUResult(op, result) return result def Log(self, *args): -- 2.6.0.rc2.230.g3dd15c0
