On Fri, 28 Aug 2015 at 13:44 'Klaus Aehlig' via ganeti-devel < [email protected]> wrote:
> As per our design, once a job belonging to the handling of > an incident failed, the incident handling is aborted and > tagged and marked as failed. Implement this. > > Signed-off-by: Klaus Aehlig <[email protected]> > --- > Makefile.am | 1 + > src/Ganeti/MaintD/FailIncident.hs | 91 > +++++++++++++++++++++++++++++++++++++++ > 2 files changed, 92 insertions(+) > create mode 100644 src/Ganeti/MaintD/FailIncident.hs > > diff --git a/Makefile.am b/Makefile.am > index 06361b5..acb80c3 100644 > --- a/Makefile.am > +++ b/Makefile.am > @@ -982,6 +982,7 @@ HS_LIB_SRCS = \ > src/Ganeti/MaintD/Balance.hs \ > src/Ganeti/MaintD/CleanupIncidents.hs \ > src/Ganeti/MaintD/CollectIncidents.hs \ > + src/Ganeti/MaintD/FailIncident.hs \ > src/Ganeti/MaintD/HandleIncidents.hs \ > src/Ganeti/MaintD/MemoryState.hs \ > src/Ganeti/MaintD/Server.hs \ > diff --git a/src/Ganeti/MaintD/FailIncident.hs > b/src/Ganeti/MaintD/FailIncident.hs > new file mode 100644 > index 0000000..c2d9db4 > --- /dev/null > +++ b/src/Ganeti/MaintD/FailIncident.hs > @@ -0,0 +1,91 @@ > +{-| Incident failing in the maintenance daemon > + > +This module implements the treatment of an incident, once > +a job failed. > + > +-} > + > +{- > + > +Copyright (C) 2015 Google Inc. > +All rights reserved. > + > +Redistribution and use in source and binary forms, with or without > +modification, are permitted provided that the following conditions are > +met: > + > +1. Redistributions of source code must retain the above copyright notice, > +this list of conditions and the following disclaimer. > + > +2. Redistributions in binary form must reproduce the above copyright > +notice, this list of conditions and the following disclaimer in the > +documentation and/or other materials provided with the distribution. 
> + > +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS > +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED > +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR > +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR > +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, > +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, > +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR > +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF > +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING > +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS > +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + > +-} > + > +module Ganeti.MaintD.FailIncident > + ( failIncident > + ) where > + > +import Control.Exception.Lifted (bracket) > +import Control.Lens.Setter (over) > +import Control.Monad (liftM, when) > +import Control.Monad.IO.Class (liftIO) > +import Data.IORef (IORef) > +import System.IO.Error (tryIOError) > + > +import Ganeti.BasicTypes (ResultT, mkResultT, GenericResult(..)) > +import Ganeti.JQueue (currentTimestamp) > +import Ganeti.Jobs (execJobsWaitOkJid) > +import Ganeti.Logging.Lifted > +import qualified Ganeti.Luxi as L > +import Ganeti.MaintD.MemoryState (MemoryState, getIncidents, > updateIncident) > +import Ganeti.MaintD.Utils (annotateOpCode) > +import Ganeti.Objects.Lens (incidentJobsL) > +import Ganeti.Objects.Maintenance (Incident(..), RepairStatus(..)) > +import Ganeti.OpCodes (OpCode(..)) > +import qualified Ganeti.Path as Path > +import Ganeti.Types (JobId, fromJobId, TagKind(..)) > + > +-- | Mark an incident as failed. 
> +markAsFailed :: IORef MemoryState -> Incident -> ResultT String IO () > +markAsFailed memstate incident = do > + let uuid = incidentUuid incident > + newtag = "maintd:repairfailed:" ++ uuid > Consider putting strings like these into a constant to avoid typos and inconsistencies. > + logInfo $ "Marking incident " ++ uuid ++ " as failed" > + now <- liftIO currentTimestamp > + luxiSocket <- liftIO Path.defaultQuerySocket > + jids <- bracket (mkResultT . liftM (either (Bad . show) Ok) > + . tryIOError $ L.getLuxiClient luxiSocket) > + (liftIO . L.closeClient) > + (mkResultT . execJobsWaitOkJid > + [[ annotateOpCode "marking incident handling as > failed" now > + . OpTagsSet TagKindNode [ newtag ] > + . Just $ incidentNode incident ]]) > + let incident' = over incidentJobsL (++ jids) > + $ incident { incidentRepairStatus = RSFailed > + , incidentTag = newtag > + } > + liftIO $ updateIncident memstate incident' > + > +-- | Mark the incident, if any, belonging to the given job as > +-- failed after having tagged it appropriately. > +failIncident :: IORef MemoryState -> JobId -> ResultT String IO () > +failIncident memstate jid = do > + incidents <- getIncidents memstate > + let affected = filter (elem jid . incidentJobs) incidents > + when (null affected) . logInfo > + $ "Job " ++ show (fromJobId jid) ++ " does not belong to an incident" > + mapM_ (markAsFailed memstate) affected > -- > 2.5.0.457.gab17608 > LGTM, thanks -- Helga Velroyen Software Engineer [email protected] Google Germany GmbH Dienerstraße 12 80331 München Geschäftsführer: Graham Law, Christine Elizabeth Flores Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg Diese E-Mail ist vertraulich. Wenn Sie nicht der richtige Adressat sind, leiten Sie diese bitte nicht weiter, informieren Sie den Absender und löschen Sie die E-Mail und alle Anhänge. Vielen Dank. This e-mail is confidential. 
If you are not the right addressee please do not forward it, please inform the sender, and please erase this e-mail including any attachments. Thanks.
