As harep is a tool submitting jobs, make it identify itself in the reason trail. Also allow giving a reason for calling harep to be able to pass on further information, which can also be used in job filtering.
Signed-off-by: Klaus Aehlig <aeh...@google.com> --- src/Ganeti/HTools/Program/Harep.hs | 49 ++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/src/Ganeti/HTools/Program/Harep.hs b/src/Ganeti/HTools/Program/Harep.hs index afeccaa..54628b0 100644 --- a/src/Ganeti/HTools/Program/Harep.hs +++ b/src/Ganeti/HTools/Program/Harep.hs @@ -40,6 +40,7 @@ module Ganeti.HTools.Program.Harep , options) where import Control.Exception (bracket) +import Control.Lens (over) import Control.Monad import Data.Function import Data.List @@ -51,8 +52,11 @@ import qualified Data.Map as Map import Ganeti.BasicTypes import Ganeti.Common import Ganeti.Errors +import Ganeti.JQueue (currentTimestamp, reasonTrailTimestamp) +import Ganeti.JQueue.Objects (Timestamp) import Ganeti.Jobs import Ganeti.OpCodes +import Ganeti.OpCodes.Lens (metaParamsL, opReasonL) import Ganeti.OpParams import Ganeti.Types import Ganeti.Utils @@ -69,6 +73,9 @@ import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Instance as Instance import qualified Ganeti.HTools.Node as Node +import Ganeti.Version (version) + + -- | Options list and functions. options :: IO [OptType] options = do @@ -76,11 +83,22 @@ options = do return [ luxi , oJobDelay + , oReason ] arguments :: [ArgCompletion] arguments = [] +-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment +-- about what generated the opcode. +annotateOpCode :: Maybe String -> Timestamp -> OpCode -> MetaOpCode +annotateOpCode reason ts = + over (metaParamsL . opReasonL) + (++ [( "harep", fromMaybe ("harep " ++ version ++ " called") reason + , reasonTrailTimestamp ts)]) + . setOpComment ("automated repairs by harep " ++ version) + . wrapOpCode + data InstanceData = InstanceData { arInstance :: Instance.Instance , arState :: AutoRepairStatus , tagsToRemove :: [String] @@ -211,8 +229,8 @@ arStatusCmp instData arData = "programming error: ArNeedsRepair found as an initial state" -- | Query jobs of a pending repair, returning the new instance data. -processPending :: L.Client -> InstanceData -> IO InstanceData -processPending client instData = +processPending :: Options -> L.Client -> InstanceData -> IO InstanceData +processPending opts client instData = case arState instData of (ArPendingRepair arData) -> do sts <- L.queryJobsStatus client $ arJobs arData @@ -228,7 +246,7 @@ processPending client instData = destSt = arStateName arState' putStrLn ("Moving " ++ iname ++ " from " ++ show srcSt ++ " to " ++ show destSt) - commitChange client instData' + commitChange opts client instData' where instData' = instData { arState = arState' @@ -264,13 +282,15 @@ updateTag arData = -- 'AutoRepairData', add its tag to the instance object. Additionally, if -- /tagsToRemove/ is not empty, remove those tags from the instance object. The -- returned /InstanceData/ object always has an empty /tagsToRemove/. -commitChange :: L.Client -> InstanceData -> IO InstanceData -commitChange client instData = do +commitChange :: Options -> L.Client -> InstanceData -> IO InstanceData +commitChange opts client instData = do + now <- currentTimestamp let iname = Instance.name $ arInstance instData arData = getArData $ arState instData rmTags = tagsToRemove instData execJobsWaitOk' opcodes = do - res <- execJobsWaitOk [map wrapOpCode opcodes] client + res <- execJobsWaitOk + [map (annotateOpCode (optReason opts) now) opcodes] client case res of Ok _ -> return () Bad e -> exitErr e @@ -382,12 +402,13 @@ detectBroken nl inst = -- DTFile, DTSharedFile, DTBlock, DTRbd, DTExt. -- | Perform the suggested repair on an instance if its policy allows it. -doRepair :: L.Client -- ^ The Luxi client +doRepair :: Options + -> L.Client -- ^ The Luxi client -> Double -- ^ Delay to insert before the first repair opcode -> InstanceData -- ^ The instance data -> (AutoRepairType, [OpCode]) -- ^ The repair job to perform -> IO InstanceData -- ^ The updated instance data -doRepair client delay instData (rtype, opcodes) = +doRepair opts client delay instData (rtype, opcodes) = let inst = arInstance instData ipol = Instance.arPolicy inst iname = Instance.name inst @@ -407,8 +428,9 @@ doRepair client delay instData (rtype, opcodes) = putStrLn ("Not performing a repair of type " ++ show rtype ++ " on " ++ iname ++ " because only repairs up to " ++ show maxtype ++ " are allowed") - commitChange client instData' -- Adds "enoperm" result label. + commitChange opts client instData' -- Adds "enoperm" result label. else do + now <- currentTimestamp putStrLn ("Executing " ++ show rtype ++ " repair on " ++ iname) -- After submitting the job, we must write an autorepair:pending tag, @@ -445,7 +467,8 @@ doRepair client delay instData (rtype, opcodes) = uuid <- newUUID time <- getClockTime - jids <- submitJobs [map wrapOpCode opcodes'] client + jids <- submitJobs [map (annotateOpCode (optReason opts) now) opcodes'] + client case jids of Bad e -> exitErr e @@ -456,7 +479,7 @@ doRepair client delay instData (rtype, opcodes) = , tagsToRemove = delCurTag instData } in - commitChange client instData' -- Adds "pending" label. + commitChange opts client instData' -- Adds "pending" label. otherSt -> do putStrLn ("Not repairing " ++ iname ++ " because it's in state " ++ @@ -480,7 +503,7 @@ main opts args = do -- First step: check all pending repairs, see if they are completed. iniData' <- bracket (L.getLuxiClient master) L.closeClient $ - forM iniData . processPending + forM iniData . processPending opts -- Second step: detect any problems. let repairs = map (detectBroken nl . arInstance) iniData' @@ -489,7 +512,7 @@ main opts args = do let maybeRepair c (i, r) = maybe (return i) (repairHealthy c i) r jobDelay = optJobDelay opts repairHealthy c i = case arState i of - ArHealthy _ -> doRepair c jobDelay i + ArHealthy _ -> doRepair opts c jobDelay i _ -> const (return i) repairDone <- bracket (L.getLuxiClient master) L.closeClient $ -- 2.4.3.573.g4eafbef