On Tue, Jul 14, 2015 at 05:59:31PM +0200, 'Klaus Aehlig' via ganeti-devel wrote:
Also, back off if a round is bad. This is usually the case
if the communication with some essential daemon has failed. In
this case, we do not want to put additional load on the system.

Signed-off-by: Klaus Aehlig <[email protected]>
---
src/Ganeti/MaintD/Server.hs | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/Ganeti/MaintD/Server.hs b/src/Ganeti/MaintD/Server.hs
index 582b0b4..ef0134c 100644
--- a/src/Ganeti/MaintD/Server.hs
+++ b/src/Ganeti/MaintD/Server.hs
@@ -42,8 +42,8 @@ module Ganeti.MaintD.Server
  ) where

import Control.Applicative ((<|>))
-import Control.Concurrent (forkIO, threadDelay)
-import Control.Monad (forever, void, unless)
+import Control.Concurrent (forkIO)
+import Control.Monad (forever, void, unless, when)
import Control.Monad.IO.Class (liftIO)
import qualified Data.Set as Set
import Snap.Core (Snap, method, Method(GET), ifTop)
@@ -52,7 +52,8 @@ import Snap.Http.Server.Config (Config)
import System.IO.Error (tryIOError)
import System.Time (getClockTime)

-import Ganeti.BasicTypes (GenericResult(..), ResultT, runResultT, mkResultT)
+import Ganeti.BasicTypes ( GenericResult(..), ResultT, runResultT, mkResultT
+                         , withErrorT, isBad)
import qualified Ganeti.Constants as C
import Ganeti.Daemon ( OptType, CheckFn, PrepFn, MainFn, oDebug
                     , oNoVoting, oYesDoIt, oPort, oBindAddress, oNoDaemonize)
@@ -60,12 +61,15 @@ import Ganeti.Daemon.Utils (handleMasterVerificationOptions)
import qualified Ganeti.HTools.Backend.Luxi as Luxi
import qualified Ganeti.HTools.Container as Container
import Ganeti.HTools.Loader (ClusterData(..), mergeData, checkData)
+import Ganeti.THH.HsRPC (runRpcClient)
import Ganeti.Logging.Lifted
import Ganeti.MaintD.Autorepairs (harepTasks)
import qualified Ganeti.Path as Path
import Ganeti.Runtime (GanetiDaemon(GanetiMaintd))
import Ganeti.Types (JobId(..))
+import Ganeti.Utils (threadDelaySeconds)
import Ganeti.Utils.Http (httpConfFromOpts, plainJSON, error404)
+import Ganeti.WConfd.Client (getWConfdClient, maintenanceRoundDelay)

-- | Options list and functions.
options :: [OptType]
@@ -108,7 +112,9 @@ loadClusterData = do
-- | Perform one round of maintenance
maintenance :: ResultT String IO ()
maintenance = do
-  liftIO $ threadDelay 60000000
+  wconfdClient <- liftIO $ getWConfdClient =<< Path.defaultWConfdSocket
+  delay <- withErrorT show $ runRpcClient maintenanceRoundDelay wconfdClient
+  liftIO $ threadDelaySeconds delay

While the client will time out on its own, it would probably be better to close it explicitly (e.g. using `bracket`) once it is no longer needed.

  logDebug "New round of maintenance started"
  cData <- loadClusterData
  let il = cdInstances cData
@@ -137,4 +143,7 @@ main _ _ httpConf = do
  void . forkIO . forever $ do
    res <- runResultT maintenance
    logDebug $ "Maintenance round done, result is " ++ show res
+    when (isBad res) $ do
+      logInfo "Backing off after a round with internal errors"
+      threadDelaySeconds C.maintdDefaultRoundDelay
  httpServe httpConf httpInterface
--
2.4.3.573.g4eafbef

Rest LGTM

Reply via email to