On Thu, Jul 09, 2015 at 04:11:37PM +0200, 'Klaus Aehlig' via ganeti-devel wrote:
For the moment, we wait a fixed amount of time between runs
of the main loop. Later, once maintd's state has been added
to the configuration, we will wait for all submitted jobs to
finish, but at least a configurable amount of time.
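
To make the intended end state concrete, here is a minimal sketch (not
part of this patch) of a loop that waits for all submitted jobs, but at
least a configurable minimum time; runRound and waitForJobs are
hypothetical placeholders for the real maintd logic:

  import Control.Concurrent (forkIO, threadDelay)
  import Control.Concurrent.MVar (newEmptyMVar, putMVar, takeMVar)
  import Control.Monad (forever)

  -- Hypothetical placeholders for the real maintd logic.
  runRound :: IO [Int]
  runRound = return []        -- would run one maintenance round and
                              -- return the ids of the submitted jobs

  waitForJobs :: [Int] -> IO ()
  waitForJobs _ = return ()   -- would block until the given jobs finished

  -- Run rounds forever; between rounds, wait for the submitted jobs,
  -- but at least the given minimum delay (in microseconds).
  maintenanceLoop :: Int -> IO ()
  maintenanceLoop minDelayMus = forever $ do
    jobs <- runRound
    done <- newEmptyMVar
    _ <- forkIO $ waitForJobs jobs >> putMVar done ()
    threadDelay minDelayMus
    takeMVar done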
Signed-off-by: Klaus Aehlig <[email protected]>
---
src/Ganeti/MaintD/Server.hs | 53 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 52 insertions(+), 1 deletion(-)
diff --git a/src/Ganeti/MaintD/Server.hs b/src/Ganeti/MaintD/Server.hs
index 6442650..582b0b4 100644
--- a/src/Ganeti/MaintD/Server.hs
+++ b/src/Ganeti/MaintD/Server.hs
@@ -42,15 +42,29 @@ module Ganeti.MaintD.Server
) where
import Control.Applicative ((<|>))
+import Control.Concurrent (forkIO, threadDelay)
+import Control.Monad (forever, void, unless)
+import Control.Monad.IO.Class (liftIO)
+import qualified Data.Set as Set
import Snap.Core (Snap, method, Method(GET), ifTop)
import Snap.Http.Server (httpServe)
import Snap.Http.Server.Config (Config)
+import System.IO.Error (tryIOError)
+import System.Time (getClockTime)
+import Ganeti.BasicTypes (GenericResult(..), ResultT, runResultT, mkResultT)
import qualified Ganeti.Constants as C
import Ganeti.Daemon ( OptType, CheckFn, PrepFn, MainFn, oDebug
, oNoVoting, oYesDoIt, oPort, oBindAddress, oNoDaemonize)
import Ganeti.Daemon.Utils (handleMasterVerificationOptions)
+import qualified Ganeti.HTools.Backend.Luxi as Luxi
+import qualified Ganeti.HTools.Container as Container
+import Ganeti.HTools.Loader (ClusterData(..), mergeData, checkData)
+import Ganeti.Logging.Lifted
+import Ganeti.MaintD.Autorepairs (harepTasks)
+import qualified Ganeti.Path as Path
import Ganeti.Runtime (GanetiDaemon(GanetiMaintd))
+import Ganeti.Types (JobId(..))
import Ganeti.Utils.Http (httpConfFromOpts, plainJSON, error404)
-- | Options list and functions.
@@ -70,6 +84,40 @@ type CheckResult = ()
-- | Type alias for prepMain results
type PrepResult = Config Snap ()
+-- | Load cluster data
+--
+-- At the moment, only the static data is fetched via luxi;
+-- once we support load-based balancing in maintd as well,
+-- we also need to query the MonDs for the load data.
+loadClusterData :: ResultT String IO ClusterData
+loadClusterData = do
+ now <- liftIO getClockTime
+ socket <- liftIO Path.defaultQuerySocket
+ either_inp <- liftIO . tryIOError $ Luxi.loadData socket
+ input_data <- mkResultT $ case either_inp of
+ Left e -> do
+ let msg = show e
+ logNotice $ "Couldn't read data from luxid: " ++ msg
+ return $ Bad msg
+ Right r -> return r
+ cdata <- mkResultT . return $ mergeData [] [] [] [] now input_data
+ let (msgs, nl) = checkData (cdNodes cdata) (cdInstances cdata)
+ unless (null msgs) . logDebug $ "Cluster data inconsistencies: " ++ show msgs
+ return $ cdata { cdNodes = nl }
+
+-- | Perform one round of maintenance
+maintenance :: ResultT String IO ()
+maintenance = do
+ liftIO $ threadDelay 60000000
+ logDebug "New round of maintenance started"
+ cData <- loadClusterData
+ let il = cdInstances cData
+ nl = cdNodes cData
+ nidxs = Set.fromList $ Container.keys nl
+ (nidxs', jobs) <- harepTasks (nl, il) nidxs
+ logDebug $ "Unaffected nodes " ++ show (Set.toList nidxs')
+ ++ ", jobs submitted " ++ show (map fromJobId jobs)
+
-- | The information to serve via HTTP
httpInterface :: Snap ()
httpInterface = ifTop (method GET $ plainJSON [1 :: Int])
@@ -85,5 +133,8 @@ prepMain opts _ = httpConfFromOpts GanetiMaintd opts
-- | Main function.
main :: MainFn CheckResult PrepResult
-main _ _ httpConf =
+main _ _ httpConf = do
+ void . forkIO . forever $ do
+ res <- runResultT maintenance
+ logDebug $ "Maintenance round done, result is " ++ show res
httpServe httpConf httpInterface
--
2.4.3.573.g4eafbef
LGTM, thanks