As luxid now starts jobs, make it verify that it is running on the master node by carrying out the Ganeti voting process.
Signed-off-by: Klaus Aehlig <[email protected]> --- src/Ganeti/Query/Server.hs | 65 ++++++++++++++++++++++++++++++++++++++++++++-- src/hluxid.hs | 2 ++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/Ganeti/Query/Server.hs b/src/Ganeti/Query/Server.hs index f9dc213..bf7ee97 100644 --- a/src/Ganeti/Query/Server.hs +++ b/src/Ganeti/Query/Server.hs @@ -32,18 +32,23 @@ module Ganeti.Query.Server import Control.Applicative import Control.Concurrent import Control.Exception -import Control.Monad (forever, when, mzero, guard, zipWithM, liftM, void) +import Control.Monad (forever, when, mzero, guard, zipWithM, liftM, void, + unless) import Control.Monad.IO.Class import Control.Monad.Trans (lift) import Control.Monad.Trans.Maybe import Data.Bits (bitSize) +import Data.Either (rights) +import qualified Data.Foldable as F import qualified Data.Set as Set (toList) import Data.IORef +import Data.List (partition) import Data.Maybe (fromMaybe) import qualified Text.JSON as J import Text.JSON (encode, showJSON, JSValue(..)) import System.Info (arch) import System.Directory +import System.Exit (ExitCode(..)) import System.Posix.Signals as P import qualified Ganeti.Constants as C @@ -445,9 +450,65 @@ activateMasterIP = runResultT $ do liftIO $ logDebug "finished activating master IP address" return () +-- | Gather votes from all nodes and verify that we we are +-- the master. Return True if the voting is won, False if +-- not enough +verifyMasterVotes :: IO (Result Bool) +verifyMasterVotes = runResultT $ do + liftIO $ logDebug "Gathering votes for the master node" + myName <- liftIO getFQDN + liftIO . logDebug $ "My hostname is " ++ myName + conf_file <- liftIO Path.clusterConfFile + config <- mkResultT $ Config.loadConfig conf_file + let nodes = F.toList $ configNodes config + votes <- liftIO . executeRpcCall nodes $ RpcCallMasterNodeName + let (missing, valid) = partition (isLeft . snd) votes + noDataNodes = map (nodeName . fst) missing + validVotes = map rpcResultMasterNodeNameMaster . rights $ map snd valid + inFavor = length $ filter (== myName) validVotes + voters = length nodes + unknown = length missing + liftIO . unless (null noDataNodes) . logWarning + . (++) "No voting RPC result from " $ show noDataNodes + liftIO . logDebug . (++) "Valid votes: " $ show validVotes + if 2 * inFavor > voters + then return True + else if 2 * (inFavor + unknown) > voters + then return False + else fail $ "Voting cannot be won by " ++ myName + ++ ", valid votes of " ++ show voters + ++ " are " ++ show validVotes + +-- | Verify, by voting, that this node is the master. Bad if we're not. +-- Allow the given number of retries to wait for not available nodes. +verifyMaster :: Int -> IO (Result ()) +verifyMaster retries = runResultT $ do + won <- mkResultT verifyMasterVotes + unless won $ + if retries <= 0 + then fail "Couldn't gather voting results of enough nodes" + else do + liftIO $ logDebug "Voting not final due to missing votes." + liftIO . threadDelay $ C.masterVotingRetryIntervall * 1000000 + mkResultT $ verifyMaster (retries - 1) + -- | Check function for luxid. checkMain :: CheckFn () -checkMain _ = return $ Right () +checkMain opts = + if optNoVoting opts + then if optYesDoIt opts + then return $ Right () + else do + logError "The no-voting option is dangerous and cannot be\ + \ given without providing yes-do-it as well." + return . Left $ ExitFailure C.exitFailure + else do + masterStatus <- verifyMaster C.masterVotingRetries + case masterStatus of + Bad s -> do + logError $ "Failed to verify master status: " ++ s + return . Left $ ExitFailure C.exitFailure + Ok _ -> return $ Right () -- | Prepare function for luxid. prepMain :: PrepFn () PrepResult diff --git a/src/hluxid.hs b/src/hluxid.hs index b4e9e54..b87ac7b 100644 --- a/src/hluxid.hs +++ b/src/hluxid.hs @@ -36,6 +36,8 @@ options = , oNoUserChecks , oDebug , oSyslogUsage + , oNoVoting + , oYesDoIt ] -- | Main function. -- 1.9.1.423.g4596e3a
