As luxid now starts jobs, make it verify that it
is running on the master node by carrying out the
Ganeti voting process.

Signed-off-by: Klaus Aehlig <[email protected]>
---
 src/Ganeti/Query/Server.hs | 65 ++++++++++++++++++++++++++++++++++++++++++++--
 src/hluxid.hs              |  2 ++
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/src/Ganeti/Query/Server.hs b/src/Ganeti/Query/Server.hs
index f9dc213..bf7ee97 100644
--- a/src/Ganeti/Query/Server.hs
+++ b/src/Ganeti/Query/Server.hs
@@ -32,18 +32,23 @@ module Ganeti.Query.Server
 import Control.Applicative
 import Control.Concurrent
 import Control.Exception
-import Control.Monad (forever, when, mzero, guard, zipWithM, liftM, void)
+import Control.Monad (forever, when, mzero, guard, zipWithM, liftM, void,
+                      unless)
 import Control.Monad.IO.Class
 import Control.Monad.Trans (lift)
 import Control.Monad.Trans.Maybe
 import Data.Bits (bitSize)
+import Data.Either (rights)
+import qualified Data.Foldable as F
 import qualified Data.Set as Set (toList)
 import Data.IORef
+import Data.List (partition)
 import Data.Maybe (fromMaybe)
 import qualified Text.JSON as J
 import Text.JSON (encode, showJSON, JSValue(..))
 import System.Info (arch)
 import System.Directory
+import System.Exit (ExitCode(..))
 import System.Posix.Signals as P
 
 import qualified Ganeti.Constants as C
@@ -445,9 +450,65 @@ activateMasterIP = runResultT $ do
   liftIO $ logDebug "finished activating master IP address"
   return ()
 
+-- | Gather votes from all nodes and verify that we we are
+-- the master. Return True if the voting is won, False if
+-- not enough
+verifyMasterVotes :: IO (Result Bool)
+verifyMasterVotes = runResultT $ do
+  liftIO $ logDebug "Gathering votes for the master node"
+  myName <- liftIO getFQDN
+  liftIO . logDebug $ "My hostname is " ++ myName
+  conf_file <- liftIO Path.clusterConfFile
+  config <- mkResultT $ Config.loadConfig conf_file
+  let nodes = F.toList $ configNodes config
+  votes <- liftIO . executeRpcCall nodes $ RpcCallMasterNodeName
+  let (missing, valid) = partition (isLeft . snd) votes
+      noDataNodes = map (nodeName . fst) missing
+      validVotes = map rpcResultMasterNodeNameMaster . rights $ map snd valid
+      inFavor = length $ filter (== myName) validVotes
+      voters = length nodes
+      unknown = length missing
+  liftIO . unless (null noDataNodes) . logWarning
+    . (++) "No voting RPC result from " $ show noDataNodes
+  liftIO . logDebug . (++) "Valid votes: " $ show validVotes
+  if 2 * inFavor > voters
+    then return True
+    else if 2 * (inFavor + unknown) > voters
+           then return False
+           else fail $ "Voting cannot be won by " ++ myName
+                       ++ ", valid votes of " ++ show voters
+                       ++ " are " ++ show validVotes
+
+-- | Verify, by voting, that this node is the master. Bad if we're not.
+-- Allow the given number of retries to wait for not available nodes.
+verifyMaster :: Int -> IO (Result ())
+verifyMaster retries = runResultT $ do
+  won <- mkResultT verifyMasterVotes
+  unless won $
+    if retries <= 0
+      then fail "Couldn't gather voting results of enough nodes"
+      else do
+        liftIO $ logDebug "Voting not final due to missing votes."
+        liftIO . threadDelay $ C.masterVotingRetryIntervall * 1000000
+        mkResultT $ verifyMaster (retries - 1)
+
 -- | Check function for luxid.
 checkMain :: CheckFn ()
-checkMain _ = return $ Right ()
+checkMain opts =
+  if optNoVoting opts
+    then if optYesDoIt opts
+           then return $ Right ()
+           else do
+             logError "The no-voting option is dangerous and cannot be\
+                      \ given without providing yes-do-it as well."
+             return . Left $ ExitFailure C.exitFailure
+    else do
+      masterStatus <- verifyMaster C.masterVotingRetries
+      case masterStatus of
+        Bad s -> do
+          logError $ "Failed to verify master status: " ++ s
+          return . Left $ ExitFailure C.exitFailure
+        Ok _ -> return $ Right ()
 
 -- | Prepare function for luxid.
 prepMain :: PrepFn () PrepResult
diff --git a/src/hluxid.hs b/src/hluxid.hs
index b4e9e54..b87ac7b 100644
--- a/src/hluxid.hs
+++ b/src/hluxid.hs
@@ -36,6 +36,8 @@ options =
   , oNoUserChecks
   , oDebug
   , oSyslogUsage
+  , oNoVoting
+  , oYesDoIt
   ]
 
 -- | Main function.
-- 
1.9.1.423.g4596e3a

Reply via email to