On Mon, Mar 23, 2015 at 04:27:53PM +0100, 'Klaus Aehlig' via ganeti-devel wrote:
To avoid subtle races when destroying a cluster, we need to make
sure that the cluster destroy LU is the last LU ever executed.
That LU gets the BGL exclusively; however, it needs the daemons
for it to proceed, so the daemons will still be running at its
finish, thus causing the race. (Also note, that the watcher might
restart stopped daemons at any time.) Therefore, the last thing
that LU will do is to transfer the BGL to WConfD itself; WConfD
will also modify the configuration to a no-master state, thus
making sure it will refuse to restart.

Signed-off-by: Klaus Aehlig <[email protected]>
---
src/Ganeti/WConfd/Core.hs | 47 +++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/src/Ganeti/WConfd/Core.hs b/src/Ganeti/WConfd/Core.hs
index dadcd2c..d5bbf75 100644
--- a/src/Ganeti/WConfd/Core.hs
+++ b/src/Ganeti/WConfd/Core.hs
@@ -41,19 +41,28 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
module Ganeti.WConfd.Core where

import Control.Arrow ((&&&))
+import Control.Concurrent (myThreadId)
+import Control.Lens.Setter (set)
import Control.Monad (liftM, unless, when)
import qualified Data.Map as M
import qualified Data.Set as S
import Language.Haskell.TH (Name)
+import System.Posix.Process (getProcessID)
import qualified System.Random as Rand

import Ganeti.BasicTypes
+import qualified Ganeti.Constants as C
import qualified Ganeti.JSON as J
import qualified Ganeti.Locking.Allocation as L
-import Ganeti.Locking.Locks ( GanetiLocks(ConfigLock), LockLevel(LevelConfig)
-                            , lockLevel, LockLevel, ClientId )
+import Ganeti.Logging (logDebug)
+import Ganeti.Locking.Locks ( GanetiLocks(ConfigLock, BGL)
+                            , LockLevel(LevelConfig)
+                            , lockLevel, LockLevel
+                            , ClientType(ClientOther), ClientId(..) )
import qualified Ganeti.Locking.Waiting as LW
import Ganeti.Objects (ConfigData, DRBDSecret, LogicalVolume, Ip4Address)
+import Ganeti.Objects.Lens (configClusterL, clusterMasterNodeL)
+import Ganeti.WConfd.ConfigState (csConfigDataL)
import qualified Ganeti.WConfd.ConfigVerify as V
import Ganeti.WConfd.Language
import Ganeti.WConfd.Monad
@@ -309,10 +318,44 @@ guardedOpportunisticLockUnion :: Int
guardedOpportunisticLockUnion count cid req =
  modifyLockWaiting $ LW.guardedOpportunisticLockUnion count cid req

+-- * Preparation for cluster destruction
+
+-- | Prepare daemon for cluster destruction. This consists of
+-- verifying that the requester owns the BGL exclusively, transferring the BGL
+-- to WConfD itself, and modifying the configuration so that no
+-- node is the master any more. Note that, since the BGL exclusively,

probably a typo - something like s/since/since we own/ ?

+-- we can safely modify the configuration, as no other process can request
+-- changes.
+prepareClusterDestruction :: ClientId -> WConfdMonad ()
+prepareClusterDestruction cid = do
+  la <- readLockAllocation
+  unless (L.holdsLock cid BGL L.OwnExclusive la)
+    . failError $ "Cluster destruction requested without owning BGL exclusively"
+  logDebug $ "preparing cluster destruction as requested by " ++ show cid
+  -- transfer BGL to ourselves. We do this by adding a super-priority waiting
+  -- request and then releasing the BGL of the requestor.
+  dh <- daemonHandle
+  pid <- liftIO getProcessID
+  tid <- liftIO myThreadId
+  let mycid = ClientId { ciIdentifier = ClientOther $ "wconfd-" ++ show tid
+                       , ciLockFile = dhLivelock dh
+                       , ciPid = pid
+                       }
+  _ <- modifyLockWaiting $ LW.updateLocksWaiting
+                           (fromIntegral C.opPrioHighest - 1) mycid
+                           [L.requestExclusive BGL]
+  _ <- modifyLockWaiting $ LW.updateLocks cid [L.requestRelease BGL]
+  -- To avoid being restarted we change the configuration to a no-master
+  -- state.
+  modifyConfigState $ (,) ()
+    . set (csConfigDataL . configClusterL . clusterMasterNodeL) ""
+
+
-- * The list of all functions exported to RPC.

exportedFunctions :: [Name]
exportedFunctions = [ 'echo
+                    , 'prepareClusterDestruction
                    -- config
                    , 'readConfig
                    , 'writeConfig
--
2.2.0.rc0.207.ga3a616c


LGTM, no need to resend

Reply via email to