This is an attempt to fix balancing on 2.16 when using the KVM hypervisor. The problem with KVM is that it reports the 'active' memory from /proc/meminfo as the node memory, a.k.a. nMem, a.k.a. memory_dom0.
This is plain wrong, as 'active' memory in Linux includes memory from all running processes, including instances (qemu processes). We can't calculate the node memory on the hypervisor side: simply subtracting the RES used by qemu processes would give us only an approximate value, and it would still be the actively used memory rather than an upper hard limit like the dom0 size in Xen. As KVM does not provide proper isolation for node memory, the only thing we can do is to instruct htools to reserve a static value for node memory. This is solved in 2.17 in patch f971341471, which implements the obtainNodeMemory function that takes the live dom0 size in Xen, but uses a configurable cluster parameter for KVM. I've opted for _not_ backporting the cluster parameter from 2.17 for these reasons: * this way the change is isolated to hbal, with no need to change the cluster config; * obtainNodeMemory is for Luxi only, whereas this flag can be used with any backend; * the goal is to fix balancing as it is, without adding support for memory overcommitment; doing so would require backporting a bunch of other dependencies: d2bfc50608, f971341471 and possibly more. The flag is ignored on Xen, and on KVM it is set by default to 4G, so the change should be backward compatible (no need to specify this flag unless you need a different value).
Signed-off-by: Viktor Bachraty <[email protected]> --- src/Ganeti/HTools/Backend/Luxi.hs | 13 ++++++++----- src/Ganeti/HTools/CLI.hs | 13 ++++++++++++- src/Ganeti/HTools/ExtLoader.hs | 6 +++++- src/Ganeti/HTools/Loader.hs | 9 +++++++++ src/Ganeti/HTools/Node.hs | 7 +++++++ src/Ganeti/HTools/Program/Hail.hs | 1 + src/Ganeti/HTools/Program/Hbal.hs | 1 + src/Ganeti/HTools/Program/Hinfo.hs | 1 + src/Ganeti/HTools/Program/Hroller.hs | 1 + src/Ganeti/HTools/Program/Hspace.hs | 1 + src/Ganeti/HTools/Program/Hsqueeze.hs | 1 + 11 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/Ganeti/HTools/Backend/Luxi.hs b/src/Ganeti/HTools/Backend/Luxi.hs index 01b420f1e..e6db471d2 100644 --- a/src/Ganeti/HTools/Backend/Luxi.hs +++ b/src/Ganeti/HTools/Backend/Luxi.hs @@ -48,6 +48,7 @@ import qualified Ganeti.Luxi as L import qualified Ganeti.Query.Language as Qlang import Ganeti.HTools.Loader import Ganeti.HTools.Types +import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Group as Group import qualified Ganeti.HTools.Node as Node import qualified Ganeti.HTools.Instance as Instance @@ -179,7 +180,7 @@ parseInstance :: NameAssoc -> [(JSValue, JSValue)] -> Result (String, Instance.Instance) parseInstance ktn [ name, disk, mem, vcpus - , status, pnode, snodes, tags, + , status, pnode, snodes, tags , auto_balance, disk_template, su , dsizes, dspindles, forthcoming ] = do xname <- annotateResult "Parsing new instance" (fromJValWithStatus name) @@ -257,14 +258,15 @@ parseNode ktg [ name, mtotal, mnode, mfree, dtotal, dfree parseNode _ v = fail ("Invalid node query result: " ++ show v) -- | Parses the cluster tags. 
-getClusterData :: JSValue -> Result ([String], IPolicy, String) +getClusterData :: JSValue -> Result ([String], IPolicy, String, String) getClusterData (JSObject obj) = do let errmsg = "Parsing cluster info" obj' = fromJSObject obj ctags <- tryFromObj errmsg obj' "tags" cpol <- tryFromObj errmsg obj' "ipolicy" master <- tryFromObj errmsg obj' "master" - return (ctags, cpol, master) + hypervisor <- tryFromObj errmsg obj' "default_hypervisor" + return (ctags, cpol, master, hypervisor) getClusterData _ = Bad "Cannot parse cluster info, not a JSON record" @@ -308,15 +310,16 @@ readData master = parseData :: (Result JSValue, Result JSValue, Result JSValue, Result JSValue) -> Result ClusterData parseData (groups, nodes, instances, cinfo) = do + (ctags, cpol, master, hypervisor) <- cinfo >>= getClusterData group_data <- groups >>= getGroups let (group_names, group_idx) = assignIndices group_data node_data <- nodes >>= getNodes group_names let (node_names, node_idx) = assignIndices node_data inst_data <- instances >>= getInstances node_names let (_, inst_idx) = assignIndices inst_data - (ctags, cpol, master) <- cinfo >>= getClusterData node_idx' <- setMaster node_names node_idx master - return (ClusterData group_idx node_idx' inst_idx ctags cpol) + let node_idx'' = Container.map (`Node.setHypervisor` hypervisor) node_idx' + return (ClusterData group_idx node_idx'' inst_idx ctags cpol) -- | Top level function for data loading. 
loadData :: String -- ^ Unix socket to use as source diff --git a/src/Ganeti/HTools/CLI.hs b/src/Ganeti/HTools/CLI.hs index 7ca25d973..687e141bd 100644 --- a/src/Ganeti/HTools/CLI.hs +++ b/src/Ganeti/HTools/CLI.hs @@ -112,6 +112,7 @@ module Ganeti.HTools.CLI , oShowVer , oShowComp , oSkipNonRedundant + , oStaticKvmNodeMemory , oStdSpec , oTargetResources , oTieredSpec @@ -203,6 +204,7 @@ data Options = Options , optShowNodes :: Maybe [String] -- ^ Whether to show node status , optShowVer :: Bool -- ^ Just show the program version , optSkipNonRedundant :: Bool -- ^ Skip nodes with non-redundant instance + , optStaticKvmNodeMemory :: Int -- ^ Use static value for node memory on KVM , optStdSpec :: Maybe RSpec -- ^ Requested standard specs , optTargetResources :: Double -- ^ Target resources for squeezing , optTestCount :: Maybe Int -- ^ Optional test count override @@ -259,6 +261,7 @@ defaultOptions = Options , optNodeSim = [] , optNodeTags = Nothing , optSkipNonRedundant = False + , optStaticKvmNodeMemory = 4096 , optOffline = [] , optRestrictToNodes = Nothing , optOfflineMaintenance = False @@ -659,7 +662,7 @@ oNodeTags = (ReqArg (\ f opts -> Ok opts { optNodeTags = Just $ sepSplit ',' f }) "TAG,...") "Restrict to nodes with the given tags", OptComplString) - + oOfflineMaintenance :: OptType oOfflineMaintenance = (Option "" ["offline-maintenance"] @@ -762,6 +765,14 @@ oSkipNonRedundant = "Skip nodes that host a non-redundant instance", OptComplNone) +oStaticKvmNodeMemory :: OptType +oStaticKvmNodeMemory = + (Option "" ["static-kvm-node-memory"] + (reqWithConversion (tryRead "static node memory") + (\i opts -> Ok opts { optStaticKvmNodeMemory = i }) "N") + "use static node memory [in MB] on KVM instead of value reported by hypervisor.", + OptComplInteger) + oStdSpec :: OptType oStdSpec = (Option "" ["standard-alloc"] diff --git a/src/Ganeti/HTools/ExtLoader.hs b/src/Ganeti/HTools/ExtLoader.hs index 56e2e8010..eb0c21c28 100644 --- a/src/Ganeti/HTools/ExtLoader.hs +++ 
b/src/Ganeti/HTools/ExtLoader.hs @@ -61,7 +61,7 @@ import qualified Ganeti.HTools.Backend.IAlloc as IAlloc import qualified Ganeti.HTools.Backend.MonD as MonD import Ganeti.HTools.CLI import Ganeti.HTools.Loader (mergeData, checkData, ClusterData(..) - , commonSuffix, clearDynU) + , commonSuffix, clearDynU, setStaticKvmNodeMem) import Ganeti.HTools.Types import Ganeti.Utils (sepSplit, tryRead, exitIfBad, exitWhen) @@ -122,8 +122,12 @@ loadExternalData opts = do now <- getClockTime let ignoreDynU = optIgnoreDynu opts + nodeMem = optStaticKvmNodeMemory opts eff_u = if ignoreDynU then [] else util_data ldresult = input_data >>= (if ignoreDynU then clearDynU else return) + >>= (if nodeMem >= 0 + then flip setStaticKvmNodeMem nodeMem + else return) >>= mergeData eff_u exTags selInsts exInsts now cdata <- exitIfBad "failed to load data, aborting" ldresult (cdata', ok) <- runWriterT $ if optMonD opts diff --git a/src/Ganeti/HTools/Loader.hs b/src/Ganeti/HTools/Loader.hs index 50ffbc1cc..ed7ae22f6 100644 --- a/src/Ganeti/HTools/Loader.hs +++ b/src/Ganeti/HTools/Loader.hs @@ -55,6 +55,7 @@ module Ganeti.HTools.Loader , emptyCluster , extractDesiredLocations , updateDesiredLocationTags + , setStaticKvmNodeMem ) where import Control.Monad @@ -372,6 +373,14 @@ clearDynU cdata@(ClusterData _ _ il _ _) = let il2 = Container.map (\ inst -> inst {Instance.util = zeroUtil }) il in Ok cdata { cdInstances = il2 } +-- | Update cluster data to use static node memory on KVM. +setStaticKvmNodeMem :: ClusterData -> Int -> Result ClusterData +setStaticKvmNodeMem cdata@(ClusterData _ nl _ _ _) staticNodeMem = + let nl' = Container.map (\node -> node { Node.nMem = if Node.hypervisor node == "kvm" + then staticNodeMem + else Node.nMem node }) nl + in Ok cdata { cdNodes = nl' } + -- | Checks the cluster data for consistency. 
checkData :: Node.List -> Instance.List -> ([String], Node.List) diff --git a/src/Ganeti/HTools/Node.hs b/src/Ganeti/HTools/Node.hs index 79993ad3c..8b2f25958 100644 --- a/src/Ganeti/HTools/Node.hs +++ b/src/Ganeti/HTools/Node.hs @@ -59,6 +59,7 @@ module Ganeti.HTools.Node , setMigrationTags , setRecvMigrationTags , setLocationTags + , setHypervisor -- * Tag maps , addTags , delTags @@ -213,6 +214,7 @@ data Node = Node , instanceMap :: Map.Map (String, String) Int -- ^ Number of instances with -- each exclusion/location tags -- pair + , hypervisor :: String -- ^ Active hypervisor on the node } deriving (Show, Eq) {- A note on how we handle spindles @@ -378,6 +380,7 @@ create name_init mem_t_init mem_n_init mem_f_init , locationTags = Set.empty , locationScore = 0 , instanceMap = Map.empty + , hypervisor = "" } -- | Conversion formula from mDsk\/tDsk to loDsk. @@ -432,6 +435,10 @@ setLocationTags t val = t { locationTags = val } setXmem :: Node -> Int -> Node setXmem t val = t { xMem = val } +-- | Sets the hypervisor attribute. +setHypervisor :: Node -> String -> Node +setHypervisor t val = t { hypervisor = val } + -- | Sets the max disk usage ratio. setMdsk :: Node -> Double -> Node setMdsk t val = t { mDsk = val, loDsk = mDskToloDsk val (tDsk t) } diff --git a/src/Ganeti/HTools/Program/Hail.hs b/src/Ganeti/HTools/Program/Hail.hs index 7e369660a..1ef240858 100644 --- a/src/Ganeti/HTools/Program/Hail.hs +++ b/src/Ganeti/HTools/Program/Hail.hs @@ -70,6 +70,7 @@ options = , oRestrictToNodes , oMonD , oMonDXen + , oStaticKvmNodeMemory ] -- | The list of arguments supported by the program. 
diff --git a/src/Ganeti/HTools/Program/Hbal.hs b/src/Ganeti/HTools/Program/Hbal.hs index 084433ae8..eafdabdf5 100644 --- a/src/Ganeti/HTools/Program/Hbal.hs +++ b/src/Ganeti/HTools/Program/Hbal.hs @@ -95,6 +95,7 @@ options = do , oVerbose , oQuiet , oOfflineNode + , oStaticKvmNodeMemory , oMinScore , oMaxCpu , oMinDisk diff --git a/src/Ganeti/HTools/Program/Hinfo.hs b/src/Ganeti/HTools/Program/Hinfo.hs index 0c49faa48..86ae8ef57 100644 --- a/src/Ganeti/HTools/Program/Hinfo.hs +++ b/src/Ganeti/HTools/Program/Hinfo.hs @@ -75,6 +75,7 @@ options = do , oIgnoreDyn , oMonD , oMonDDataFile + , oStaticKvmNodeMemory ] -- | The list of arguments supported by the program. diff --git a/src/Ganeti/HTools/Program/Hroller.hs b/src/Ganeti/HTools/Program/Hroller.hs index 74730ed89..5fa1ac7e7 100644 --- a/src/Ganeti/HTools/Program/Hroller.hs +++ b/src/Ganeti/HTools/Program/Hroller.hs @@ -85,6 +85,7 @@ options = do , oIgnoreNonRedundant , oForce , oOneStepOnly + , oStaticKvmNodeMemory ] -- | The list of arguments supported by the program. diff --git a/src/Ganeti/HTools/Program/Hspace.hs b/src/Ganeti/HTools/Program/Hspace.hs index 9db17dabf..8b3554b91 100644 --- a/src/Ganeti/HTools/Program/Hspace.hs +++ b/src/Ganeti/HTools/Program/Hspace.hs @@ -93,6 +93,7 @@ options = do , oStdSpec , oTieredSpec , oSaveCluster + , oStaticKvmNodeMemory ] -- | The list of arguments supported by the program. diff --git a/src/Ganeti/HTools/Program/Hsqueeze.hs b/src/Ganeti/HTools/Program/Hsqueeze.hs index 6bdb8e554..fa6cd7f2c 100644 --- a/src/Ganeti/HTools/Program/Hsqueeze.hs +++ b/src/Ganeti/HTools/Program/Hsqueeze.hs @@ -84,6 +84,7 @@ options = do , oPrintCommands , oVerbose , oNoHeaders + , oStaticKvmNodeMemory ] -- | The list of arguments supported by the program. -- 2.12.0.367.g23dc2f6d3c-goog
