Hi!

When you say GRE, I hope you mean GRES, right?

At our site we have created a Lua job submission filter where we set a default
value for a custom GRES (the GPFS pagepool size).

I am sending you the script (attached) so you can see what we are doing; if you
want, you can do something similar for your bandwidth GRES.

You should also check the online documentation on how to build Slurm with Lua
support and enable your custom job_submit script.
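
A minimal sketch of the slurm.conf side (assuming Slurm was built with Lua
support) would be something like:

  # slurm.conf: load the Lua job_submit plugin; it expects job_submit.lua
  # in the same directory as slurm.conf
  JobSubmitPlugins=lua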

Cheers,
Chrysovalantis Paschoulas

On 02/19/2015 04:00 PM, Josh McSavaney wrote:
Hey Brian,

Can't say for GRE requests, but I'd advise looking into DefMemPerNode and
DefMemPerCPU with regard to default memory requests.
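
For example, a minimal slurm.conf sketch (the value here is only illustrative):

  # Default memory (in MB) per allocated CPU, applied when a job specifies
  # neither --mem nor --mem-per-cpu
  DefMemPerCPU=2048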

Have a good one,

-J

On Thu Feb 19 2015 at 9:43:21 AM Brian B
<[email protected]> wrote:
Greetings,

Our cluster is used for a variety of different processes, but some are extremely
IO-intensive and can cause the whole file system to hang if too many are running
at once. I created a “bandwidth” GRE that, when properly used, solves the problem.
Unfortunately, not all users are reading the instructions and requesting the
proper bandwidth. Is there a way to set a default GRE request? Whether it is set
on the partition, user, account, etc., I do not care. Similarly, can one set a
default memory request? The only default I have found is DefaultTime.

http://slurm.schedmd.com/slurm.conf.html
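
For reference, the general shape of a count-based GRES like this (the names and
counts below are only illustrative) is roughly:

  # slurm.conf
  GresTypes=bandwidth
  NodeName=node[01-10] Gres=bandwidth:100 ...

  # gres.conf on each compute node
  Name=bandwidth Count=100

  # what users are supposed to request, e.g.
  sbatch --gres=bandwidth:10 job.sh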

Regards,
Brian


--[[

 Example lua script demonstrating the SLURM job_submit/lua interface.
 This is only an example, not meant for use in its current form.

 Leave the function names, arguments, local variables and setmetatable
 set up logic in each function unchanged. Change only the logic after
 the line containing "*** YOUR LOGIC GOES BELOW ***".

 For use, this script should be copied into a file named "job_submit.lua"
 in the same directory as the SLURM configuration file, slurm.conf.

--]]

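-- Wrap each partition record from part_list so that its fields are resolved
-- on demand through part_rec_meta (defined in the initialization code below).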
function _build_part_table ( part_list )
        local part_rec = {}
        for i in ipairs(part_list) do
                part_rec[i] = { part_rec_ptr=part_list[i] }
                setmetatable (part_rec[i], part_rec_meta)
        end
        return part_rec
end

--########################################################################--
--
--  SLURM job_submit/lua interface:
--
--########################################################################--

local default_gpfs_poolsize = 512
local min_gpfs_poolsize = 4
local max_gpfs_poolsize = 48400
local gpfs_gres_name = "gpfs"
local sm = { ["k"]=1024, ["m"]=1048576, ["g"]=1073741824, ["t"]=1099511627776 }
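
-- The gres parsing below assumes a comma-separated string of "name:count"
-- pairs, e.g. "gpu:2,gpfs:512"; a count may carry a k/m/g/t suffix that is
-- expanded via the sm table (e.g. "gpfs:4k" -> 4096).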

function slurm_job_submit ( job_desc, part_list, submit_uid )
        setmetatable (job_desc, job_req_meta)
        local part_rec = _build_part_table (part_list)

--      *** YOUR LOGIC GOES BELOW ***
        local val,k,v,d,s
        local gres_table = {}

        if job_desc.gres ~= nil then
                if string.find(job_desc.gres,gpfs_gres_name) == nil then
                        job_desc.gres = job_desc.gres..","..gpfs_gres_name..":"..default_gpfs_poolsize
                else
                        for k,v in string.gmatch(job_desc.gres,"([^:]+):([^,]+),?") do gres_table[k] = v end
                        val = tonumber(gres_table[gpfs_gres_name])
                        if val == nil then
                                d,s = string.match(gres_table[gpfs_gres_name],"(%d+)(%a+)")
                                if d == nil then 
                                        log_err("[LUA] slurm_job_submit:: invalid gpfs poolsize in gres: %s for user: %s", job_desc.gres, submit_uid)
                                        return 2072 
                                end
                                s = string.lower(s)
                                if sm[s] == nil then 
                                        log_err("[LUA] slurm_job_submit:: invalid gpfs poolsize in gres: %s for user: %s", job_desc.gres, submit_uid)
                                        return 2072
                                end
                                val = tonumber(d) * sm[s]
                                log_debug("[LUA] slurm_job_submit:: expanded gpfs value: %s", val)
                        end
                        log_debug("[LUA] slurm_job_submit:: requested size for gpfs: %s", val)
                        
                        if val<min_gpfs_poolsize or val>max_gpfs_poolsize then 
                                log_err("[LUA] slurm_job_submit:: out of range requested gpfs poolsize in gres: %s for user: %s", job_desc.gres, submit_uid)
                                return 2072 
                        end
                end
        else
                job_desc.gres = gpfs_gres_name..":"..default_gpfs_poolsize
        end

        if job_desc.gres ~= nil then
                log_info("[LUA] slurm_job_submit:: Gres in job description %s for user %s", job_desc.gres, submit_uid)
        end

        return 0
end
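
-- Illustrative effect of slurm_job_submit on job_desc.gres:
--   (no gres requested)   -> "gpfs:512"
--   "gpu:2"               -> "gpu:2,gpfs:512"
--   "gpfs:4k"             -> accepted (4 * 1024 = 4096, within the limits)
--   "gpfs:1"              -> rejected (below min_gpfs_poolsize)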

function slurm_job_modify ( job_desc, job_rec, part_list, modify_uid )
        setmetatable (job_desc, job_req_meta)
        setmetatable (job_rec,  job_rec_meta)
        local part_rec = _build_part_table (part_list)

--      *** YOUR LOGIC GOES BELOW ***
        local k,v,val,d,s
        local desc_table = {}
        --local rec_table = {}
        if job_desc.gres ~= nil then
                if string.find(job_desc.gres,gpfs_gres_name) ~= nil then
                        for k,v in string.gmatch(job_desc.gres,"([^:]+):([^,]+),?") do desc_table[k] = v end
                        --for k,v in string.gmatch(job_desc.gres,"([^:]+):([^,]+),?") do rec_table[k] = v end
                        --if desc_table[gpfs_gres_name] ~= rec_table[gpfs_gres_name] or rec_table[gpfs_gres_name] == nil then
                                val = tonumber(desc_table[gpfs_gres_name]) 
                                if val == nil then
                                        d,s = string.match(desc_table[gpfs_gres_name],"(%d+)(%a+)")
                                        if d == nil then 
                                                log_err("[LUA] slurm_job_modify:: invalid gpfs poolsize in gres: %s for job: %d - user: %s", job_desc.gres, job_rec.job_id, modify_uid)
                                                return 2072 
                                        end
                                        s = string.lower(s)
                                        if sm[s] == nil then 
                                                log_err("[LUA] slurm_job_modify:: invalid gpfs poolsize in gres: %s for job: %d - user: %s", job_desc.gres, job_rec.job_id, modify_uid)
                                                return 2072 
                                        end
                                        val = tonumber(d) * sm[s]
                                        log_debug("[LUA] slurm_job_modify:: expanded gpfs value: %s", val)
                                end
                                if val<min_gpfs_poolsize or val>max_gpfs_poolsize then 
                                        log_err("[LUA] slurm_job_modify:: out of range requested gpfs poolsize in gres: %s for job: %d - user: %s", job_desc.gres, job_rec.job_id, modify_uid)
                                        return 2072 
                                end                     
                        --end
                else
                        job_desc.gres = job_desc.gres..","..gpfs_gres_name..":"..default_gpfs_poolsize
                        job_desc.gres = job_desc.gres:gsub("^," , "")
                        --if string.sub(s1,1,string.len(","))=="," then print("remove"); print(string.sub(s1,string.len(",")+1,string.len(s1))) else print ("ok") end
                end
        else 
                job_desc.gres = gpfs_gres_name..":"..default_gpfs_poolsize

--              if job_rec.gres == nil then
--                      job_desc.gres = gpfs_gres_name..":"..default_gpfs_poolsize
--                      job_rec.gres = job_desc.gres
--              else
--                      job_desc.gres = job_rec.gres
--              end

        end

        if job_desc.gres ~= nil and job_rec.gres ~= nil then
                log_info("[LUA] slurm_job_modify:: requested job_desc: %s for job_id: %d - user %s", job_desc.gres, job_rec.job_id, modify_uid)
        end

        return 0
end

--########################################################################--
--
--  Initialization code:
--
--  Define functions for logging and accessing slurmctld structures
--
--########################################################################--


log_info = slurm.log_info
log_verbose = slurm.log_verbose
log_debug = slurm.log_debug
log_err = slurm.error

job_rec_meta = {
        __index = function (table, key)
                return _get_job_rec_field(table.job_rec_ptr, key)
        end
}
job_req_meta = {
        __index = function (table, key)
                return _get_job_req_field(table.job_desc_ptr, key)
        end,
        __newindex = function (table, key, value)
                return _set_job_req_field(table.job_desc_ptr, key, value or "")
        end
}
part_rec_meta = {
        __index = function (table, key)
                return _get_part_rec_field(table.part_rec_ptr, key)
        end
}

log_info("initialized")

return slurm.SUCCESS
