Hi!
When you say GRE, I hope that you mean GRES, right?
At our site we have created a lua job submission filter where we set a default
value for a custom GRES (GPFS pagepoolsize).
I am sending you the script (attached) so you can see what we are doing and if
you want you can do something similar for your bandwidth GRES.
You should also check the online documentation how to build Slurm with lua
support and use your custom job_submit script.
Cheers,
Chrysovalantis Paschoulas
On 02/19/2015 04:00 PM, Josh McSavaney wrote:
Hey Brian,
Can't say for GRE requests, but I'd advise looking into DefMemPerNode and
DefMemPerCPU with regards to default memory requests.
Have a good one,
-J
On Thu Feb 19 2015 at 9:43:21 AM Brian B
<[email protected]<mailto:[email protected]>> wrote:
Greetings,
Our cluster is used for a variety of different processes but some are extremely
IO intensive and can cause the whole file system to hang if too many are
running at once. I created a “bandwidth” GRE that, when properly used, solves
the problem. Unfortunately not all users are reading the instructions and
requesting the proper bandwidth. Is there a way to set a default GRE request?
Be it in the partition, user, account, etc. I do not care. Similarly can one
make a default Memory request? The only default I have found is DefaultTime.
http://slurm.schedmd.com/slurm.conf.html
Regards,
Brian
------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------
Forschungszentrum Juelich GmbH
52425 Juelich
Sitz der Gesellschaft: Juelich
Eingetragen im Handelsregister des Amtsgerichts Dueren Nr. HR B 3498
Vorsitzender des Aufsichtsrats: MinDir Dr. Karl Eugen Huthmacher
Geschaeftsfuehrung: Prof. Dr.-Ing. Wolfgang Marquardt (Vorsitzender),
Karsten Beneke (stellv. Vorsitzender), Prof. Dr.-Ing. Harald Bolt,
Prof. Dr. Sebastian M. Schmidt
------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------
--[[
Example lua script demonstrating the SLURM job_submit/lua interface.
This is only an example, not meant for use in its current form.
Leave the function names, arguments, local variables and setmetatable
set up logic in each function unchanged. Change only the logic after
the line containing "*** YOUR LOGIC GOES BELOW ***".
For use, this script should be copied into a file named "job_submit.lua"
in the same directory as the SLURM configuration file, slurm.conf.
--]]
-- Wrap each entry of part_list in a proxy table whose metatable
-- (part_rec_meta, defined at the bottom of this file) forwards field
-- lookups to the underlying slurmctld partition record.
function _build_part_table ( part_list )
  local wrapped = {}
  for idx, part_ptr in ipairs(part_list) do
    local proxy = setmetatable({ part_rec_ptr = part_ptr }, part_rec_meta)
    wrapped[idx] = proxy
  end
  return wrapped
end
--########################################################################--
--
-- SLURM job_submit/lua interface:
--
--########################################################################--
-- Site policy for the custom "gpfs" (pagepool) GRES enforced by the
-- handlers below.
-- NOTE(review): the unit of these bounds is not stated here; suffixed
-- requests ("4k", "2m", ...) are expanded via `sm` before being compared
-- against min/max -- confirm the intended unit against gres.conf.
local default_gpfs_poolsize = 512
local min_gpfs_poolsize = 4
local max_gpfs_poolsize = 48400
local gpfs_gres_name = "gpfs"
-- Multipliers for the accepted single-letter size suffixes
-- (lower-cased before lookup, so "K" and "k" are equivalent).
local sm = { ["k"]=1024, ["m"]=1048576, ["g"]=1073741824, ["t"]=1099511627776 }
--[[
slurm_job_submit: called by slurmctld for every newly submitted job.
Ensures each job carries a gpfs pagepool GRES request: if the job has
no gres at all, or a gres string without a gpfs entry, the site default
is installed/appended; if a gpfs value is present it must parse (plain
number or "<digits><k|m|g|t>") and fall within
[min_gpfs_poolsize, max_gpfs_poolsize].
Returns 0 to accept the job, or 2072 to reject it.
--]]
function slurm_job_submit ( job_desc, part_list, submit_uid )
setmetatable (job_desc, job_req_meta)
local part_rec = _build_part_table (part_list)
-- *** YOUR LOGIC GOES BELOW ***
local val,d,s
local gres_table = {}
if job_desc.gres ~= nil then
-- Plain find (4th arg true): the GRES name is configuration data, so
-- do not let pattern-magic characters in it change the match.
if string.find(job_desc.gres,gpfs_gres_name,1,true) == nil then
-- gres present but no gpfs entry: append the site default.
job_desc.gres = job_desc.gres..","..gpfs_gres_name..":"..default_gpfs_poolsize
else
-- Split "name:value,name:value,..." into a lookup table keyed by name.
for k,v in string.gmatch(job_desc.gres,"([^:]+):([^,]+),?") do gres_table[k] = v end
val = tonumber(gres_table[gpfs_gres_name])
if val == nil then
-- Not a plain number: try "<digits><unit-suffix>" (e.g. "4k", "2m").
d,s = string.match(gres_table[gpfs_gres_name],"(%d+)(%a+)")
if d == nil then
log_err("[LUA] slurm_job_submit:: not valid gpfs poolsize in gres: %s for user: %s", job_desc.gres, submit_uid)
return 2072
end
s = string.lower(s)
if sm[s] == nil then
log_err("[LUA] slurm_job_submit:: not valid gpfs poolsize in gres: %s for user: %s", job_desc.gres, submit_uid)
-- Was "return -1": every other rejection path here and in
-- slurm_job_modify returns 2072, so use the same code for the
-- unknown-suffix error as well.
return 2072
end
val = tonumber(d) * sm[s]
log_debug("[LUA] slurm_job_submit:: expanded gpfs value: %s", val)
end
log_debug("[LUA] slurm_job_submit:: requested size for gpfs: %s", val)
if val<min_gpfs_poolsize or val>max_gpfs_poolsize then
log_err("[LUA] slurm_job_submit:: out of range requested gpfs poolsize in gres: %s for user: %s", job_desc.gres, submit_uid)
return 2072
end
end
else
-- Job requested no GRES at all: install the gpfs default.
job_desc.gres = gpfs_gres_name..":"..default_gpfs_poolsize
end
if job_desc.gres ~= nil then
log_info("[LUA] slurm_job_submit:: Gres in job description %s for user %s", job_desc.gres, submit_uid)
end
return 0
end
--[[
slurm_job_modify: called by slurmctld when an existing job is altered
(e.g. via "scontrol update job"). Mirrors slurm_job_submit: the modified
gres string must still contain a valid, in-range gpfs pagepool value,
and a missing gpfs entry is replaced with the site default.
Returns 0 to accept the modification, or 2072 to reject it.
--]]
function slurm_job_modify ( job_desc, job_rec, part_list, modify_uid )
setmetatable (job_desc, job_req_meta)
setmetatable (job_rec, job_rec_meta)
local part_rec = _build_part_table (part_list)
-- *** YOUR LOGIC GOES BELOW ***
local k,v,val,d,s
local desc_table = {}
--local rec_table = {}
if job_desc.gres ~= nil then
-- NOTE(review): substring match -- a GRES whose name merely contains
-- "gpfs" would also satisfy this test; confirm no such GRES exists.
if string.find(job_desc.gres,gpfs_gres_name) ~= nil then
-- Split "name:value,name:value,..." into a lookup table keyed by name.
for k,v in string.gmatch(job_desc.gres,"([^:]+):([^,]+),?") do desc_table[k] = v end
--for k,v in string.gmatch(job_desc.gres,"([^:]+):([^,]+),?") do rec_table[k] = v end
--if desc_table[gpfs_gres_name] ~= rec_table[gpfs_gres_name] or rec_table[gpfs_gres_name] == nil then
val = tonumber(desc_table[gpfs_gres_name])
if val == nil then
-- Value is not a plain number: try "<digits><unit-suffix>" (e.g. "4k").
d,s = string.match(desc_table[gpfs_gres_name],"(%d+)(%a+)")
if d == nil then
log_err("[LUA] slurm_job_modify:: not valid gpfs poolsize in gres: %s for job: %d - user: %s", job_desc.gres, job_rec.job_id, modify_uid)
return 2072
end
s = string.lower(s)
-- Only the suffixes listed in `sm` (k/m/g/t) are accepted.
if sm[s] == nil then
log_err("[LUA] slurm_job_modify:: not valid gpfs poolsize in gres: %s for job: %d - user: %s", job_desc.gres, job_rec.job_id, modify_uid)
return 2072
end
val = tonumber(d) * sm[s]
log_debug("[LUA] slurm_job_modify:: expanded gpfs value: %s", val)
end
if val<min_gpfs_poolsize or val>max_gpfs_poolsize then
log_err("[LUA] slurm_job_modify:: out of range requested gpfspoolsize in gres: %s for job: %d - user: %s", job_desc.gres, job_rec.job_id, modify_uid)
return 2072
end
--end
else
-- gres present but without a gpfs entry: append the site default.
job_desc.gres = job_desc.gres..","..gpfs_gres_name..":"..default_gpfs_poolsize
-- Defensive strip of a leading comma (cannot actually occur on this
-- path, since job_desc.gres was non-nil before the append).
job_desc.gres = job_desc.gres:gsub("^," , "")
--if string.sub(s1,1,string.len(","))=="," then print("remove"); print(string.sub(s1,string.len(",")+1,string.len(s1))) else print ("ok") end
end
else
-- The modification cleared gres entirely: install only the gpfs default.
-- NOTE(review): this discards any other GRES recorded in job_rec; the
-- commented block below preserved job_rec.gres instead -- confirm the
-- current behavior is intentional.
job_desc.gres = gpfs_gres_name..":"..default_gpfs_poolsize
-- if job_rec.gres == nil then
-- job_desc.gres = gpfs_gres_name..":"..default_gpfs_poolsize
-- job_rec.gres = job_desc.gres
-- else
-- job_desc.gres = job_rec.gres
-- end
end
if job_desc.gres ~= nil and job_rec.gres ~= nil then
log_info("[LUA] slurm_job_modify:: requested job_desc: %s for job_id: %d - user %s", job_desc.gres, job_rec.job_id, modify_uid)
end
return 0
end
--########################################################################--
--
-- Initialization code:
--
-- Define functions for logging and accessing slurmctld structures
--
--########################################################################--
-- Short aliases for the logging functions exported by slurmctld in the
-- `slurm` table; used throughout the handlers above.
log_info = slurm.log_info
log_verbose = slurm.log_verbose
log_debug = slurm.log_debug
log_err = slurm.error
-- Metatable for job records: field reads are forwarded to the slurmctld
-- job record through the C helper _get_job_rec_field (read-only proxy).
job_rec_meta = {
__index = function (table, key)
return _get_job_rec_field(table.job_rec_ptr, key)
end
}
-- Metatable for job descriptors: reads and writes are forwarded to the
-- C job request structure; a nil value is stored as "" on write.
job_req_meta = {
__index = function (table, key)
return _get_job_req_field(table.job_desc_ptr, key)
end,
__newindex = function (table, key, value)
return _set_job_req_field(table.job_desc_ptr, key, value or "")
end
}
-- Metatable for partition records built by _build_part_table
-- (read-only proxy via the C helper).
part_rec_meta = {
__index = function (table, key)
return _get_part_rec_field(table.part_rec_ptr, key)
end
}
log_info("initialized")
return slurm.SUCCESS