Madhuvishy has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/352895 )
Change subject: sge: Revamp queue configuration puppet
......................................................................
sge: Revamp queue configuration puppet
Change-Id: I424f59aab5785991bee6324a46cb7f1872f4baf9
---
M modules/gridengine/manifests/master.pp
M modules/gridengine/manifests/queue.pp
R modules/gridengine/templates/queue-conf.erb
M modules/role/manifests/toollabs/node/compute/dedicated.pp
M modules/toollabs/manifests/master.pp
D modules/toollabs/templates/gridengine/queue-continuous.erb
D modules/toollabs/templates/gridengine/queue-dedicated.erb
D modules/toollabs/templates/gridengine/queue-task.erb
8 files changed, 163 insertions(+), 181 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/95/352895/1
diff --git a/modules/gridengine/manifests/master.pp
b/modules/gridengine/manifests/master.pp
index 2e837bf..8a5bfcd 100644
--- a/modules/gridengine/manifests/master.pp
+++ b/modules/gridengine/manifests/master.pp
@@ -16,7 +16,6 @@
$etcdir = '/var/lib/gridengine/etc'
- gridengine::resourcedir { 'queues': }
gridengine::resourcedir { 'hostgroups': }
gridengine::resourcedir { 'quotas': }
gridengine::resourcedir { 'checkpoints': }
diff --git a/modules/gridengine/manifests/queue.pp
b/modules/gridengine/manifests/queue.pp
index 1cef26c..306c636 100644
--- a/modules/gridengine/manifests/queue.pp
+++ b/modules/gridengine/manifests/queue.pp
@@ -1,13 +1,85 @@
# gridengine/queue.pp
+# Define a grid engine queue resource - this drops a config file in a directory
+# for a grid engine queue. The actual queue is only created on running
+# `qconf -Aq conf_file` or modified on running `qconf -Mq conf_file`
+#
+# The params are based on queue_conf - the sge queue configuration file format.
+# See http://gridscheduler.sourceforge.net/htmlman/htmlman5/queue_conf.html
+# for all parameters.
+#
+# [*hostlist]
+# Space separated list of hosts or name of hostgroup, default NONE. For each
host
+# SGE maintains a queue instance for running jobs on that host.
+#
+# [*seq_no]
+# Integer, default 0, specifies queue's position in scheduling order. Set this
as
+# a monotonically increasing sequence
+#
+# [*np_load_avg_threshold]
+# Load threshold for complex np_load_avg. Default: 1.75
+#
+# [*priority]
+# Integer, default 0. Specifies nice value at which jobs in the queue will be
run.
+# Negative values (up to -20) = higher priority. Positive values (up to +20) =
+# lower priority
+#
+# [*qtype]
+# Type of queue, default BATCH INTERACTIVE. Can be batch, interactive, or
combination.
+#
+# [*ckpt_list]
+# List of checkpointing interface names. Default NONE
+#
+# [*rerun]
+# Boolean, default false. Defines behavior for jobs that are aborted, set to
true
+# to restart automatically
+#
+# [*slots]
+# Integer, default 50. Maximum number of concurrently executing jobs in the
queue
+#
+# [*epilog]
+# Executable path to a shell script that is started after a job's execution,
+# with the same environment settings as the completed job. Default NONE
+#
+# [*terminate_method]
+# Override the default method (SIGKILL) used to terminate a job.
+#
+# [*owner_list]
+# List of users authorized to disable and suspend queue. Default NONE
+#
+# [*user_lists]
+# Comma separated list of user access list names, controls which users have
access
+# to the queue. Default NONE
+
define gridengine::queue(
- $rname = $title,
- $config = undef,
+    $config_path, # required: directory the queue config file is written into
+    $hostlist = 'NONE',
+    $seq_no = 0,
+    $np_load_avg_threshold = 1.75,
+    $priority = 0,
+    $qtype = 'BATCH INTERACTIVE',
+    $ckpt_list = 'NONE',
+    $rerun = false,
+    $slots = 50,
+    $epilog = 'NONE',
+    $terminate_method = 'SIGKILL',
+    $owner_list = 'NONE',
+    $user_lists = 'NONE',
) {
- gridengine::resource { $rname:
- dir => 'queues',
- config => $config,
+    # Several queues may be declared with the same $config_path. Only the
+    # first instance may declare the shared directory, otherwise catalog
+    # compilation fails with a duplicate File declaration.
+    if ! defined(File[$config_path]) {
+        file { $config_path:
+            ensure => directory,
+            owner  => 'sgeadmin',
+            group  => 'sgeadmin',
+            mode   => '0775',
+        }
+    }
+
+    # Render the queue_conf file for this queue; the template uses the
+    # resource title as the qname and the parameters above for the rest.
+    file { "${config_path}/${title}":
+        ensure  => file,
+        owner   => 'sgeadmin',
+        group   => 'sgeadmin',
+        mode    => '0664',
+        content => template('gridengine/queue-conf.erb'),
+        # Reference the real parameter, unquoted, so the dependency on the
+        # directory resource actually resolves.
+        require => File[$config_path],
     }
}
-
diff --git a/modules/toollabs/templates/gridengine/queue-webgrid.erb
b/modules/gridengine/templates/queue-conf.erb
similarity index 67%
rename from modules/toollabs/templates/gridengine/queue-webgrid.erb
rename to modules/gridengine/templates/queue-conf.erb
index 4a092bd..5db86f8 100644
--- a/modules/toollabs/templates/gridengine/queue-webgrid.erb
+++ b/modules/gridengine/templates/queue-conf.erb
@@ -1,30 +1,30 @@
qname <%= @title %>
-hostlist #@#
-seq_no 2
-load_thresholds np_load_avg=2.75
-priority 0
-min_cpu_interval 00:05:00
-qtype BATCH
-ckpt_list NONE
-rerun TRUE
-slots 256
-tmpdir /tmp
-shell /bin/bash
-shell_start_mode unix_behavior
-terminate_method SIGTERM
-notify 00:00:60
-prolog NONE
-epilog /usr/local/bin/portreleaser
-processors UNDEFINED
-pe_list NONE
+hostlist <%= @hostlist %>
+seq_no <%= @seq_no %>
+load_thresholds np_load_avg=<%= @np_load_avg_threshold %>
suspend_thresholds NONE
nsuspend 1
suspend_interval 00:05:00
+priority <%= @priority %>
+min_cpu_interval 00:05:00
+processors UNDEFINED
+qtype <%= @qtype %>
+ckpt_list <%= @ckpt_list %>
+pe_list NONE
+rerun <%= @rerun ? 'TRUE' : 'FALSE' %>
+slots <%= @slots %>
+tmpdir /tmp
+shell /bin/bash
+prolog NONE
+epilog <%= @epilog %>
+shell_start_mode unix_behavior
starter_method NONE
suspend_method NONE
resume_method NONE
-owner_list NONE
-user_lists NONE
+terminate_method <%= @terminate_method %>
+notify 00:00:60
+owner_list <%= @owner_list %>
+user_lists <%= @user_lists %>
xuser_lists NONE
subordinate_list NONE
complex_values NONE
@@ -36,8 +36,6 @@
h_rt INFINITY
s_cpu INFINITY
h_cpu INFINITY
-s_vmem INFINITY
-h_vmem INFINITY
s_fsize INFINITY
h_fsize INFINITY
s_data INFINITY
@@ -48,3 +46,5 @@
h_core INFINITY
s_rss INFINITY
h_rss INFINITY
+s_vmem INFINITY
+h_vmem INFINITY
diff --git a/modules/role/manifests/toollabs/node/compute/dedicated.pp
b/modules/role/manifests/toollabs/node/compute/dedicated.pp
index b4450aa..179b2cd 100644
--- a/modules/role/manifests/toollabs/node/compute/dedicated.pp
+++ b/modules/role/manifests/toollabs/node/compute/dedicated.pp
@@ -21,6 +21,20 @@
description => "Computation node dedicated to
${::labsproject}.${dedicated_tool}",
}
+ # A resource declaration needs a title; the title is also rendered as the
+ # queue's qname by the queue-conf template.
+ # FIXME(review): $dedicated_tool is assumed to be the intended queue name -
+ # confirm against the queue naming used on the grid master.
+ gridengine::queue { $dedicated_tool:
+ config_path => '/var/lib/gridengine/etc/queues',
+ hostlist => 'NONE', # FIXME: Make hostgroups for these queues
+ seq_no => 6,
+ np_load_avg_threshold => 2.0,
+ priority => 10,
+ qtype => 'BATCH',
+ rerun => true,
+ slots => 1000,
+ ckpt_list => 'continuous',
+ owner_list => $dedicated_tool,
+ user_lists => $dedicated_tool,
+ }
+
} else {
system::role { 'toollabs::node::compute::dedicated':
diff --git a/modules/toollabs/manifests/master.pp
b/modules/toollabs/manifests/master.pp
index d2ba1c9..885db27 100644
--- a/modules/toollabs/manifests/master.pp
+++ b/modules/toollabs/manifests/master.pp
@@ -4,9 +4,56 @@
include ::gridengine::master
include ::toollabs::infrastructure
- include ::toollabs::queue::continuous
- include ::toollabs::queue::task
+ $etcdir = '/var/lib/gridengine/etc'
+
+ # Set up queues
+ # Defaults passed to create_resources() for every queue declared below.
+ # The key must match the gridengine::queue parameter name (config_path).
+ $queue_config_defaults = {
+ 'config_path' => "${etcdir}/queues",
+ }
+
+ $queue_config = {
+ 'task' => {
+ hostlist => '@general',
+ seq_no => 0,
+ terminate_method => '/usr/local/bin/jobkill $job_pid',
+ },
+ 'continuous' => {
+ hostlist => '@general',
+ seq_no => 1,
+ priority => 10,
+ qtype => 'BATCH',
+ rerun => true,
+ ckpt_list => 'continuous',
+ terminate_method => '/usr/local/bin/jobkill $job_pid',
+ },
+ 'webgrid-lighttpd' => {
+ hostlist => '@webgrid',
+ seq_no => 2,
+ np_load_avg_threshold => 2.75,
+ qtype => 'BATCH',
+ rerun => true,
+ slots => 256,
+ terminate_method => 'SIGTERM',
+ epilog => '/usr/local/bin/portreleaser',
+ },
+ 'webgrid-generic' => {
+ # FIXME: webgrid-generic is set up with a list of hosts instead of
hostgroups.
+ # It should just be another hostgroup
+ hostlist => 'NONE',
+ seq_no => 3,
+ np_load_avg_threshold => 2.75,
+ qtype => 'BATCH',
+ rerun => true,
+ slots => 256,
+ terminate_method => 'SIGTERM',
+ epilog => '/usr/local/bin/portreleaser',
+ },
+ }
+
+ create_resources(gridengine::queue, $queue_config, $queue_config_defaults)
+
+ # Set up complexes
gridengine_resource { 'h_vmem':
ensure => present,
requestable => 'FORCED',
diff --git a/modules/toollabs/templates/gridengine/queue-continuous.erb
b/modules/toollabs/templates/gridengine/queue-continuous.erb
deleted file mode 100644
index d046164..0000000
--- a/modules/toollabs/templates/gridengine/queue-continuous.erb
+++ /dev/null
@@ -1,50 +0,0 @@
-qname <%= @title %>
-hostlist @general
-seq_no 1
-load_thresholds np_load_avg=1.75
-priority 10
-min_cpu_interval 00:05:00
-qtype BATCH
-ckpt_list continuous
-rerun TRUE
-slots 50
-tmpdir /tmp
-shell /bin/bash
-shell_start_mode unix_behavior
-terminate_method /usr/local/bin/jobkill $job_pid
-notify 00:00:60
-prolog NONE
-epilog NONE
-processors UNDEFINED
-pe_list NONE
-suspend_thresholds NONE
-nsuspend 1
-suspend_interval 00:05:00
-starter_method NONE
-suspend_method NONE
-resume_method NONE
-owner_list NONE
-user_lists NONE
-xuser_lists NONE
-subordinate_list NONE
-complex_values NONE
-projects NONE
-xprojects NONE
-calendar NONE
-initial_state default
-s_rt INFINITY
-h_rt INFINITY
-s_cpu INFINITY
-h_cpu INFINITY
-s_vmem INFINITY
-h_vmem INFINITY
-s_fsize INFINITY
-h_fsize INFINITY
-s_data INFINITY
-h_data INFINITY
-s_stack INFINITY
-h_stack INFINITY
-s_core INFINITY
-h_core INFINITY
-s_rss INFINITY
-h_rss INFINITY
diff --git a/modules/toollabs/templates/gridengine/queue-dedicated.erb
b/modules/toollabs/templates/gridengine/queue-dedicated.erb
deleted file mode 100644
index 56bd397..0000000
--- a/modules/toollabs/templates/gridengine/queue-dedicated.erb
+++ /dev/null
@@ -1,50 +0,0 @@
-qname <%= @title %>
-hostlist #@#
-seq_no 5
-load_thresholds np_load_avg=2.75
-priority 10
-min_cpu_interval 00:05:00
-qtype BATCH INTERACTIVE
-ckpt_list continuous
-rerun TRUE
-slots 50
-tmpdir /tmp
-shell /bin/bash
-shell_start_mode unix_behavior
-terminate_method /usr/local/bin/jobkill $job_pid
-notify 00:00:60
-user_list <%= @dedicated_tool %>
-owner_list <%= @dedicated_tool %>
-prolog NONE
-epilog NONE
-processors UNDEFINED
-pe_list NONE
-suspend_thresholds NONE
-nsuspend 1
-suspend_interval 00:05:00
-starter_method NONE
-suspend_method NONE
-resume_method NONE
-xuser_lists NONE
-subordinate_list NONE
-complex_values NONE
-projects NONE
-xprojects NONE
-calendar NONE
-initial_state default
-s_rt INFINITY
-h_rt INFINITY
-s_cpu INFINITY
-h_cpu INFINITY
-s_vmem INFINITY
-h_vmem INFINITY
-s_fsize INFINITY
-h_fsize INFINITY
-s_data INFINITY
-h_data INFINITY
-s_stack INFINITY
-h_stack INFINITY
-s_core INFINITY
-h_core INFINITY
-s_rss INFINITY
-h_rss INFINITY
diff --git a/modules/toollabs/templates/gridengine/queue-task.erb
b/modules/toollabs/templates/gridengine/queue-task.erb
deleted file mode 100644
index dd683a3..0000000
--- a/modules/toollabs/templates/gridengine/queue-task.erb
+++ /dev/null
@@ -1,50 +0,0 @@
-qname <%= @title %>
-hostlist @general
-seq_no 0
-load_thresholds np_load_avg=1.75
-priority 0
-min_cpu_interval 00:05:00
-qtype BATCH INTERACTIVE
-ckpt_list NONE
-rerun FALSE
-slots 50
-tmpdir /tmp
-shell /bin/bash
-shell_start_mode unix_behavior
-terminate_method /usr/local/bin/jobkill $job_pid
-notify 00:00:60
-prolog NONE
-epilog NONE
-processors UNDEFINED
-pe_list NONE
-suspend_thresholds NONE
-nsuspend 1
-suspend_interval 00:05:00
-starter_method NONE
-suspend_method NONE
-resume_method NONE
-owner_list NONE
-user_lists NONE
-xuser_lists NONE
-subordinate_list NONE
-complex_values NONE
-projects NONE
-xprojects NONE
-calendar NONE
-initial_state default
-s_rt INFINITY
-h_rt INFINITY
-s_cpu INFINITY
-h_cpu INFINITY
-s_vmem INFINITY
-h_vmem INFINITY
-s_fsize INFINITY
-h_fsize INFINITY
-s_data INFINITY
-h_data INFINITY
-s_stack INFINITY
-h_stack INFINITY
-s_core INFINITY
-h_core INFINITY
-s_rss INFINITY
-h_rss INFINITY
--
To view, visit https://gerrit.wikimedia.org/r/352895
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I424f59aab5785991bee6324a46cb7f1872f4baf9
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Madhuvishy <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits