Package: release.debian.org Severity: normal User: release.debian....@packages.debian.org Usertags: unblock
Slurm 16.05.9-1 was uploaded to Unstable a while ago and is a bug fix release. The diff is large but it contains many fixes (See summary in upstream's NEWS file) and Slurm minor releases have always been considered safe. Besides, Slurm 16.05.9-1 has stayed in Unstable for a while now without issues. Can you please consider unblocking slurm-llnl? -- System Information: Debian Release: 9.0 APT prefers testing APT policy: (990, 'testing'), (500, 'unstable'), (1, 'experimental') Architecture: amd64 (x86_64) Foreign Architectures: i386 Kernel: Linux 4.9.0-2-amd64 (SMP w/4 CPU cores) Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/dash Init: systemd (via /run/systemd/system)
diff -Nru slurm-llnl-16.05.8/debian/changelog slurm-llnl-16.05.9/debian/changelog --- slurm-llnl-16.05.8/debian/changelog 2017-01-07 02:40:23.000000000 +0100 +++ slurm-llnl-16.05.9/debian/changelog 2017-02-03 09:50:02.000000000 +0100 @@ -1,3 +1,10 @@ +slurm-llnl (16.05.9-1) unstable; urgency=medium + + * New upstream release + * Overrides spelling-error-in-binary false positives + + -- Gennaro Oliva <oliv...@na.icar.cnr.it> Fri, 03 Feb 2017 09:50:02 +0100 + slurm-llnl (16.05.8-1) unstable; urgency=medium * New upstream release diff -Nru slurm-llnl-16.05.8/debian/libslurm30.lintian-overrides slurm-llnl-16.05.9/debian/libslurm30.lintian-overrides --- slurm-llnl-16.05.8/debian/libslurm30.lintian-overrides 2017-01-04 23:42:58.000000000 +0100 +++ slurm-llnl-16.05.9/debian/libslurm30.lintian-overrides 2017-02-02 09:41:24.000000000 +0100 @@ -12,3 +12,4 @@ # This happens because because slurm_job_preempt_mode is contained in # /usr/sbin/slurmctld and will never be referenced when running sinfo. hardening-no-bindnow +spelling-error-in-binary diff -Nru slurm-llnl-16.05.8/debian/libslurmdb30.lintian-overrides slurm-llnl-16.05.9/debian/libslurmdb30.lintian-overrides --- slurm-llnl-16.05.8/debian/libslurmdb30.lintian-overrides 2017-01-04 23:42:58.000000000 +0100 +++ slurm-llnl-16.05.9/debian/libslurmdb30.lintian-overrides 2017-02-02 09:41:24.000000000 +0100 @@ -12,3 +12,4 @@ # This happens because because slurm_job_preempt_mode is contained in # /usr/sbin/slurmctld and will never be referenced when running sinfo. 
hardening-no-bindnow +spelling-error-in-binary diff -Nru slurm-llnl-16.05.8/debian/slurm-client-emulator.lintian-overrides slurm-llnl-16.05.9/debian/slurm-client-emulator.lintian-overrides --- slurm-llnl-16.05.8/debian/slurm-client-emulator.lintian-overrides 2017-01-04 23:42:58.000000000 +0100 +++ slurm-llnl-16.05.9/debian/slurm-client-emulator.lintian-overrides 2017-02-02 09:41:24.000000000 +0100 @@ -1 +1,2 @@ slurm-client-emulator: hardening-no-bindnow +spelling-error-in-binary diff -Nru slurm-llnl-16.05.8/debian/slurm-client.lintian-overrides slurm-llnl-16.05.9/debian/slurm-client.lintian-overrides --- slurm-llnl-16.05.8/debian/slurm-client.lintian-overrides 2017-01-04 23:42:58.000000000 +0100 +++ slurm-llnl-16.05.9/debian/slurm-client.lintian-overrides 2017-02-02 09:41:24.000000000 +0100 @@ -1,3 +1,4 @@ slurm-client: manpage-has-errors-from-man slurm-client: conflicts-with-version slurm-client: hardening-no-bindnow +spelling-error-in-binary diff -Nru slurm-llnl-16.05.8/debian/slurmctld.lintian-overrides slurm-llnl-16.05.9/debian/slurmctld.lintian-overrides --- slurm-llnl-16.05.8/debian/slurmctld.lintian-overrides 2017-01-04 23:42:58.000000000 +0100 +++ slurm-llnl-16.05.9/debian/slurmctld.lintian-overrides 2017-02-02 09:41:24.000000000 +0100 @@ -1,2 +1,3 @@ slurmctld: possible-documentation-but-no-doc-base-registration slurmctld: hardening-no-bindnow +spelling-error-in-binary diff -Nru slurm-llnl-16.05.8/debian/slurmdbd.lintian-overrides slurm-llnl-16.05.9/debian/slurmdbd.lintian-overrides --- slurm-llnl-16.05.8/debian/slurmdbd.lintian-overrides 2017-01-04 23:42:58.000000000 +0100 +++ slurm-llnl-16.05.9/debian/slurmdbd.lintian-overrides 2017-02-02 09:41:24.000000000 +0100 @@ -1 +1,2 @@ slurmdbd: hardening-no-bindnow +spelling-error-in-binary diff -Nru slurm-llnl-16.05.8/debian/slurmd.lintian-overrides slurm-llnl-16.05.9/debian/slurmd.lintian-overrides --- slurm-llnl-16.05.8/debian/slurmd.lintian-overrides 2017-01-04 23:42:58.000000000 +0100 +++ 
slurm-llnl-16.05.9/debian/slurmd.lintian-overrides 2017-02-02 09:41:24.000000000 +0100 @@ -1 +1,2 @@ slurmd: hardening-no-bindnow +spelling-error-in-binary diff -Nru slurm-llnl-16.05.8/debian/slurm-wlm-emulator.lintian-overrides slurm-llnl-16.05.9/debian/slurm-wlm-emulator.lintian-overrides --- slurm-llnl-16.05.8/debian/slurm-wlm-emulator.lintian-overrides 2017-01-04 23:42:58.000000000 +0100 +++ slurm-llnl-16.05.9/debian/slurm-wlm-emulator.lintian-overrides 2017-02-02 09:41:24.000000000 +0100 @@ -1 +1,2 @@ slurm-wlm-emulator: hardening-no-bindnow +spelling-error-in-binary diff -Nru slurm-llnl-16.05.8/debian/sview.lintian-overrides slurm-llnl-16.05.9/debian/sview.lintian-overrides --- slurm-llnl-16.05.8/debian/sview.lintian-overrides 2017-01-04 23:42:58.000000000 +0100 +++ slurm-llnl-16.05.9/debian/sview.lintian-overrides 2017-02-02 09:41:24.000000000 +0100 @@ -1 +1,2 @@ sview: hardening-no-bindnow +spelling-error-in-binary diff -Nru slurm-llnl-16.05.8/doc/html/prolog_epilog.shtml slurm-llnl-16.05.9/doc/html/prolog_epilog.shtml --- slurm-llnl-16.05.8/doc/html/prolog_epilog.shtml 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/doc/html/prolog_epilog.shtml 2017-01-31 20:56:34.000000000 +0100 @@ -130,7 +130,7 @@ </tr> </tbody></table> </center> - +<br> <p>This second table below identifies what prologs and epilogs are available for job step allocations, when and where they run.</p> diff -Nru slurm-llnl-16.05.8/doc/html/publications.shtml slurm-llnl-16.05.9/doc/html/publications.shtml --- slurm-llnl-16.05.8/doc/html/publications.shtml 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/doc/html/publications.shtml 2017-01-31 20:56:34.000000000 +0100 @@ -305,6 +305,8 @@ Yiannis Georgiou and David Glesser (Bull), Krzysztof Rzadca (University of Warsaw), Denis Trystram (University Grenoble-Alpes)</li> + +<li><a href="SUG14/data_movement.pdf">High Performance Data movement between Lustre and Enterprise storage systems</a> Aamir Rashid (Terascala)</li> 
<li><a href="SUG14/remote_gpu.pdf">Extending Slurm with Support for Remote GPU Virtualization</a> @@ -775,6 +777,6 @@ Learning Chef: Compute Cluter with Slurm</a> A Slurm Cookbook by Adam DeConinck</p> -<p style="text-align:center;">Last modified 29 November 2016</p> +<p style="text-align:center;">Last modified 12 January 2017</p> <!--#include virtual="footer.txt"--> diff -Nru slurm-llnl-16.05.8/doc/html/reset.css slurm-llnl-16.05.9/doc/html/reset.css --- slurm-llnl-16.05.8/doc/html/reset.css 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/doc/html/reset.css 2017-01-31 20:56:34.000000000 +0100 @@ -6,7 +6,7 @@ b, u, i, center, ol, ul, li, fieldset, form, label, legend, -table, caption, tbody, tfoot, thead, tr, th, td, +caption, tbody, tfoot, thead, th, article, aside, canvas, details, embed, figure, figcaption, footer, header, hgroup, menu, nav, output, ruby, section, summary, @@ -44,6 +44,5 @@ } table { - border-collapse: collapse; border-spacing: 0; } diff -Nru slurm-llnl-16.05.8/doc/html/style.css slurm-llnl-16.05.9/doc/html/style.css --- slurm-llnl-16.05.8/doc/html/style.css 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/doc/html/style.css 2017-01-31 20:56:34.000000000 +0100 @@ -23,7 +23,6 @@ .container { margin: 0 auto; padding: 0 18px; - max-width: 1400px; } .container--main { @@ -661,6 +660,7 @@ @media screen and (min-width: 32em) { .container { padding: 0 36px; + max-width: 100%; } } @@ -673,6 +673,7 @@ .container { padding: 0 48px; + max-width: 90%; } .container--main { @@ -732,7 +733,7 @@ } .content .container { - padding: 0 8% 0 8%; + padding: 0 0 0 100px; margin: 0; } @@ -772,6 +773,9 @@ /* Extra Large Size */ @media screen and (min-width: 78em) { - + .container { + padding: 0 48px; + max-width: 90%; + } } diff -Nru slurm-llnl-16.05.8/META slurm-llnl-16.05.9/META --- slurm-llnl-16.05.8/META 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/META 2017-01-31 20:56:34.000000000 +0100 @@ -7,8 +7,8 @@ Name: slurm Major: 16 
Minor: 05 - Micro: 8 - Version: 16.05.8 + Micro: 9 + Version: 16.05.9 Release: 1 # Include leading zero for all pre-releases diff -Nru slurm-llnl-16.05.8/NEWS slurm-llnl-16.05.9/NEWS --- slurm-llnl-16.05.8/NEWS 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/NEWS 2017-01-31 20:56:34.000000000 +0100 @@ -1,6 +1,46 @@ This file describes changes in recent versions of Slurm. It primarily documents those changes that are of interest to users and administrators. +* Changes in Slurm 16.05.9 +========================== + -- Fix parsing of SBCAST_COMPRESS environment variable in sbcast. + -- Change some debug messages to errors in task/cgroup plugin. + -- backfill scheduler: Stop trying to determine expected start time for a job + after 2 seconds of wall time. This can happen if there are many running jobs + and a pending job can not be started soon. + -- Improve performance of cr_sort_part_rows() in cons_res plugin. + -- CRAY - Fix dealock issue when updating accounting in the slurmctld and + scheduling a Datawarp job. + -- Correct the job state accounting information for jobs requeued due to burst + buffer errors. + -- burst_buffer/cray - Avoid "pre_run" operation if not using buffer (i.e. + just creating or deleting a persistent burst buffer). + -- Fix slurm.spec file support for BlueGene builds. + -- Fix missing TRES read lock in acct_policy_job_runnable_pre_select() code. + -- Fix debug2 message printing value using wrong array index in + _qos_job_runnable_post_select(). + -- Prevent job timeout on node power up. + -- MYSQL - Fix minor memory leak when querying steps and the sql fails. + -- Make it so sacctmgr accepts column headers like MaxTRESPU and not MaxTRESP. + -- Only look at SLURM_STEP_KILLED_MSG_NODE_ID on startup, to avoid race + condition later when looking at a steps env. + -- Make backfill scheduler behave like regular scheduler in respect to + 'assoc_limit_stop'. 
+ -- Allow a lower version client command to talk to a higher version contoller + using the multi-cluster options (e.g. squeue -M<clsuter>). + -- slurmctld/agent race condition fix: Prevent job launch while PrologSlurmctld + daemon is running or node boot in progress. + -- MYSQL - Fix a few other minor memory leaks when uncommon failures occur. + -- burst_buffer/cray - Fix race condition that could cause multiple batch job + launch requests resulting in drained nodes. + -- Correct logic to purge old reservations. + -- Fix DBD cache restore from previous versions. + -- Fix to logic for getting expected start time of existing job ID with + explicit begin time that is in the past. + -- Clear job's reason of "BeginTime" in a more timely fashion and/or prevents + them from being stuck in a PENDING state. + -- Make sure acct policy limits imposed on a job are correct after requeue. + * Changes in Slurm 16.05.8 ========================== -- Remove StoragePass from being printed out in the slurmdbd log at debug2 diff -Nru slurm-llnl-16.05.8/slurm.spec slurm-llnl-16.05.9/slurm.spec --- slurm-llnl-16.05.8/slurm.spec 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/slurm.spec 2017-01-31 20:56:34.000000000 +0100 @@ -564,7 +564,6 @@ rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/auth_none.so %endif %if ! 
%{slurm_with bluegene} -rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/job_submit_cnode.so rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/libsched_if.so rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/libsched_if64.so rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/runjob_plugin.so @@ -877,7 +876,6 @@ %{_sbindir}/slurm_epilog %{_sbindir}/slurm_prolog %{_sbindir}/sfree -%{_libdir}/slurm/job_submit_cnode.so %config %{_sysconfdir}/bluegene.conf.example %endif ############################################################################# diff -Nru slurm-llnl-16.05.8/src/common/slurmdbd_defs.c slurm-llnl-16.05.9/src/common/slurmdbd_defs.c --- slurm-llnl-16.05.8/src/common/slurmdbd_defs.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/common/slurmdbd_defs.c 2017-01-31 20:56:34.000000000 +0100 @@ -2348,21 +2348,16 @@ need to set it back to 0 */ set_buf_offset(buffer, 0); safe_unpackstr_xmalloc(&ver_str, &ver_str_len, buffer); - if (remaining_buf(buffer)) - goto unpack_error; debug3("Version string in dbd_state header is %s", ver_str); + unpack_error: free_buf(buffer); buffer = NULL; - unpack_error: if (ver_str) { - char curr_ver_str[10]; - snprintf(curr_ver_str, sizeof(curr_ver_str), - "VER%d", SLURM_PROTOCOL_VERSION); - if (!xstrcmp(ver_str, curr_ver_str)) - rpc_version = SLURM_PROTOCOL_VERSION; + /* get the version after VER */ + rpc_version = slurm_atoul(ver_str + 3); + xfree(ver_str); } - xfree(ver_str); while (1) { /* If the buffer was not the VER%d string it was an actual message so we don't want to diff -Nru slurm-llnl-16.05.8/src/common/slurmdb_pack.c slurm-llnl-16.05.9/src/common/slurmdb_pack.c --- slurm-llnl-16.05.8/src/common/slurmdb_pack.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/common/slurmdb_pack.c 2017-01-31 20:56:34.000000000 +0100 @@ -809,6 +809,8 @@ goto unpack_error; safe_unpack16(&object_ptr->rpc_version, buffer); + object_ptr->rpc_version = MIN(SLURM_PROTOCOL_VERSION, + object_ptr->rpc_version); safe_unpackstr_xmalloc(&object_ptr->tres_str, 
&uint32_tmp, buffer); } else if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) { diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_convert.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_convert.c --- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_convert.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_convert.c 2017-01-31 20:56:34.000000000 +0100 @@ -804,6 +804,7 @@ error("No grp_cpus col name in assoc_table " "for cluster %s, this should never happen", cluster_name); + mysql_free_result(result); continue; } @@ -899,6 +900,7 @@ if (!(row = mysql_fetch_row(result)) || !row[0] || !row[0][0]) { error("No count col name for cluster %s, " "this should never happen", cluster_name); + mysql_free_result(result); continue; } diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c --- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c 2017-01-31 20:56:34.000000000 +0100 @@ -502,6 +502,7 @@ local_cluster_list = setup_cluster_list_with_inx( mysql_conn, job_cond, (void **)&curr_cluster); if (!local_cluster_list) { + mysql_free_result(result); rc = SLURM_ERROR; goto end_it; } @@ -785,6 +786,7 @@ mysql_conn, query, 0))) { xfree(query); rc = SLURM_ERROR; + mysql_free_result(result); goto end_it; } xfree(query); diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_resource.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_resource.c --- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_resource.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_resource.c 2017-01-31 20:56:34.000000000 
+0100 @@ -312,6 +312,7 @@ if (!(row = mysql_fetch_row(result))) { error("Resource id %u is not known on the system", res_id); + mysql_free_result(result); return percent_used; } @@ -383,6 +384,7 @@ if (!(row = mysql_fetch_row(result))) { error("Resource id %u is not known on the system", res->id); + mysql_free_result(result); return SLURM_ERROR; } @@ -1100,6 +1102,8 @@ if (!query_clusters && !vals) { xfree(clus_vals); + if (result) + mysql_free_result(result); errno = SLURM_NO_CHANGE_IN_DATA; error("Nothing to change"); return NULL; diff -Nru slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_rollup.c slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_rollup.c --- slurm-llnl-16.05.8/src/plugins/accounting_storage/mysql/as_mysql_rollup.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/plugins/accounting_storage/mysql/as_mysql_rollup.c 2017-01-31 20:56:34.000000000 +0100 @@ -1199,6 +1199,7 @@ mysql_conn, query, 0))) { rc = SLURM_ERROR; + mysql_free_result(result); goto end_it; } xfree(query); diff -Nru slurm-llnl-16.05.8/src/plugins/burst_buffer/cray/burst_buffer_cray.c slurm-llnl-16.05.9/src/plugins/burst_buffer/cray/burst_buffer_cray.c --- slurm-llnl-16.05.8/src/plugins/burst_buffer/cray/burst_buffer_cray.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/plugins/burst_buffer/cray/burst_buffer_cray.c 2017-01-31 20:56:34.000000000 +0100 @@ -1205,8 +1205,8 @@ num_instances = 0; /* Redundant, but fixes CLANG bug */ } sessions = _bb_get_sessions(&num_sessions, &bb_state, timeout); - slurm_mutex_lock(&bb_state.bb_mutex); assoc_mgr_lock(&assoc_locks); + slurm_mutex_lock(&bb_state.bb_mutex); bb_state.last_load_time = time(NULL); for (i = 0; i < num_sessions; i++) { if (!init_config) { @@ -1251,8 +1251,8 @@ if (bb_alloc->job_id == 0) bb_post_persist_create(NULL, bb_alloc, &bb_state); } - assoc_mgr_unlock(&assoc_locks); slurm_mutex_unlock(&bb_state.bb_mutex); + assoc_mgr_unlock(&assoc_locks); 
_bb_free_sessions(sessions, num_sessions); _bb_free_instances(instances, num_instances); @@ -3383,6 +3383,7 @@ /* Run "paths" function, get DataWarp environment variables */ if (_have_dw_cmd_opts(bb_job)) { + /* Setup "paths" operation */ if (bb_state.bb_config.validate_timeout) timeout = bb_state.bb_config.validate_timeout * 1000; else @@ -3422,48 +3423,52 @@ } xfree(resp_msg); _free_script_argv(script_argv); - } - pre_run_argv = xmalloc(sizeof(char *) * 10); - pre_run_argv[0] = xstrdup("dw_wlm_cli"); - pre_run_argv[1] = xstrdup("--function"); - pre_run_argv[2] = xstrdup("pre_run"); - pre_run_argv[3] = xstrdup("--token"); - xstrfmtcat(pre_run_argv[4], "%u", job_ptr->job_id); - pre_run_argv[5] = xstrdup("--job"); - xstrfmtcat(pre_run_argv[6], "%s/script", job_dir); - if (client_nodes_file_nid) { + /* Setup "pre_run" operation */ + pre_run_argv = xmalloc(sizeof(char *) * 10); + pre_run_argv[0] = xstrdup("dw_wlm_cli"); + pre_run_argv[1] = xstrdup("--function"); + pre_run_argv[2] = xstrdup("pre_run"); + pre_run_argv[3] = xstrdup("--token"); + xstrfmtcat(pre_run_argv[4], "%u", job_ptr->job_id); + pre_run_argv[5] = xstrdup("--job"); + xstrfmtcat(pre_run_argv[6], "%s/script", job_dir); + if (client_nodes_file_nid) { #if defined(HAVE_NATIVE_CRAY) - pre_run_argv[7] = xstrdup("--nidlistfile"); + pre_run_argv[7] = xstrdup("--nidlistfile"); #else - pre_run_argv[7] = xstrdup("--nodehostnamefile"); + pre_run_argv[7] = xstrdup("--nodehostnamefile"); #endif - pre_run_argv[8] = xstrdup(client_nodes_file_nid); - } - pre_run_args = xmalloc(sizeof(pre_run_args_t)); - pre_run_args->args = pre_run_argv; - pre_run_args->job_id = job_ptr->job_id; - pre_run_args->timeout = bb_state.bb_config.other_timeout; - pre_run_args->user_id = job_ptr->user_id; - if (job_ptr->details) /* Prevent launch until "pre_run" completes */ - job_ptr->details->prolog_running++; - - slurm_attr_init(&pre_run_attr); - if (pthread_attr_setdetachstate(&pre_run_attr, PTHREAD_CREATE_DETACHED)) - 
error("pthread_attr_setdetachstate error %m"); - while (pthread_create(&pre_run_tid, &pre_run_attr, _start_pre_run, - pre_run_args)) { - if (errno != EAGAIN) { - error("%s: pthread_create: %m", __func__); - _start_pre_run(pre_run_argv); /* Do in-line */ - break; + pre_run_argv[8] = xstrdup(client_nodes_file_nid); } - usleep(100000); - } - slurm_attr_destroy(&pre_run_attr); + pre_run_args = xmalloc(sizeof(pre_run_args_t)); + pre_run_args->args = pre_run_argv; + pre_run_args->job_id = job_ptr->job_id; + pre_run_args->timeout = bb_state.bb_config.other_timeout; + pre_run_args->user_id = job_ptr->user_id; + if (job_ptr->details) { /* Defer launch until completion */ + job_ptr->details->prolog_running++; + job_ptr->job_state |= JOB_CONFIGURING; + } + + slurm_attr_init(&pre_run_attr); + if (pthread_attr_setdetachstate(&pre_run_attr, + PTHREAD_CREATE_DETACHED)) + error("pthread_attr_setdetachstate error %m"); + while (pthread_create(&pre_run_tid, &pre_run_attr, + _start_pre_run, pre_run_args)) { + if (errno != EAGAIN) { + error("%s: pthread_create: %m", __func__); + _start_pre_run(pre_run_argv); /* Do in-line */ + break; + } + usleep(100000); + } + slurm_attr_destroy(&pre_run_attr); +} - xfree(job_dir); xfree(client_nodes_file_nid); + xfree(job_dir); return rc; } @@ -3472,7 +3477,6 @@ { last_job_update = time(NULL); job_ptr->end_time = last_job_update; - job_ptr->job_state = JOB_PENDING | JOB_COMPLETING; if (hold_job) job_ptr->priority = 0; build_cg_bitmap(job_ptr); @@ -3480,7 +3484,11 @@ job_ptr->state_reason = FAIL_BURST_BUFFER_OP; xfree(job_ptr->state_desc); job_ptr->state_desc = xstrdup("Burst buffer pre_run error"); - job_completion_logger(job_ptr, false); + + job_ptr->job_state = JOB_REQUEUE; + job_completion_logger(job_ptr, true); + job_ptr->job_state = JOB_PENDING | JOB_COMPLETING; + deallocate_nodes(job_ptr, false, false, false); } @@ -4066,6 +4074,7 @@ error("%s: unable to find job record for job %u", __func__, create_args->job_id); } + 
assoc_mgr_lock(&assoc_locks); slurm_mutex_lock(&bb_state.bb_mutex); _reset_buf_state(create_args->user_id, create_args->job_id, create_args->name, BB_STATE_ALLOCATED, @@ -4074,7 +4083,6 @@ create_args->user_id); bb_alloc->size = create_args->size; bb_alloc->pool = xstrdup(create_args->pool); - assoc_mgr_lock(&assoc_locks); if (job_ptr) { bb_alloc->account = xstrdup(job_ptr->account); if (job_ptr->assoc_ptr) { @@ -4120,8 +4128,8 @@ } (void) bb_post_persist_create(job_ptr, bb_alloc, &bb_state); bb_state.last_update_time = time(NULL); - assoc_mgr_unlock(&assoc_locks); slurm_mutex_unlock(&bb_state.bb_mutex); + assoc_mgr_unlock(&assoc_locks); unlock_slurmctld(job_write_lock); } xfree(resp_msg); @@ -4204,6 +4212,9 @@ assoc_mgr_lock_t assoc_locks = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; + /* assoc_mgr needs locking to call bb_post_persist_delete */ + if (bb_alloc) + assoc_mgr_lock(&assoc_locks); slurm_mutex_lock(&bb_state.bb_mutex); _reset_buf_state(destroy_args->user_id, destroy_args->job_id, destroy_args->name, BB_STATE_DELETED, 0); @@ -4216,14 +4227,14 @@ bb_limit_rem(bb_alloc->user_id, bb_alloc->size, bb_alloc->pool, &bb_state); - assoc_mgr_lock(&assoc_locks); (void) bb_post_persist_delete(bb_alloc, &bb_state); - assoc_mgr_unlock(&assoc_locks); (void) bb_free_alloc_rec(&bb_state, bb_alloc); } bb_state.last_update_time = time(NULL); slurm_mutex_unlock(&bb_state.bb_mutex); + if (bb_alloc) + assoc_mgr_unlock(&assoc_locks); } xfree(resp_msg); _free_create_args(destroy_args); diff -Nru slurm-llnl-16.05.8/src/plugins/checkpoint/blcr/checkpoint_blcr.c slurm-llnl-16.05.9/src/plugins/checkpoint/blcr/checkpoint_blcr.c --- slurm-llnl-16.05.8/src/plugins/checkpoint/blcr/checkpoint_blcr.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/plugins/checkpoint/blcr/checkpoint_blcr.c 2017-01-31 20:56:34.000000000 +0100 @@ -422,30 +422,29 @@ */ /* set LD_PRELOAD for batch script shell */ - //if (job->batch) { - old_env = getenvp(job->env, 
"LD_PRELOAD"); - if (old_env) { - /* search and replace all libcr_run and libcr_omit - * the old env value is messed up -- - * it will be replaced */ - while ((ptr = strtok_r(old_env, " :", &save_ptr))) { - old_env = NULL; - if (!ptr) - break; - if (!xstrncmp(ptr, "libcr_run.so", 12) || - !xstrncmp(ptr, "libcr_omit.so", 13)) - continue; - xstrcat(new_env, ptr); - xstrcat(new_env, ":"); - } + old_env = getenvp(job->env, "LD_PRELOAD"); + if (old_env) { + /* search and replace all libcr_run and libcr_omit + * the old env value is messed up -- + * it will be replaced */ + while ((ptr = strtok_r(old_env, " :", &save_ptr))) { + old_env = NULL; + if (!ptr) + break; + if (!xstrncmp(ptr, "libcr_run.so", 12) || + !xstrncmp(ptr, "libcr_omit.so", 13)) + continue; + xstrcat(new_env, ptr); + xstrcat(new_env, ":"); } - ptr = xstrdup("libcr_run.so"); - if (new_env) - xstrfmtcat(ptr, ":%s", new_env); - setenvf(&job->env, "LD_PRELOAD", ptr); - xfree(new_env); - xfree(ptr); - //} + } + ptr = xstrdup("libcr_run.so"); + if (new_env) + xstrfmtcat(ptr, ":%s", new_env); + setenvf(&job->env, "LD_PRELOAD", ptr); + xfree(new_env); + xfree(ptr); + return SLURM_SUCCESS; } diff -Nru slurm-llnl-16.05.8/src/plugins/sched/backfill/backfill.c slurm-llnl-16.05.9/src/plugins/sched/backfill/backfill.c --- slurm-llnl-16.05.8/src/plugins/sched/backfill/backfill.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/plugins/sched/backfill/backfill.c 2017-01-31 20:56:34.000000000 +0100 @@ -135,6 +135,7 @@ static int max_backfill_job_per_user = 0; static int max_backfill_jobs_start = 0; static bool backfill_continue = false; +static bool assoc_limit_stop = false; static int defer_rpc_cnt = 0; static int sched_timeout = SCHED_TIMEOUT; static int yield_sleep = YIELD_SLEEP; @@ -639,6 +640,13 @@ backfill_continue = false; } + if (sched_params && (strstr(sched_params, "assoc_limit_stop"))) { + assoc_limit_stop = true; + } else { + assoc_limit_stop = false; + } + + if (sched_params && (tmp_ptr = 
strstr(sched_params, "bf_yield_interval="))) { sched_timeout = atoi(tmp_ptr + 18); @@ -1127,7 +1135,8 @@ } if (!acct_policy_job_runnable_state(job_ptr) && - !acct_policy_job_runnable_pre_select(job_ptr)) + (!assoc_limit_stop || + !acct_policy_job_runnable_pre_select(job_ptr))) continue; job_no_reserve = 0; @@ -1847,8 +1856,19 @@ power_g_job_start(job_ptr); if (job_ptr->batch_flag == 0) srun_allocate(job_ptr->job_id); - else if ((job_ptr->details == NULL) || - (job_ptr->details->prolog_running == 0)) + else if ( +#ifdef HAVE_BG + /* On a bluegene system we need to run the + * prolog while the job is CONFIGURING so this + * can't work off the CONFIGURING flag as done + * elsewhere. + */ + !job_ptr->details || + !job_ptr->details->prolog_running +#else + !IS_JOB_CONFIGURING(job_ptr) +#endif + ) launch_job(job_ptr); slurmctld_diag_stats.backfilled_jobs++; slurmctld_diag_stats.last_backfilled_jobs++; diff -Nru slurm-llnl-16.05.8/src/plugins/select/cons_res/select_cons_res.c slurm-llnl-16.05.9/src/plugins/select/cons_res/select_cons_res.c --- slurm-llnl-16.05.8/src/plugins/select/cons_res/select_cons_res.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/plugins/select/cons_res/select_cons_res.c 2017-01-31 20:56:34.000000000 +0100 @@ -544,21 +544,24 @@ /* sort the rows of a partition from "most allocated" to "least allocated" */ extern void cr_sort_part_rows(struct part_res_record *p_ptr) { - uint32_t i, j, a, b; + uint32_t i, j, b; + uint32_t a[p_ptr->num_rows]; if (!p_ptr->row) return; for (i = 0; i < p_ptr->num_rows; i++) { if (p_ptr->row[i].row_bitmap) - a = bit_set_count(p_ptr->row[i].row_bitmap); + a[i] = bit_set_count(p_ptr->row[i].row_bitmap); else - a = 0; + a[i] = 0; + } + for (i = 0; i < p_ptr->num_rows; i++) { for (j = i+1; j < p_ptr->num_rows; j++) { - if (!p_ptr->row[j].row_bitmap) - continue; - b = bit_set_count(p_ptr->row[j].row_bitmap); - if (b > a) { + if (a[j] > a[i]) { + b = a[j]; + a[j] = a[i]; + a[i] = b; _swap_rows(&(p_ptr->row[i]), 
&(p_ptr->row[j])); } } @@ -1878,9 +1881,7 @@ ((job_ptr->bit_flags & TEST_NOW_ONLY) == 0)) { int time_window = 30; bool more_jobs = true; - bool timed_out = false; DEF_TIMERS; - list_sort(cr_job_list, _cr_job_list_sort); START_TIMER; job_iterator = list_iterator_create(cr_job_list); @@ -1908,14 +1909,6 @@ last_job_ptr = tmp_job_ptr; _rm_job_from_res(future_part, future_usage, tmp_job_ptr, 0); - if (timed_out) { - /* After timeout, remove ALL remaining - * jobs and test if the pending job can - * start, rather than executing the slow - * cr_job_test() operation after - * removing every 200 jobs */ - continue; - } if (rm_job_cnt++ > 200) break; next_job_ptr = list_peek_next(job_iterator); @@ -1949,12 +1942,9 @@ } break; } - /* After 1 second of iterating over groups of running - * jobs, simulate the termination of all remaining jobs - * in order to determine if pending job can ever run */ END_TIMER; - if (DELTA_TIMER >= 1000000) - timed_out = true; + if (DELTA_TIMER >= 2000000) + break; /* Quit after 2 seconds wall time */ } list_iterator_destroy(job_iterator); } diff -Nru slurm-llnl-16.05.8/src/plugins/task/cgroup/task_cgroup_memory.c slurm-llnl-16.05.9/src/plugins/task/cgroup/task_cgroup_memory.c --- slurm-llnl-16.05.8/src/plugins/task/cgroup/task_cgroup_memory.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/plugins/task/cgroup/task_cgroup_memory.c 2017-01-31 20:56:34.000000000 +0100 @@ -458,6 +458,7 @@ goto error; } + fstatus = SLURM_SUCCESS; error: xcgroup_unlock(&memory_cg); xcgroup_destroy(&memory_cg); diff -Nru slurm-llnl-16.05.8/src/sacctmgr/common.c slurm-llnl-16.05.9/src/sacctmgr/common.c --- slurm-llnl-16.05.8/src/sacctmgr/common.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/sacctmgr/common.c 2017-01-31 20:56:34.000000000 +0100 @@ -405,8 +405,10 @@ field->name = xstrdup("MaxCPUsPU"); field->len = 9; field->print_routine = print_fields_uint; - } else if (!strncasecmp("MaxTRESPerJob", - object, MAX(command_len, 7))) { + } 
else if (!strncasecmp("MaxTRES", + object, MAX(command_len, 7)) || + !strncasecmp("MaxTRESPerJob", + object, MAX(command_len, 11))) { field->type = PRINT_MAXT; field->name = xstrdup("MaxTRES"); field->len = 13; @@ -452,7 +454,9 @@ field->len = 13; field->print_routine = sacctmgr_print_tres; } else if (!strncasecmp("MaxTRESPerUser", object, - MAX(command_len, 11))) { + MAX(command_len, 11)) || + !strncasecmp("MaxTRESPU", object, + MAX(command_len, 9))) { field->type = PRINT_MAXTU; field->name = xstrdup("MaxTRESPU"); field->len = 13; @@ -473,9 +477,9 @@ field->len = 9; field->print_routine = print_fields_uint; } else if (!strncasecmp("MaxJobsPerUser", object, - MAX(command_len, 8)) || + MAX(command_len, 11)) || !strncasecmp("MaxJobsPU", object, - MAX(command_len, 8))) { + MAX(command_len, 9))) { field->type = PRINT_MAXJ; /* used same as MaxJobs */ field->name = xstrdup("MaxJobsPU"); field->len = 9; diff -Nru slurm-llnl-16.05.8/src/sbcast/opts.c slurm-llnl-16.05.9/src/sbcast/opts.c --- slurm-llnl-16.05.8/src/sbcast/opts.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/sbcast/opts.c 2017-01-31 20:56:34.000000000 +0100 @@ -94,7 +94,7 @@ {NULL, 0, 0, 0} }; - if (getenv("SBCAST_COMPRESS")) + if ((env_val = getenv("SBCAST_COMPRESS"))) params.compress = parse_compress_type(env_val); if ( ( env_val = getenv("SBCAST_FANOUT") ) ) params.fanout = atoi(env_val); diff -Nru slurm-llnl-16.05.8/src/slurmctld/acct_policy.c slurm-llnl-16.05.9/src/slurmctld/acct_policy.c --- slurm-llnl-16.05.8/src/slurmctld/acct_policy.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmctld/acct_policy.c 2017-01-31 20:56:34.000000000 +0100 @@ -1765,7 +1765,7 @@ qos_ptr->name, assoc_mgr_tres_name_array[tres_pos], qos_ptr->grp_tres_mins_ctld[tres_pos], - tres_usage_mins[i]); + tres_usage_mins[tres_pos]); rc = false; goto end_it; break; @@ -2741,7 +2741,7 @@ * parent or not */ assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, - NO_LOCK, NO_LOCK, NO_LOCK 
}; + READ_LOCK, NO_LOCK, NO_LOCK }; /* check to see if we are enforcing associations */ if (!accounting_enforce) diff -Nru slurm-llnl-16.05.8/src/slurmctld/agent.c slurm-llnl-16.05.9/src/slurmctld/agent.c --- slurm-llnl-16.05.8/src/slurmctld/agent.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmctld/agent.c 2017-01-31 20:56:34.000000000 +0100 @@ -174,7 +174,12 @@ char *message; } mail_info_t; -static void _sig_handler(int dummy); +typedef struct retry_args { + bool mail_too; /* Time to wait between retries */ + int min_wait; /* Send pending email too */ +} retry_args_t; + +static void *_agent_retry(void *arg); static int _batch_launch_defer(queued_request_t *queued_req_ptr); static inline int _comm_err(char *node_name, slurm_msg_type_t msg_type); static void _list_delete_retry(void *retry_entry); @@ -185,8 +190,9 @@ int no_resp_cnt, int retry_cnt); static void _purge_agent_args(agent_arg_t *agent_arg_ptr); static void _queue_agent_retry(agent_info_t * agent_info_ptr, int count); -static int _setup_requeue(agent_arg_t *agent_arg_ptr, thd_t *thread_ptr, - int *count, int *spot); +static int _setup_requeue(agent_arg_t *agent_arg_ptr, thd_t *thread_ptr, + int *count, int *spot); +static void _sig_handler(int dummy); static void _spawn_retry_agent(agent_arg_t * agent_arg_ptr); static void *_thread_per_group_rpc(void *args); static int _valid_agent_arg(agent_arg_t *agent_arg_ptr); @@ -1261,17 +1267,41 @@ } /* - * agent_retry - Agent for retrying pending RPCs. One pending request is + * agent_retry - Spawn agent for retrying pending RPCs. 
One pending request is * issued if it has been pending for at least min_wait seconds * IN min_wait - Minimum wait time between re-issue of a pending RPC * IN mail_too - Send pending email too, note this performed using a * fork/waitpid, so it can take longer than just creating a pthread * to send RPCs - * RET count of queued requests remaining */ -extern int agent_retry (int min_wait, bool mail_too) +extern void agent_retry(int min_wait, bool mail_too) { - int list_size = 0, rc; + pthread_attr_t thread_attr; + pthread_t thread_id = (pthread_t) 0; + retry_args_t *retry_args_ptr; + + retry_args_ptr = xmalloc(sizeof(struct retry_args)); + retry_args_ptr->mail_too = mail_too; + retry_args_ptr->min_wait = min_wait; + + slurm_attr_init(&thread_attr); + if (pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_DETACHED)) + error("pthread_attr_setdetachstate error %m"); + if (pthread_create(&thread_id, &thread_attr, _agent_retry, + (void *) retry_args_ptr)) { + error("pthread_create error %m"); + xfree(retry_args_ptr); + } + slurm_attr_destroy(&thread_attr); +} + +/* Do the work requested by agent_retry (retry pending RPCs). 
+ * This is a separate thread so the job records can be locked */ +static void *_agent_retry(void *arg) +{ + retry_args_t *retry_args_ptr = (retry_args_t *) arg; + bool mail_too; + int min_wait, rc; time_t now = time(NULL); queued_request_t *queued_req_ptr = NULL; agent_arg_t *agent_arg_ptr = NULL; @@ -1279,17 +1309,26 @@ pthread_t thread_mail = 0; pthread_attr_t attr_mail; mail_info_t *mi = NULL; + /* Write lock on jobs */ + slurmctld_lock_t job_write_lock = + { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; + + mail_too = retry_args_ptr->mail_too; + min_wait = retry_args_ptr->min_wait; + xfree(arg); + lock_slurmctld(job_write_lock); slurm_mutex_lock(&retry_mutex); if (retry_list) { static time_t last_msg_time = (time_t) 0; - uint32_t msg_type[5] = {0, 0, 0, 0, 0}, i = 0; + uint32_t msg_type[5] = {0, 0, 0, 0, 0}; + int i = 0, list_size; list_size = list_count(retry_list); if ((list_size > 100) && (difftime(now, last_msg_time) > 300)) { /* Note sizable backlog of work */ info("slurmctld: agent retry_list size is %d", - list_size); + list_size); retry_iter = list_iterator_create(retry_list); while ((queued_req_ptr = (queued_request_t *) list_next(retry_iter))) { @@ -1311,13 +1350,13 @@ /* too much work already */ slurm_mutex_unlock(&agent_cnt_mutex); slurm_mutex_unlock(&retry_mutex); - return list_size; + unlock_slurmctld(job_write_lock); + return NULL; } slurm_mutex_unlock(&agent_cnt_mutex); if (retry_list) { /* first try to find a new (never tried) record */ - retry_iter = list_iterator_create(retry_list); while ((queued_req_ptr = (queued_request_t *) list_next(retry_iter))) { @@ -1327,14 +1366,12 @@ agent_arg_ptr); xfree(queued_req_ptr); list_remove(retry_iter); - list_size--; continue; } if (rc > 0) continue; if (queued_req_ptr->last_attempt == 0) { list_remove(retry_iter); - list_size--; break; } } @@ -1356,7 +1393,6 @@ agent_arg_ptr); xfree(queued_req_ptr); list_remove(retry_iter); - list_size--; continue; } if (rc > 0) @@ -1364,13 +1400,13 @@ age = difftime(now, 
queued_req_ptr->last_attempt); if (age > min_wait) { list_remove(retry_iter); - list_size--; break; } } list_iterator_destroy(retry_iter); } slurm_mutex_unlock(&retry_mutex); + unlock_slurmctld(job_write_lock); if (queued_req_ptr) { agent_arg_ptr = queued_req_ptr->agent_arg_ptr; @@ -1406,7 +1442,7 @@ slurm_mutex_unlock(&agent_cnt_mutex); } - return list_size; + return NULL; } /* @@ -1823,7 +1859,7 @@ agent_arg_t *agent_arg_ptr; batch_job_launch_msg_t *launch_msg_ptr; time_t now = time(NULL); - struct job_record *job_ptr; + struct job_record *job_ptr; int nodes_ready = 0, tmp = 0; agent_arg_ptr = queued_req_ptr->agent_arg_ptr; @@ -1845,6 +1881,9 @@ return -1; /* job cancelled while waiting */ } + if (job_ptr->details && job_ptr->details->prolog_running) + return 1; + if (job_ptr->wait_all_nodes) { (void) job_node_ready(launch_msg_ptr->job_id, &tmp); if (tmp == (READY_JOB_STATE | READY_NODE_STATE)) { @@ -1853,9 +1892,6 @@ !xstrcmp(launch_msg_ptr->alias_list, "TBD")) { /* Update launch RPC with correct node * aliases */ - struct job_record *job_ptr; - job_ptr = find_job_record(launch_msg_ptr-> - job_id); xfree(launch_msg_ptr->alias_list); launch_msg_ptr->alias_list = xstrdup(job_ptr-> alias_list); @@ -1887,7 +1923,8 @@ } if (nodes_ready) { - job_config_fini(job_ptr); + if (IS_JOB_CONFIGURING(job_ptr)) + job_config_fini(job_ptr); queued_req_ptr->last_attempt = (time_t) 0; return 0; } diff -Nru slurm-llnl-16.05.8/src/slurmctld/agent.h slurm-llnl-16.05.9/src/slurmctld/agent.h --- slurm-llnl-16.05.8/src/slurmctld/agent.h 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmctld/agent.h 2017-01-31 20:56:34.000000000 +0100 @@ -85,11 +85,10 @@ * issued if it has been pending for at least min_wait seconds * IN min_wait - Minimum wait time between re-issue of a pending RPC * IN mail_too - Send pending email too, note this performed using a - * fork/waitpid, so it can take longer than just creating - * a pthread to send RPCs - * RET count of queued requests 
remaining + * fork/waitpid, so it can take longer than just creating a pthread + * to send RPCs */ -extern int agent_retry (int min_wait, bool mail_too); +extern void agent_retry(int min_wait, bool mail_too); /* agent_purge - purge all pending RPC requests */ extern void agent_purge (void); diff -Nru slurm-llnl-16.05.8/src/slurmctld/job_mgr.c slurm-llnl-16.05.9/src/slurmctld/job_mgr.c --- slurm-llnl-16.05.8/src/slurmctld/job_mgr.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmctld/job_mgr.c 2017-01-31 20:56:34.000000000 +0100 @@ -7470,6 +7470,7 @@ { time_t now = time(NULL); + last_job_update = now; job_ptr->job_state &= (~JOB_CONFIGURING); job_ptr->tot_sus_time = difftime(now, job_ptr->start_time); if ((job_ptr->time_limit != INFINITE) && (job_ptr->tot_sus_time != 0)) { @@ -7486,9 +7487,20 @@ if (bit_overlap(job_ptr->node_bitmap, power_node_bitmap)) return false; - if (job_ptr->wait_all_nodes && - ((select_g_job_ready(job_ptr) & READY_NODE_STATE) == 0)) - return false; + if (job_ptr->wait_all_nodes) { + /* Make sure all nodes ready to start job */ + if ((select_g_job_ready(job_ptr) & READY_NODE_STATE) == 0) + return false; + } else if (job_ptr->batch_flag) { + /* Make first node is ready to start batch job */ + int i_first = bit_ffs(job_ptr->node_bitmap); + struct node_record *node_ptr = node_record_table_ptr + i_first; + if ((i_first != -1) && + (IS_NODE_POWER_SAVE(node_ptr) || + IS_NODE_POWER_UP(node_ptr))) { + return false; + } + } return true; } @@ -12846,6 +12858,7 @@ if (job_ptr->alias_list && !xstrcmp(job_ptr->alias_list, "TBD") && (prolog == 0) && job_ptr->node_bitmap && (bit_overlap(power_node_bitmap, job_ptr->node_bitmap) == 0)) { + last_job_update = time(NULL); job_ptr->job_state &= (~JOB_CONFIGURING); set_job_alias_list(job_ptr); } @@ -14373,6 +14386,8 @@ * to add it again. 
*/ acct_policy_add_job_submit(job_ptr); + acct_policy_update_pending_job(job_ptr); + if (state & JOB_SPECIAL_EXIT) { job_ptr->job_state |= JOB_SPECIAL_EXIT; job_ptr->state_reason = WAIT_HELD_USER; diff -Nru slurm-llnl-16.05.8/src/slurmctld/job_scheduler.c slurm-llnl-16.05.9/src/slurmctld/job_scheduler.c --- slurm-llnl-16.05.8/src/slurmctld/job_scheduler.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmctld/job_scheduler.c 2017-01-31 20:56:34.000000000 +0100 @@ -574,6 +574,7 @@ ListIterator job_iterator; slurmctld_lock_t job_write_lock = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; + time_t now = time(NULL); #ifdef HAVE_BG static uint16_t cpus_per_node = 0; if (!cpus_per_node) @@ -591,7 +592,8 @@ continue; if (part_ptr == NULL) continue; - if ((job_ptr->details == NULL) || job_ptr->details->begin_time) + if ((job_ptr->details == NULL) || + (job_ptr->details->begin_time > now)) continue; if ((part_ptr->state_up & PARTITION_SCHED) == 0) continue; @@ -863,8 +865,20 @@ info("sched: Allocate JobId=%u Partition=%s NodeList=%s #CPUs=%u", job_ptr->job_id, job_ptr->part_ptr->name, job_ptr->nodes, job_ptr->total_cpus); - if ((job_ptr->details->prolog_running == 0) && - ((job_ptr->bit_flags & NODE_REBOOT) == 0)) { + + if ( +#ifdef HAVE_BG + /* On a bluegene system we need to run the + * prolog while the job is CONFIGURING so this + * can't work off the CONFIGURING flag as done + * elsewhere. 
+ */ + !job_ptr->details->prolog_running && + !(job_ptr->bit_flags & NODE_REBOOT) +#else + !IS_JOB_CONFIGURING(job_ptr) +#endif + ) { launch_msg = build_launch_job_msg(job_ptr, msg->protocol_version); } @@ -1842,10 +1856,20 @@ #endif if (job_ptr->batch_flag == 0) srun_allocate(job_ptr->job_id); - else if ((job_ptr->details->prolog_running == 0) && - ((job_ptr->bit_flags & NODE_REBOOT) == 0)) { + else if ( +#ifdef HAVE_BG + /* On a bluegene system we need to run the + * prolog while the job is CONFIGURING so this + * can't work off the CONFIGURING flag as done + * elsewhere. + */ + !job_ptr->details->prolog_running && + !(job_ptr->bit_flags & NODE_REBOOT) +#else + !IS_JOB_CONFIGURING(job_ptr) +#endif + ) launch_job(job_ptr); - } rebuild_job_part_list(job_ptr); job_cnt++; if (is_job_array_head && @@ -3181,7 +3205,8 @@ } /* Enforce reservation: access control, time and nodes */ - if (job_ptr->details->begin_time) + if (job_ptr->details->begin_time && + (job_ptr->details->begin_time > now)) start_res = job_ptr->details->begin_time; else start_res = now; @@ -3753,10 +3778,10 @@ return errno; } - if (job_ptr->details) + if (job_ptr->details) { job_ptr->details->prolog_running++; - - job_ptr->job_state |= JOB_CONFIGURING; + job_ptr->job_state |= JOB_CONFIGURING; + } slurm_attr_init(&thread_attr_prolog); pthread_attr_setdetachstate(&thread_attr_prolog, diff -Nru slurm-llnl-16.05.8/src/slurmctld/node_mgr.c slurm-llnl-16.05.9/src/slurmctld/node_mgr.c --- slurm-llnl-16.05.8/src/slurmctld/node_mgr.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmctld/node_mgr.c 2017-01-31 20:56:34.000000000 +0100 @@ -3346,9 +3346,11 @@ char *host_str = NULL; hostlist_t no_resp_hostlist = NULL; - for (i=0; i<node_record_count; i++) { + for (i = 0; i < node_record_count; i++) { node_ptr = &node_record_table_ptr[i]; - if (!node_ptr->not_responding) + if (!node_ptr->not_responding || + IS_NODE_POWER_SAVE(node_ptr) || + IS_NODE_POWER_UP(node_ptr)) continue; if 
(no_resp_hostlist) { (void) hostlist_push_host(no_resp_hostlist, diff -Nru slurm-llnl-16.05.8/src/slurmctld/proc_req.c slurm-llnl-16.05.9/src/slurmctld/proc_req.c --- slurm-llnl-16.05.8/src/slurmctld/proc_req.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmctld/proc_req.c 2017-01-31 20:56:34.000000000 +0100 @@ -3381,8 +3381,20 @@ _throttle_fini(&active_rpc_cnt); goto fini; } - if (job_ptr->details && - job_ptr->details->prolog_running) { + + if ( +#ifdef HAVE_BG + /* On a bluegene system we need to run the + * prolog while the job is CONFIGURING so this + * can't work off the CONFIGURING flag as done + * elsewhere. + */ + job_ptr->details && + job_ptr->details->prolog_running +#else + IS_JOB_CONFIGURING(job_ptr) +#endif + ) { slurm_send_rc_msg(msg, EAGAIN); unlock_slurmctld(job_write_lock); _throttle_fini(&active_rpc_cnt); diff -Nru slurm-llnl-16.05.8/src/slurmctld/reservation.c slurm-llnl-16.05.9/src/slurmctld/reservation.c --- slurm-llnl-16.05.8/src/slurmctld/reservation.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmctld/reservation.c 2017-01-31 20:56:34.000000000 +0100 @@ -5400,7 +5400,6 @@ } _advance_resv_time(resv_ptr); if ((resv_ptr->job_run_cnt == 0) && - (resv_ptr->flags_set_node == false) && ((resv_ptr->flags & RESERVE_FLAG_DAILY ) == 0) && ((resv_ptr->flags & RESERVE_FLAG_WEEKLY) == 0)) { if (resv_ptr->job_pend_cnt) { diff -Nru slurm-llnl-16.05.8/src/slurmctld/step_mgr.c slurm-llnl-16.05.9/src/slurmctld/step_mgr.c --- slurm-llnl-16.05.8/src/slurmctld/step_mgr.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmctld/step_mgr.c 2017-01-31 20:56:34.000000000 +0100 @@ -981,11 +981,12 @@ return NULL; } } - if (job_ptr->details - && job_ptr->details->prolog_running == 0) { + if (IS_JOB_CONFIGURING(job_ptr)) { info("%s: Configuration for job %u is complete", __func__, job_ptr->job_id); job_config_fini(job_ptr); + if (job_ptr->bit_flags & NODE_REBOOT) + job_validate_mem(job_ptr); } } diff -Nru 
slurm-llnl-16.05.8/src/slurmd/common/xcgroup.c slurm-llnl-16.05.9/src/slurmd/common/xcgroup.c --- slurm-llnl-16.05.8/src/slurmd/common/xcgroup.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmd/common/xcgroup.c 2017-01-31 20:56:34.000000000 +0100 @@ -449,7 +449,6 @@ char* file_path; uid_t uid; gid_t gid; - int create_only; uint32_t notify; /* init variables based on input cgroup */ @@ -457,7 +456,6 @@ file_path = cg->path; uid = cg->uid; gid = cg->gid; - create_only = 0; notify = cg->notify; /* save current mask and apply working one */ @@ -465,20 +463,23 @@ omask = umask(cmask); /* build cgroup */ - if (mkdir(file_path, 0755)) { - if (create_only || errno != EEXIST) { - debug2("%s: unable to create cgroup '%s' : %m", - __func__, file_path); + if (mkdir(file_path, 0755)) { + if (errno != EEXIST) { + error("%s: unable to create cgroup '%s' : %m", + __func__, file_path); umask(omask); return fstatus; + } else { + debug("%s: cgroup '%s' already exists", + __func__, file_path); } } umask(omask); /* change cgroup ownership as requested */ if (chown(file_path, uid, gid)) { - debug2("%s: unable to chown %d:%d cgroup '%s' : %m", - __func__, uid, gid, file_path); + error("%s: unable to chown %d:%d cgroup '%s' : %m", + __func__, uid, gid, file_path); return fstatus; } diff -Nru slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.c slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.c --- slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.c 2017-01-31 20:56:34.000000000 +0100 @@ -128,6 +128,7 @@ static pthread_mutex_t message_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t message_cond = PTHREAD_COND_INITIALIZER; static int message_connections; +static int msg_target_node_id = 0; /* * Returns true if "uid" is a "slurm authorized user" - i.e. 
uid == 0 @@ -739,8 +740,6 @@ int errnum = 0; int sig; static int msg_sent = 0; - char *ptr = NULL; - int target_node_id = 0; stepd_step_task_info_t *task; uint32_t i; uint32_t flag; @@ -792,11 +791,8 @@ } } - ptr = getenvp(job->env, "SLURM_STEP_KILLED_MSG_NODE_ID"); - if (ptr) - target_node_id = atoi(ptr); if ((job->stepid != SLURM_EXTERN_CONT) && - (job->nodeid == target_node_id) && (msg_sent == 0) && + (job->nodeid == msg_target_node_id) && (msg_sent == 0) && (job->state < SLURMSTEPD_STEP_ENDING)) { time_t now = time(NULL); char entity[24], time_str[24]; @@ -1818,3 +1814,10 @@ } } } + +extern void set_msg_node_id(stepd_step_rec_t *job) +{ + char *ptr = getenvp(job->env, "SLURM_STEP_KILLED_MSG_NODE_ID"); + if (ptr) + msg_target_node_id = atoi(ptr); +} diff -Nru slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.h slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.h --- slurm-llnl-16.05.8/src/slurmd/slurmstepd/req.h 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmd/slurmstepd/req.h 2017-01-31 20:56:34.000000000 +0100 @@ -46,4 +46,6 @@ /* Delay until a job is resumed */ extern void wait_for_resumed(uint16_t msg_type); +extern void set_msg_node_id(stepd_step_rec_t *job); + #endif /* _STEP_REQ_H */ diff -Nru slurm-llnl-16.05.8/src/slurmd/slurmstepd/slurmstepd.c slurm-llnl-16.05.9/src/slurmd/slurmstepd/slurmstepd.c --- slurm-llnl-16.05.8/src/slurmd/slurmstepd/slurmstepd.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/slurmd/slurmstepd/slurmstepd.c 2017-01-31 20:56:34.000000000 +0100 @@ -581,6 +581,8 @@ env_array_overwrite(&job->env,"SLURM_TOPOLOGY_ADDR_PATTERN", conf->node_topo_pattern); + set_msg_node_id(job); + return job; } diff -Nru slurm-llnl-16.05.8/src/srun/libsrun/allocate.c slurm-llnl-16.05.9/src/srun/libsrun/allocate.c --- slurm-llnl-16.05.8/src/srun/libsrun/allocate.c 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/src/srun/libsrun/allocate.c 2017-01-31 20:56:34.000000000 +0100 @@ -877,6 +877,7 @@ j->power_flags = 
opt.power_flags; if (opt.mcs_label) j->mcs_label = opt.mcs_label; + j->wait_all_nodes = 1; return j; } diff -Nru slurm-llnl-16.05.8/testsuite/expect/test10.13 slurm-llnl-16.05.9/testsuite/expect/test10.13 --- slurm-llnl-16.05.8/testsuite/expect/test10.13 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test10.13 2017-01-31 20:56:34.000000000 +0100 @@ -44,6 +44,11 @@ print_header $test_id +if { [test_bluegene] == 0 } { + send_user "\nWARNING: This test is only compatible with bluegene systems\n" + exit 0 +} + if {[file exists $smap] == 0} { send_user "\nWARNING: smap not installed\n" exit 0 diff -Nru slurm-llnl-16.05.8/testsuite/expect/test10.5 slurm-llnl-16.05.9/testsuite/expect/test10.5 --- slurm-llnl-16.05.8/testsuite/expect/test10.5 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test10.5 2017-01-31 20:56:34.000000000 +0100 @@ -42,6 +42,11 @@ print_header $test_id +if { [test_bluegene] == 0 } { + send_user "\nWARNING: This test is only compatible with bluegene systems\n" + exit 0 +} + if {[file exists $smap] == 0} { send_user "\nWARNING: smap not installed\n" exit 0 diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.14 slurm-llnl-16.05.9/testsuite/expect/test1.14 --- slurm-llnl-16.05.8/testsuite/expect/test1.14 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test1.14 2017-01-31 20:56:34.000000000 +0100 @@ -39,7 +39,6 @@ set file_out "test$test_id.output" set file_out2 "test$test_id.output2" set job_id 0 -set sleep_secs 10 print_header $test_id @@ -64,10 +63,15 @@ exec $bin_rm -f $file_in $file_in2 $file_out $file_out2 make_bash_script $file_in " echo tasks_per_node=\$SLURM_TASKS_PER_NODE + if \[ \$SLURM_TASKS_PER_NODE -gt 32 \]; then + sleep_secs=45 + else + sleep_secs=10 + fi inx=0 while \[ \$inx -lt \$SLURM_TASKS_PER_NODE \] do - $srun --exclusive -n1 $bin_sleep $sleep_secs & + $srun --exclusive -n1 $bin_sleep \$sleep_secs & inx=\$((inx+1)) done $bin_sleep 4 @@ -81,7 +85,7 @@ # 
# Spawn a job via sbatch # -spawn $sbatch -N1 -t1 --gres=craynetwork:0 --output=$file_out $file_in +spawn $sbatch -N1 -t2 --gres=craynetwork:0 --output=$file_out $file_in expect { -re "Submitted batch job ($number)" { set job_id $expect_out(1,string) @@ -149,25 +153,30 @@ # Delete left-over input script # Build another input script file # Run one more step than allocated CPUs with immediate option and make aborts -# The "sleep 4" is meant to insure the earlier job steps start first +# The "sleep" is meant to insure the earlier job steps start first # exec $bin_rm -f $file_in $file_out make_bash_script $file_in " inx=0 + if \[ \$SLURM_TASKS_PER_NODE -gt 32 \]; then + sleep_secs=45 + else + sleep_secs=10 + fi while \[ \$inx -lt \$SLURM_TASKS_PER_NODE \] do - $srun --exclusive -n1 $bin_sleep $sleep_secs & + $srun --exclusive -n1 --mem=0 $bin_sleep \$sleep_secs & inx=\$((inx+1)) done $bin_sleep 4 - $srun -v --exclusive -n1 --immediate $file_in2 & + $srun -v --exclusive -n1 --mem=0 --immediate $file_in2 & wait " # # Spawn a job via sbatch # -spawn $sbatch -N1 -t1 --gres=craynetwork:0 --output=$file_out2 $file_in +spawn $sbatch -N1 -t2 --gres=craynetwork:0 --output=$file_out2 $file_in expect { -re "Submitted batch job ($number)" { set job_id $expect_out(1,string) diff -Nru slurm-llnl-16.05.8/testsuite/expect/test12.2 slurm-llnl-16.05.9/testsuite/expect/test12.2 --- slurm-llnl-16.05.8/testsuite/expect/test12.2 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test12.2 2017-01-31 20:56:34.000000000 +0100 @@ -186,7 +186,7 @@ # Compute error in MB set diff_io [expr $max_disk_write - $max_disk_read] set error_io [expr abs($diff_io)] - if { $error_io > 0.05 } { + if { $error_io > 0.3 } { send_user "\nFAILURE: written file size does not match read size " send_user "file_size:$mb_file_size MB " send_user "max_disk_write:$max_disk_write MB " @@ -196,7 +196,7 @@ set diff_io [expr $ave_disk_write - $ave_disk_read] set error_io [expr abs($diff_io)] - if { 
$error_io > 0.05 } { + if { $error_io > 0.3 } { send_user "\nFAILURE: average written file size " send_user "does not match average read size " send_user "file_size:$mb_file_size MB " diff -Nru slurm-llnl-16.05.8/testsuite/expect/test14.10 slurm-llnl-16.05.9/testsuite/expect/test14.10 --- slurm-llnl-16.05.8/testsuite/expect/test14.10 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test14.10 2017-01-31 20:56:34.000000000 +0100 @@ -91,10 +91,10 @@ $srun -N1 -n1 -w$node2 ./$file_in2 echo -n \"Checking node 1: \" -$srun -N1 -n1 -w$node2 ls /tmp/$node2/test$test_id\_file +$srun -Q -N1 -n1 -w$node2 ls /tmp/$node2/test$test_id\_file echo -n \"Checking node 0: \" -$srun -N1 -n1 -w$node1 ls /tmp/$node1/test$test_id\_file +$srun -Q -N1 -n1 -w$node1 ls /tmp/$node1/test$test_id\_file $srun $bin_rm -f /tmp/$node1/test$test_id\_file $srun $bin_rm -fr /tmp/$node1 @@ -107,6 +107,7 @@ " # Make allocations +set timeout $max_job_delay set matches 0 spawn $salloc -N2 -w$hostlist -t1 ./$file_in1 expect { diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.52 slurm-llnl-16.05.9/testsuite/expect/test1.52 --- slurm-llnl-16.05.8/testsuite/expect/test1.52 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test1.52 2017-01-31 20:56:34.000000000 +0100 @@ -37,7 +37,7 @@ set exit_code 0 set num_nodes 2 set num_tasks 2 -set node_count 0 +set idle_nodes 0 set max_nodes 0 set task_count 0 set hostfile "test$test_id.hostfile" @@ -71,10 +71,6 @@ set max_nodes 999999 exp_continue } - -re "TotalNodes=($number)" { - set node_count $expect_out(1,string) - exp_continue - } timeout { send_user "\nFAILURE: scontrol not responding\n" exit 1 @@ -83,8 +79,14 @@ wait } } -if { ($node_count < 3) || ($max_nodes < 3) } { - send_user "WARNING: system must have at least 3 nodes to run this test on. 
$node_count $max_nodes\n" + +set idle_nodes [available_nodes $def_part idle] +if { ($idle_nodes < 3) || ($max_nodes < 3) } { + if { $max_nodes == 999999 } { + send_user "WARNING: partition $def_part must have at least 3 idle nodes and MaxNodes >= 3 to run this test on. IDLE:$idle_nodes MaxNodes:UNLIMITED\n" + } else { + send_user "WARNING: partition $def_part must have at least 3 idle nodes and MaxNodes >= 3 to run this test on. IDLE:$idle_nodes MaxNodes:$max_nodes\n" + } exit $exit_code } diff -Nru slurm-llnl-16.05.8/testsuite/expect/test15.22 slurm-llnl-16.05.9/testsuite/expect/test15.22 --- slurm-llnl-16.05.8/testsuite/expect/test15.22 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test15.22 2017-01-31 20:56:34.000000000 +0100 @@ -131,9 +131,15 @@ # # Submit job explicitly to a non-default partition # -set job_id 0 +set job_id 0 +set legit_failure 0 set salloc_pid [spawn $salloc --partition=$other_part_name -t1 $bin_sleep 1] expect { + -re "Required node not available" { + set legit_failure 1 + exec $bin_kill -INT $salloc_pid + exp_continue + } -re "Granted job allocation ($number)" { set job_id $expect_out(1,string) exp_continue @@ -151,7 +157,9 @@ } } # Confirm the job's partition -if {$job_id == 0} { +if {$legit_failure == 1} { + send_user "\nWARNING: partition '$other_part_name' is not usable\n" +} elseif {$job_id == 0} { send_user "\nFAILURE: batch submit failure\n" set exit_code 1 } else { diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.63 slurm-llnl-16.05.9/testsuite/expect/test1.63 --- slurm-llnl-16.05.8/testsuite/expect/test1.63 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test1.63 2017-01-31 20:56:34.000000000 +0100 @@ -72,6 +72,7 @@ } -re "Hello World!" 
{ incr match_run + sleep 0.1 exec $bin_kill -INT $srun_pid exp_continue } diff -Nru slurm-llnl-16.05.8/testsuite/expect/test17.34 slurm-llnl-16.05.9/testsuite/expect/test17.34 --- slurm-llnl-16.05.8/testsuite/expect/test17.34 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test17.34 2017-01-31 20:56:34.000000000 +0100 @@ -62,8 +62,14 @@ } else { set task_limit 1 } + + set ntasks [expr abs($task_limit + $task)] + if {$ntasks == 0} { + set ntasks 1 + } + set error_chk 0 - spawn $sbatch -t1 -w$node -S$core_spec -n[expr abs($task_limit + $task)] -o$file_out $spec_in + spawn $sbatch -t1 -w$node -S$core_spec -n$ntasks -o$file_out $spec_in expect { -re "Submitted batch job ($number)" { set job_id $expect_out(1,string) @@ -156,6 +162,19 @@ print_header $test_id +set select_type [test_select_type] +if {![string compare $select_type "linear"] || ![string compare $select_type "serial"]} { + send_user "\nWARNING: This test is incompatible with select/$select_type\n" + exit 0 +} elseif {![string compare $select_type "cray"] && ![test_select_type_params "other_cons_res"]} { + send_user "\nWARNING: This test is incompatible with select/linear\n" + exit 0 +} +if {[test_select_type_params "CR_SOCKET"]} { + send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n" + exit 0 +} + log_user 0 set allow_spec 0 spawn $scontrol show config @@ -178,16 +197,6 @@ exit $exit_code } -set select_type [test_select_type] -if {![string compare $select_type "linear"]} { - send_user "\nWARNING: This test is incompatible with select/$select_type\n" - exit 0 -} -if {[test_select_type_params "CR_SOCKET"]} { - send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n" - exit 0 -} - # Remove any vestigial files exec $bin_rm -f $file_in $file_out $spec_in diff -Nru slurm-llnl-16.05.8/testsuite/expect/test17.39 slurm-llnl-16.05.9/testsuite/expect/test17.39 --- slurm-llnl-16.05.8/testsuite/expect/test17.39 2017-01-04 22:11:51.000000000 +0100 
+++ slurm-llnl-16.05.9/testsuite/expect/test17.39 2017-01-31 20:56:34.000000000 +0100 @@ -35,8 +35,6 @@ set slow_id 0 set fast_id 0 set dep_id 0 -set slow_job "test$test_id\_slow_sc" -set fast_job "test$test_id\_fast_sc" set exit_code 0 print_header $test_id @@ -56,9 +54,6 @@ } } -make_bash_script $slow_job "sleep 120" -make_bash_script $fast_job "sleep 30" - proc check_state {id state} { global squeue exit_code @@ -85,14 +80,8 @@ } } -if {[test_select_type_params "MEMORY"]} { - set job_mem 10 -} else { - set job_mem 1 -} - # Submit job 1 of 3 -spawn $sbatch -t3 -o/dev/null --mem=${job_mem} $slow_job +spawn $sbatch -t3 -o /dev/null --wrap "sleep 120" expect { -re "Submitted batch job ($number)" { set slow_id $expect_out(1,string) @@ -112,7 +101,7 @@ } # Submit job 2 of 3 -spawn $sbatch -t3 -o/dev/null --mem=${job_mem} $fast_job +spawn $sbatch -t3 -o /dev/null --wrap "sleep 30" expect { -re "Node count specification invalid" { send_user "\nWARNING: can't test with less than two nodes\n" @@ -136,7 +125,7 @@ } # Submit dependency job, 3 of 3 -spawn $sbatch --dependency=afterok:$slow_id?afterok:$fast_id -o/dev/null --mem=${job_mem} $slow_job +spawn $sbatch --dependency=afterok:$slow_id?afterok:$fast_id -o /dev/null --wrap "sleep 120" expect { -re "Submitted batch job ($number)" { set dep_id $expect_out(1,string) @@ -197,7 +186,6 @@ cancel_job $dep_id if {$exit_code == 0} { - exec $bin_rm -f $slow_job $fast_job send_user "\nSUCCESS\n" } exit $exit_code diff -Nru slurm-llnl-16.05.8/testsuite/expect/test1.74 slurm-llnl-16.05.9/testsuite/expect/test1.74 --- slurm-llnl-16.05.8/testsuite/expect/test1.74 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test1.74 2017-01-31 20:56:34.000000000 +0100 @@ -65,6 +65,10 @@ send_user "\nWARNING: This test is incompatible with serial systems\n" exit $exit_code } +if {[string compare [check_accounting_admin_level] "Administrator"]} { + send_user "\nThis test can't be run without being an Accounting 
administrator.\n" + exit $exit_code +} spawn $bin_id -u -n expect { diff -Nru slurm-llnl-16.05.8/testsuite/expect/test17.40 slurm-llnl-16.05.9/testsuite/expect/test17.40 --- slurm-llnl-16.05.8/testsuite/expect/test17.40 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test17.40 2017-01-31 20:56:34.000000000 +0100 @@ -156,6 +156,19 @@ print_header $test_id +set select_type [test_select_type] +if {![string compare $select_type "linear"] || ![string compare $select_type "serial"]} { + send_user "\nWARNING: This test is incompatible with select/$select_type\n" + exit 0 +} elseif {![string compare $select_type "cray"] && ![test_select_type_params "other_cons_res"]} { + send_user "\nWARNING: This test is incompatible with select/linear\n" + exit 0 +} +if {[test_select_type_params "CR_SOCKET"]} { + send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n" + exit 0 +} + log_user 0 set allow_spec 0 spawn $scontrol show config @@ -178,17 +191,6 @@ exit $exit_code } -set select_type [test_select_type] -if {![string compare $select_type "linear"] || ![string compare $select_type "serial"]} { - send_user "\nWARNING: This test is incompatible with select/$select_type\n" - exit 0 -} - -if {[test_select_type_params "CR_SOCKET"]} { - send_user "\nWARNING: This test is incompatible with CR_SOCKET allocations\n" - exit 0 -} - # Remove any vestigial files exec $bin_rm -f $file_in $file_out $spec_in diff -Nru slurm-llnl-16.05.8/testsuite/expect/test21.36 slurm-llnl-16.05.9/testsuite/expect/test21.36 --- slurm-llnl-16.05.8/testsuite/expect/test21.36 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test21.36 2017-01-31 20:56:34.000000000 +0100 @@ -81,6 +81,10 @@ send_user "\nThis test can't be run without AccountStorageType=slurmdbd\n" exit 0 } +if {[string compare [check_accounting_admin_level] "Administrator"]} { + send_user "\nThis test can't be run without being an Accounting administrator.\n" + exit 0 +} # 
Remove pre-existing items cleanup diff -Nru slurm-llnl-16.05.8/testsuite/expect/test2.8 slurm-llnl-16.05.9/testsuite/expect/test2.8 --- slurm-llnl-16.05.8/testsuite/expect/test2.8 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test2.8 2017-01-31 20:56:34.000000000 +0100 @@ -35,7 +35,6 @@ set test_id "2.8" set exit_code 0 -set file_in "test$test_id.input" set is_bluegene 0 set job_id1 0 set job_id2 0 @@ -65,26 +64,10 @@ set step_id 0 } -if {[test_select_type_params "MEMORY"]} { - set job_mem 20 - set step_mem 10 -} else { - set job_mem 1 - set step_mem 1 -} - -# -# Build input script file -# -make_bash_script $file_in " - $srun --mem=${step_mem} $bin_sleep 60 & - $srun --mem=${step_mem} $bin_sleep 60 -" - # # Submit a couple jobs so we have something to work with # -set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --mem=${job_mem} $file_in] +set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --wrap "$srun $bin_sleep 60"] expect { -re "Submitted batch job ($number)" { set job_id1 $expect_out(1,string) @@ -104,7 +87,7 @@ exit 1 } -set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --mem=${job_mem} $file_in] +set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t5 --wrap "$srun $bin_sleep 60"] expect { -re "Submitted batch job ($number)" { set job_id2 $expect_out(1,string) @@ -126,8 +109,6 @@ exit 1 } -exec $bin_rm -f $file_in - if {[wait_for_job $job_id1 "RUNNING"] != 0} { send_user "\nFAILURE: waiting for job $job_id1 to start\n" cancel_job $job_id1 @@ -451,4 +432,3 @@ send_user "\nSUCCESS\n" } exit $exit_code - diff -Nru slurm-llnl-16.05.8/testsuite/expect/test28.7 slurm-llnl-16.05.9/testsuite/expect/test28.7 --- slurm-llnl-16.05.8/testsuite/expect/test28.7 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test28.7 2017-01-31 20:56:34.000000000 +0100 @@ -33,7 +33,6 @@ set test_id "28.7" set exit_code 0 set array_size 3 -set script 
"test$test_id\.bash" set top_array_task_id [expr $array_size - 1] print_header $test_id @@ -43,20 +42,11 @@ exit 0 } -if {[test_select_type_params "MEMORY"]} { - set job_mem 10 -} else { - set job_mem 1 -} - -exec $bin_rm -f $script -make_bash_script $script "sleep \$(( ( RANDOM % 10 ) + 1 ))" - # # Submit a job array for first dependency test # set job_id 0 -spawn $sbatch -N1 -t1 -a 0-$top_array_task_id -o /dev/null -e /dev/null --mem=${job_mem} $script +spawn $sbatch -N1 -t1 -a 0-$top_array_task_id -o /dev/null -e /dev/null --wrap "sleep \$(( ( RANDOM % 10 ) + 1 ))" expect { -re "Submitted batch job ($number)" { set job_id $expect_out(1,string) @@ -80,7 +70,7 @@ # set timeout $max_job_delay set match_job_state 0 -set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id\_$top_array_task_id --mem=${job_mem} $scontrol show job $job_id\_$top_array_task_id] +set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id\_$top_array_task_id $scontrol show job $job_id\_$top_array_task_id] expect { -re "JobState=COMPLETED|COMPLETING" { set match_job_state 1 @@ -108,7 +98,7 @@ # Submit a job array for second dependency test # set job_id 0 -spawn $sbatch -N1 -t1 -a 0-[expr $array_size - 1] -o /dev/null -e /dev/null --mem=${job_mem} $script +spawn $sbatch -N1 -t1 -a 0-[expr $array_size - 1] -o /dev/null -e /dev/null --wrap "sleep \$(( ( RANDOM % 10 ) + 1 ))" expect { -re "Submitted batch job ($number)" { set job_id $expect_out(1,string) @@ -132,7 +122,7 @@ # set timeout $max_job_delay set match_job_state 0 -set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id --mem=${job_mem} $scontrol show job $job_id] +set srun_pid [spawn $srun -t1 --dependency=afterany:$job_id $scontrol show job $job_id] expect { -re "JobState=COMPLETED|COMPLETING" { incr match_job_state @@ -154,7 +144,6 @@ cancel_job $job_id if {$exit_code == 0} { - exec $bin_rm -f $script send_user "\nSUCCESS\n" } exit $exit_code diff -Nru slurm-llnl-16.05.8/testsuite/expect/test3.15 
slurm-llnl-16.05.9/testsuite/expect/test3.15 --- slurm-llnl-16.05.8/testsuite/expect/test3.15 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test3.15 2017-01-31 20:56:34.000000000 +0100 @@ -32,7 +32,6 @@ set test_id "3.15" set exit_code 0 -set script_name "test$test_id.bash" set license_name "test$test_id" set resv_name "resv$test_id" set user_name "" @@ -57,10 +56,9 @@ } proc submit_job { license_count } { - global script_name bin_sleep license_name sbatch number exit_code job_mem + global bin_sleep license_name sbatch number exit_code set job_id 0 - make_bash_script $script_name "$bin_sleep 300" - spawn $sbatch -n1 -t1 -o /dev/null -L $license_name:$license_count --mem=${job_mem} $script_name + spawn $sbatch -n1 -t1 -o /dev/null -L $license_name:$license_count --wrap "$bin_sleep 300" expect { -re "Submitted batch job ($number)" { set job_id $expect_out(1,string) @@ -187,12 +185,6 @@ exit $exit_code } -if {[test_select_type_params "MEMORY"]} { - set job_mem 10 -} else { - set job_mem 1 -} - spawn $bin_id -un expect { -re "($alpha_numeric_under)" { @@ -378,7 +370,7 @@ reconfigure if {$exit_code == 0} { - exec $bin_rm -f $cwd/slurm.conf.orig $script_name + exec $bin_rm -f $cwd/slurm.conf.orig send_user "\nSUCCESS\n" } else { send_user "\nFAILURE\n" diff -Nru slurm-llnl-16.05.8/testsuite/expect/test35.2 slurm-llnl-16.05.9/testsuite/expect/test35.2 --- slurm-llnl-16.05.8/testsuite/expect/test35.2 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test35.2 2017-01-31 20:56:34.000000000 +0100 @@ -42,6 +42,28 @@ set script_use "test$test_id.use.bash" set tmp_file "test$test_id" +# +# get my uid and clear any vestigial triggers +# +set uid -1 +spawn $bin_id -u +expect { + -re "($number)" { + set uid $expect_out(1,string) + exp_continue + } + eof { + wait + } +} +if {$uid == -1} { + send_user "\nCan't get my uid\n" + exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 +} + 
proc find_bb_jobid { fname bb_jobid } { global bin_cat @@ -142,6 +164,9 @@ set exit_code 1 } +# Wait for purge of buffer to complete +sleep 10 + set found 0 spawn $scontrol show burst expect { diff -Nru slurm-llnl-16.05.8/testsuite/expect/test5.9 slurm-llnl-16.05.9/testsuite/expect/test5.9 --- slurm-llnl-16.05.8/testsuite/expect/test5.9 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test5.9 2017-01-31 20:56:34.000000000 +0100 @@ -98,6 +98,10 @@ send_user "\nWARNING: This test is incompatible with serial systems\n" exit $exit_code } +if {[string compare [check_accounting_admin_level] "Administrator"]} { + send_user "\nThis test can't be run without being an Accounting administrator.\n" + exit $exit_code +} set available [available_nodes $partition idle] if {$available < 2} { send_user "\nWARNING: not enough nodes currently available ($available avail, 2 needed)\n" diff -Nru slurm-llnl-16.05.8/testsuite/expect/test7.11 slurm-llnl-16.05.9/testsuite/expect/test7.11 --- slurm-llnl-16.05.8/testsuite/expect/test7.11 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test7.11 2017-01-31 20:56:34.000000000 +0100 @@ -69,10 +69,12 @@ # log_user 0 set config_dir "" +set ctld_slurm_ver "" spawn $scontrol show config expect { - -re "SLURM_CONF.*= (/.*)/slurm.conf.*SLURM_VERSION" { + -re "SLURM_CONF.*= (.*)/slurm.conf.*SLURM_VERSION *= ($float)" { set config_dir $expect_out(1,string) + set ctld_slurm_ver $expect_out(2,string) exp_continue } eof { @@ -84,6 +86,27 @@ send_user "\nFAILURE: Could not locate slurm.conf directory\n" exit 1 } + +log_user 0 +set loc_slurm_ver "" +spawn $scontrol -V +expect { + -re "slurm ($float)" { + set loc_slurm_ver $expect_out(1,string) + exp_continue + } + eof { + wait + } +} +log_user 1 + +if {[string compare $ctld_slurm_ver $loc_slurm_ver]} { + send_user "\nWARNING: slurmctld ($ctld_slurm_ver) and local Slurm ($loc_slurm_ver) versions are not the same, can not continue.\n" + exit 0 +} + + set 
spank_conf_file ${config_dir}/plugstack.conf exec $bin_rm -f $orig_spank_conf $new_spank_conf $file_out $spank_out if {[file exists $spank_conf_file]} { @@ -120,10 +143,6 @@ } } -# Allow enough time for configuration file in NFS to be propagated -# to all nodes of cluster -exec sleep 60 - # # Test of srun help message # diff -Nru slurm-llnl-16.05.8/testsuite/expect/test7.13 slurm-llnl-16.05.9/testsuite/expect/test7.13 --- slurm-llnl-16.05.8/testsuite/expect/test7.13 2017-01-04 22:11:51.000000000 +0100 +++ slurm-llnl-16.05.9/testsuite/expect/test7.13 2017-01-31 20:56:34.000000000 +0100 @@ -166,7 +166,7 @@ } } if {$matches != 4} { - send_user "\nFAILURE: sacct of $job_id failed ($matches != 5)\n" + send_user "\nFAILURE: sacct of $job_id failed ($matches != 4)\n" exit 1 } }