Here is the patch. This patch adds a new keyword to maui.cfg:
 - PBSASYNCJOBSTART

The default value is FALSE, which keeps the old behaviour: pbs_runjob() is used.

If TRUE, then pbs_asyrunjob() is used. This function starts jobs faster than
pbs_runjob().

Can this patch be applied to the maui source?


There is also a new release of maui_2_deb with this patch added:
        ftp://ftp.sara.nl/pub/outgoing/maui_2_deb.tar.gz


Bas van der Vlies wrote:
> Thanks for the info. I have finished the patch, and tomorrow I will do  
> some testing before I post it.
> 
> 
> On 26 aug 2009, at 19:35, Tom Rudwick wrote:
> 
>> That sounds correct to me. That is what I have in my patch to switch
>> the behavior at compile time. We've been using it successfully for a
>> long time.
>>
>> Tom
>>
>>
>> Bas van der Vlies wrote:
>>> I have some time to implement the pbs_asyrunjob patch as an option for
>>> maui.cfg. If I read this thread correctly, I can safely remove both
>>> MPBSJobModify() calls that use 'neednodes' and change pbs_runjob to use
>>> HostList as its parameter instead of MasterHost.
>>>
>>> I want to make it configurable which function is used:
>>>  - PBSASYNCJOBSTART
>>>
>>> default: FALSE (old behaviour)
>>>
>>> Regards
>>>
>>>
>>>
>>> Garrick Staples wrote:
>>>
>>>> On Thu, Apr 09, 2009 at 11:10:25AM -0600, Josh Butikofer alleged:
>>>>
>>>>> Actually, I just checked out the Maui source code and it looks  
>>>>> like you
>>>>> will need to keep at least one of the neednodes calls (the one  
>>>>> before the
>>>>> call to pbs_runjob()), as Maui is not passing a host list into
>>>>> pbs_runjob(). If Maui does pass in the hostlist to pbs_runjob(),  
>>>>> the
>>>>> neednodes calls are probably not needed.
>>>>>
>>>> That's correct.  If Maui passes in the hostlist, don't modify  
>>>> neednodes.
>>>>
>>>> I gave up trying to fix this behaviour in maui years ago.  Here is  
>>>> the patch
>>>> I've had in my own maui for a very long time.
>>>>
>>>>
>>>> Index: src/moab/MPBSI.c
>>>> ===================================================================
>>>> RCS file: /usr/local/nfs/src/cvs_repository/maui/src/moab/MPBSI.c,v
>>>> retrieving revision 1.14
>>>> diff -u -r1.14 MPBSI.c
>>>> --- src/moab/MPBSI.c    5 Nov 2005 02:42:08 -0000       1.14
>>>> +++ src/moab/MPBSI.c    23 May 2006 01:50:11 -0000
>>>> @@ -1792,6 +1792,7 @@
>>>>        return(FAILURE);
>>>>        }
>>>>
>>>> +/*
>>>>      if (MPBSJobModify(
>>>>            J,
>>>>            R,
>>>> @@ -1826,6 +1827,7 @@
>>>>          J->Name,
>>>>          HostList);
>>>>        }
>>>> +*/
>>>>      }
>>>>    else
>>>>      {
>>>> @@ -1904,7 +1906,7 @@
>>>>
>>>>    MJobGetName(J,NULL,R,tmpJobName,sizeof(tmpJobName),mjnRMName);
>>>>
>>>> -  rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,MasterHost,NULL);
>>>> +  rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,HostList,NULL);
>>>>
>>>>    if (rc != 0)
>>>>      {
>>>> @@ -1928,6 +1930,7 @@
>>>>      JobStartFailed = TRUE;
>>>>      }
>>>>
>>>> +/*
>>>>    if (J->NeedNodes != NULL)
>>>>      {
>>>>      if (MPBSJobModify(
>>>> @@ -1949,6 +1952,7 @@
>>>>          J->NeedNodes);
>>>>        }
>>>>      }
>>>> +*/
>>>>
>>>>    if (JobStartFailed == TRUE)
>>>>      {
>>>>
>>>>
>>>>
>>>
>>>
>> <ATT00001.txt>
> 
> --
> Bas van der Vlies
> [email protected]
> 
> 
> 
> _______________________________________________
> mauiusers mailing list
> [email protected]
> http://www.supercluster.org/mailman/listinfo/mauiusers


-- 
********************************************************************
*  Bas van der Vlies                    e-mail: [email protected]       *
*  SARA - Academic Computing Services   Amsterdam, The Netherlands *
********************************************************************
Index: include/msched-common.h
===================================================================
--- include/msched-common.h	(revision 1)
+++ include/msched-common.h	(working copy)
@@ -875,7 +875,8 @@
   pOLDRMServer,
   pNAMaxPS,
   pFSSecondaryGroups, /* To enable secondary fairshare group lookups for PBS, HvB */
-  pIgnPbsGroupList    /* ignore the -W group_list parameter for PBS, HvB */
+  pIgnPbsGroupList,   /* ignore the -W group_list parameter for PBS, HvB */
+  pPbsAsyncJobstart   /* Start Torque/Pbs jobs asynchronous, HvB */
   };
 
 #endif /* __M_COMMON_H__ */
Index: include/msched.h
===================================================================
--- include/msched.h	(revision 1)
+++ include/msched.h	(working copy)
@@ -1497,7 +1497,9 @@
 
   int    IgnPbsGroupList;         /* Ignore -W group_list parameter for Torque/PBS HvB */
   int    FSSecondaryGroups;       /* To enable secondary fairshare group lookups for PBS, HvB */
+  int    PbsAsyncJobstart;         /* Start Torque/Pbs jobs asynchronous, HvB */
 
+
   } mpar_t;
 
 /* cred */
Index: src/moab/MPar.c
===================================================================
--- src/moab/MPar.c	(revision 1)
+++ src/moab/MPar.c	(working copy)
@@ -722,6 +722,11 @@
   P->IgnPbsGroupList      = 0;
 
   /*
+   * HvB: Default is to start jobs synchronous
+  */
+  P->PbsAsyncJobstart 	  = 0;
+
+  /*
    * HvB: default is to disable secondary group lookups for fairshare
   */
   P->FSSecondaryGroups   = 0;
@@ -1862,6 +1867,13 @@
       break;
 
     /* HvB */
+    case pPbsAsyncJobstart:
+
+      P->PbsAsyncJobstart =  MUBoolFromString(SVal,FALSE);
+
+      break;
+
+    /* HvB */
     case pIgnPbsGroupList:
 
       P->IgnPbsGroupList =  MUBoolFromString(SVal,FALSE);
Index: src/moab/MConst.c
===================================================================
--- src/moab/MConst.c	(revision 1)
+++ src/moab/MConst.c	(working copy)
@@ -1565,6 +1565,7 @@
   { "NOTIFICATIONINTERVAL",     pAdminEInterval,              mdfString,  mxoSched, NULL },
   { "NOTIFICATIONPROGRAM",      pAdminEAction,                mdfString,  mxoSched, NULL },
   { "PARIGNQUEUELIST",          pParIgnQList,                 mdfStringArray, mxoSched, NULL },
+  { "PBSASYNCJOBSTART",         pPbsAsyncJobstart,            mdfString,  mxoPar,   NULL },
   { "PECAP",                    pRPECap,                      mdfInt,     mxoPar,   NULL },
   { "PERCENTCAP",               pUPerCCap,                    mdfInt,     mxoPar,   NULL },
   { "PERCENTWEIGHT",            pUPerCWeight,                 mdfInt,     mxoPar,   NULL },
Index: src/moab/MPBSI.c
===================================================================
--- src/moab/MPBSI.c	(revision 1)
+++ src/moab/MPBSI.c	(working copy)
@@ -1904,41 +1904,6 @@
 
       return(FAILURE);
       }
-
-    if (MPBSJobModify(
-          J,
-          R,
-          ATTR_l,
-          (char *)(R->Version >= 710 ? "select" : "neednodes"),
-          HostList,
-          NULL,
-          NULL) == FAILURE)
-      {
-      DBG(0,fPBS) DPrint("ERROR:    cannot set hostlist for job '%s'\n",
-        J->Name);
-
-      if (R->FailIteration != MSched.Iteration)
-        {
-        R->FailIteration = MSched.Iteration;
-        R->FailCount     = 0;
-        }
-
-      R->FailCount++;
-
-      if (Msg != NULL)
-        strcpy(Msg,"job cannot be started - cannot set hostlist");
-
-      if (SC != NULL)
-        *SC = mscRemoteFailure;
-
-      return(FAILURE);
-      }
-    else
-      {
-      DBG(7,fPBS) DPrint("INFO:     hostlist for job '%s' set to '%s'\n",
-        J->Name,
-        HostList);
-      }
     }
   else
     {
@@ -2017,7 +1982,17 @@
 
   MJobGetName(J,NULL,R,tmpJobName,sizeof(tmpJobName),mjnRMName);       
 
-  rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,MasterHost,NULL);
+  /* HvB */
+  if ( MPar[0].PbsAsyncJobstart == FALSE )
+    {
+    DBG(7,fPBS) DPrint("INFO:     use pbs_runjob\n");
+    rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,HostList,NULL);
+    }
+  else
+    {
+    DBG(7,fPBS) DPrint("INFO:     use pbs_asyrun\n");
+    rc = pbs_asyrunjob(R->U.PBS.ServerSD,tmpJobName,HostList,NULL);
+    }
 
   if (rc != 0)
     {
@@ -2041,28 +2016,6 @@
     JobStartFailed = TRUE;
     }
 
-  if (J->NeedNodes != NULL)
-    {
-    if (MPBSJobModify(
-          J,
-          R,
-          ATTR_l,
-          (char *)(R->Version >= 710 ? "select" : "neednodes"),
-          J->NeedNodes,
-          NULL,
-          NULL) == FAILURE)
-      {
-      DBG(7,fPBS) DPrint("WARNING:  cannot reset hostlist for job '%s')\n",
-        J->Name);
-      }
-    else
-      {
-      DBG(7,fPBS) DPrint("INFO:     hostlist for job '%s' set to '%s'\n",
-        J->Name,
-        J->NeedNodes);
-      }
-    }
-
   if (JobStartFailed == TRUE)
     {
     /* job could not be started */
Index: src/moab/MConfig.c
===================================================================
--- src/moab/MConfig.c	(revision 1)
+++ src/moab/MConfig.c	(working copy)
@@ -1824,6 +1824,7 @@
     case pSystemMaxJobPS:
     case pIgnPbsGroupList:
     case pFSSecondaryGroups:
+    case pPbsAsyncJobstart:
 
       MParProcessOConfig(P,PIndex,val,valf,valp,valpa);
  
_______________________________________________
mauiusers mailing list
[email protected]
http://www.supercluster.org/mailman/listinfo/mauiusers

Reply via email to