Here is the patch. This patch adds a new keyword to maui.cfg:
- PBSASYNCJOBSTART
The default value is FALSE, which keeps the old behaviour: pbs_runjob() is used.
If TRUE, pbs_asyrunjob() is used instead. This function starts jobs faster than
pbs_runjob().
Can this patch be applied to the maui source?
There is also a new release of maui_2_deb with this patch added:
ftp://ftp.sara.nl/pub/outgoing/maui_2_deb.tar.gz
Bas van der Vlies wrote:
> Thanks for the info. I have finished the patch and tomorrow I will do
> some testing before I post it.
>
>
> On 26 aug 2009, at 19:35, Tom Rudwick wrote:
>
>> That sounds correct to me. That is what I have in my patch to switch
>> the behavior at compile time. We've been using it successfully for a
>> long time.
>>
>> Tom
>>
>>
>> Bas van der Vlies wrote:
>>> I have some time to implement the pbs_asyrunjob patch as an option for
>>> maui.cfg. If I read this thread correctly, I can safely remove both
>>> MPBSJobModify() calls that use 'neednodes' and change the pbs_runjob call
>>> to use HostList as the parameter instead of MasterHost.
>>>
>>> I want to make it configurable which function is used:
>>> - PBSASYNCJOBSTART
>>>
>>> default: FALSE (old behaviour)
>>>
>>> Regards
>>>
>>>
>>>
>>> Garrick Staples wrote:
>>>
>>>> On Thu, Apr 09, 2009 at 11:10:25AM -0600, Josh Butikofer alleged:
>>>>
>>>>> Actually, I just checked out the Maui source code and it looks
>>>>> like you
>>>>> will need to keep at least one of the neednodes calls (the one
>>>>> before the
>>>>> call to pbs_runjob()), as Maui is not passing a host list into
>>>>> pbs_runjob(). If Maui does pass in the hostlist to pbs_runjob(),
>>>>> the
>>>>> neednodes calls are probably not needed.
>>>>>
>>>> That's correct. If Maui passes in the hostlist, don't modify
>>>> neednodes.
>>>>
>>>> I gave up trying to fix this behaviour in maui years ago. Here is
>>>> the patch
>>>> I've had in my own maui for a very long time.
>>>>
>>>>
>>>> Index: src/moab/MPBSI.c
>>>> ===================================================================
>>>> RCS file: /usr/local/nfs/src/cvs_repository/maui/src/moab/MPBSI.c,v
>>>> retrieving revision 1.14
>>>> diff -u -r1.14 MPBSI.c
>>>> --- src/moab/MPBSI.c 5 Nov 2005 02:42:08 -0000 1.14
>>>> +++ src/moab/MPBSI.c 23 May 2006 01:50:11 -0000
>>>> @@ -1792,6 +1792,7 @@
>>>> return(FAILURE);
>>>> }
>>>>
>>>> +/*
>>>> if (MPBSJobModify(
>>>> J,
>>>> R,
>>>> @@ -1826,6 +1827,7 @@
>>>> J->Name,
>>>> HostList);
>>>> }
>>>> +*/
>>>> }
>>>> else
>>>> {
>>>> @@ -1904,7 +1906,7 @@
>>>>
>>>> MJobGetName(J,NULL,R,tmpJobName,sizeof(tmpJobName),mjnRMName);
>>>>
>>>> - rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,MasterHost,NULL);
>>>> + rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,HostList,NULL);
>>>>
>>>> if (rc != 0)
>>>> {
>>>> @@ -1928,6 +1930,7 @@
>>>> JobStartFailed = TRUE;
>>>> }
>>>>
>>>> +/*
>>>> if (J->NeedNodes != NULL)
>>>> {
>>>> if (MPBSJobModify(
>>>> @@ -1949,6 +1952,7 @@
>>>> J->NeedNodes);
>>>> }
>>>> }
>>>> +*/
>>>>
>>>> if (JobStartFailed == TRUE)
>>>> {
>>>>
>>>>
>>>>
>>>
>>>
>> <ATT00001.txt>
>
> --
> Bas van der Vlies
> [email protected]
>
>
>
> _______________________________________________
> mauiusers mailing list
> [email protected]
> http://www.supercluster.org/mailman/listinfo/mauiusers
--
********************************************************************
* Bas van der Vlies e-mail: [email protected] *
* SARA - Academic Computing Services Amsterdam, The Netherlands *
********************************************************************
Index: include/msched-common.h
===================================================================
--- include/msched-common.h (revision 1)
+++ include/msched-common.h (working copy)
@@ -875,7 +875,8 @@
pOLDRMServer,
pNAMaxPS,
pFSSecondaryGroups, /* To enable secondary fairshare group lookups for PBS, HvB */
- pIgnPbsGroupList /* ignore the -W group_list parameter for PBS, HvB */
+ pIgnPbsGroupList, /* ignore the -W group_list parameter for PBS, HvB */
+ pPbsAsyncJobstart /* Start Torque/Pbs jobs asynchronous, HvB */
};
#endif /* __M_COMMON_H__ */
Index: include/msched.h
===================================================================
--- include/msched.h (revision 1)
+++ include/msched.h (working copy)
@@ -1497,7 +1497,9 @@
int IgnPbsGroupList; /* Ignore -W group_list parameter for Torque/PBS HvB */
int FSSecondaryGroups; /* To enable secondary fairshare group lookups for PBS, HvB */
+ int PbsAsyncJobstart; /* Start Torque/Pbs jobs asynchronous, HvB */
+
} mpar_t;
/* cred */
Index: src/moab/MPar.c
===================================================================
--- src/moab/MPar.c (revision 1)
+++ src/moab/MPar.c (working copy)
@@ -722,6 +722,11 @@
P->IgnPbsGroupList = 0;
/*
+ * HvB: Default is to start jobs synchronous
+ */
+ P->PbsAsyncJobstart = 0;
+
+ /*
* HvB: default is to disable secondary group lookups for fairshare
*/
P->FSSecondaryGroups = 0;
@@ -1862,6 +1867,13 @@
break;
/* HvB */
+ case pPbsAsyncJobstart:
+
+ P->PbsAsyncJobstart = MUBoolFromString(SVal,FALSE);
+
+ break;
+
+ /* HvB */
case pIgnPbsGroupList:
P->IgnPbsGroupList = MUBoolFromString(SVal,FALSE);
Index: src/moab/MConst.c
===================================================================
--- src/moab/MConst.c (revision 1)
+++ src/moab/MConst.c (working copy)
@@ -1565,6 +1565,7 @@
{ "NOTIFICATIONINTERVAL", pAdminEInterval, mdfString, mxoSched, NULL },
{ "NOTIFICATIONPROGRAM", pAdminEAction, mdfString, mxoSched, NULL },
{ "PARIGNQUEUELIST", pParIgnQList, mdfStringArray, mxoSched, NULL },
+ { "PBSASYNCJOBSTART", pPbsAsyncJobstart, mdfString, mxoPar, NULL },
{ "PECAP", pRPECap, mdfInt, mxoPar, NULL },
{ "PERCENTCAP", pUPerCCap, mdfInt, mxoPar, NULL },
{ "PERCENTWEIGHT", pUPerCWeight, mdfInt, mxoPar, NULL },
Index: src/moab/MPBSI.c
===================================================================
--- src/moab/MPBSI.c (revision 1)
+++ src/moab/MPBSI.c (working copy)
@@ -1904,41 +1904,6 @@
return(FAILURE);
}
-
- if (MPBSJobModify(
- J,
- R,
- ATTR_l,
- (char *)(R->Version >= 710 ? "select" : "neednodes"),
- HostList,
- NULL,
- NULL) == FAILURE)
- {
- DBG(0,fPBS) DPrint("ERROR: cannot set hostlist for job '%s'\n",
- J->Name);
-
- if (R->FailIteration != MSched.Iteration)
- {
- R->FailIteration = MSched.Iteration;
- R->FailCount = 0;
- }
-
- R->FailCount++;
-
- if (Msg != NULL)
- strcpy(Msg,"job cannot be started - cannot set hostlist");
-
- if (SC != NULL)
- *SC = mscRemoteFailure;
-
- return(FAILURE);
- }
- else
- {
- DBG(7,fPBS) DPrint("INFO: hostlist for job '%s' set to '%s'\n",
- J->Name,
- HostList);
- }
}
else
{
@@ -2017,7 +1982,17 @@
MJobGetName(J,NULL,R,tmpJobName,sizeof(tmpJobName),mjnRMName);
- rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,MasterHost,NULL);
+ /* HvB */
+ if ( MPar[0].PbsAsyncJobstart == FALSE )
+ {
+ DBG(7,fPBS) DPrint("INFO: use pbs_runjob\n");
+ rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,HostList,NULL);
+ }
+ else
+ {
+ DBG(7,fPBS) DPrint("INFO: use pbs_asyrun\n");
+ rc = pbs_asyrunjob(R->U.PBS.ServerSD,tmpJobName,HostList,NULL);
+ }
if (rc != 0)
{
@@ -2041,28 +2016,6 @@
JobStartFailed = TRUE;
}
- if (J->NeedNodes != NULL)
- {
- if (MPBSJobModify(
- J,
- R,
- ATTR_l,
- (char *)(R->Version >= 710 ? "select" : "neednodes"),
- J->NeedNodes,
- NULL,
- NULL) == FAILURE)
- {
- DBG(7,fPBS) DPrint("WARNING: cannot reset hostlist for job '%s')\n",
- J->Name);
- }
- else
- {
- DBG(7,fPBS) DPrint("INFO: hostlist for job '%s' set to '%s'\n",
- J->Name,
- J->NeedNodes);
- }
- }
-
if (JobStartFailed == TRUE)
{
/* job could not be started */
Index: src/moab/MConfig.c
===================================================================
--- src/moab/MConfig.c (revision 1)
+++ src/moab/MConfig.c (working copy)
@@ -1824,6 +1824,7 @@
case pSystemMaxJobPS:
case pIgnPbsGroupList:
case pFSSecondaryGroups:
+ case pPbsAsyncJobstart:
MParProcessOConfig(P,PIndex,val,valf,valp,valpa);
_______________________________________________
mauiusers mailing list
[email protected]
http://www.supercluster.org/mailman/listinfo/mauiusers