--- Begin Message ---
Joshua, good day.
Thu, Mar 29, 2007 at 12:19:09PM -0600, Josh Butikofer wrote:
> Yes I will investigate this issue. It must have slipped under my radar.
> If you could provide me with an updated patch, that would be most helpful.
> If everything looks good with your changes I will roll them into the code.
OK, the three attached files implement my modification. They were
ported to 3.2.6p19 without any changes except for the line numbers:
I see no relevant changes between p16 and p19 in the files my
commits touch.
The new Maui with my patches has already been running for half an
hour ;)) on the production cluster, and it shows none of the SEGVs
or other bad symptoms I had with p18. I will keep an eye on it and
will test my patch's behaviour on the current Maui.
Will keep you informed.
--
Eygene Ryabinkin, RRC KI
>From 7ae8b929614e1f23a24e2309e1eae9a15412fde2 Mon Sep 17 00:00:00 2001
From: Eygene Ryabinkin <[EMAIL PROTECTED]>
Date: Tue, 20 Mar 2007 15:59:25 +0300
Subject: [PATCH] Prepare the MQueueSelectJobs() for the two-pass scheduling.
In order to get the 'PDEF' statement to work correctly we need
two-pass scheduling over the partitions: on the first pass only the
jobs for which the given partition is the default are considered,
and on the second pass the rest of the jobs are considered for
scheduling. Note that the first pass must walk over all partitions
before the second pass (over all partitions and the remaining jobs)
is started: we should first select ALL jobs that can fit into their
default partitions.
The current patch is a no-op from the functional point of view.
It just moves the checking of a single job into the local function
MQueueCheckSingleJob(), whose body was taken from the original body
of MQueueSelectJobs().
The patch was tested on the RRC-KI Grid cluster and has so far shown
no regressions in its daily operations.
Signed-off-by: Eygene Ryabinkin <[EMAIL PROTECTED]>
---
src/moab/MPolicy.c | 578 ++++++++++++++++++++++++++++------------------------
1 files changed, 308 insertions(+), 270 deletions(-)
diff --git a/src/moab/MPolicy.c b/src/moab/MPolicy.c
index 9b1e873..bfca663 100644
--- a/src/moab/MPolicy.c
+++ b/src/moab/MPolicy.c
@@ -147,10 +147,19 @@ extern mres_t *MRes[];
*/
-/* NYI: must handle effqduration */
-
+static int MQueueCheckSingleJob(
+ mjob_t *J,
+ int *Reason,
+ mpar_t *P,
+ mpar_t *GP,
+ int PLevel,
+ int MaxNC,
+ int MaxPC,
+ unsigned long MaxWCLimit,
+ int OrigPIndex,
+ mbool_t UpdateStats);
-
+/* NYI: must handle effqduration */
int MQueueSelectJobs(
@@ -171,27 +180,14 @@ int MQueueSelectJobs(
mjob_t *J;
- char DValue[MAX_MNAME];
- enum MJobDependEnum DType;
-
mpar_t *P;
mpar_t *GP;
- long PS;
-
int LReason[MAX_MREJREASON];
- int PReason;
int *Reason;
int PIndex;
- int PReq;
-
- mreq_t *RQ;
-
- double PE;
-
- char tmpLine[MAX_MLINE];
const char *FName = "MQueueSelectJobs";
@@ -267,368 +263,410 @@ int MQueueSelectJobs(
continue;
}
- RQ = J->Req[0]; /* FIXME */
+ if (MQueueCheckSingleJob(J, Reason, P, GP, PLevel,
+ MaxNC, MaxPC, MaxWCLimit, OrigPIndex, UpdateStats) == FAILURE)
+ continue;
- /* if job removed */
+ /* NOTE: effective queue duration not yet properly supported */
- if (J->Name[0] == '\0')
- {
- Reason[marCorruption]++;
+ J->EffQueueDuration = (MSched.Time > J->SystemQueueTime) ?
+ MSched.Time - J->SystemQueueTime : 0;
+
+ /* add job to destination queue */
- continue;
- }
+ DBG(5,fSCHED) DPrint("INFO: job '%s' added to queue at slot %d\n",
+ J->Name,
+ sindex);
- if (UpdateStats == TRUE)
- {
- J->BlockReason = 0;
+ DstQ[sindex++] = SrcQ[jindex];
+ } /* END for (jindex) */
- if (J->State == mjsIdle)
- MStat.IdleJobs++;
- }
+ /* terminate list */
- PReq = MJobGetProcCount(J);
- MJobGetPE(J,P,&PE);
- PS = (long)PReq * J->SpecWCLimit[0];
+ DstQ[sindex] = -1;
- /* check partition */
+ DBG(1,fSCHED)
+ {
+ DBG(1,fSCHED) DPrint("INFO: total jobs selected in partition %s:
%d/%-d ",
+ MAList[ePartition][PIndex],
+ sindex,
+ jindex);
- if (OrigPIndex != -1)
+ for (index = 0;index < MAX_MREJREASON;index++)
{
- if ((P->Index == 0) && !(J->Flags & (1 << mjfSpan)))
+ if (Reason[index] != 0)
{
- /* why? what does partition '0' mean in partition mode? */
+ fprintf(mlog.logfp,"[%s: %d]",
+ MAllocRejType[index],
+ Reason[index]);
+ }
+ } /* END for (index) */
- DBG(3,fSCHED) DPrint("INFO: job %s not considered for spanning\n",
- J->Name);
+ fprintf(mlog.logfp,"\n");
+ }
- Reason[marPartitionAccess]++;
+ if (sindex == 0)
+ return(FAILURE);
- continue;
- }
- else if ((P->Index != 0) && (J->Flags & (1 << mjfSpan)))
- {
- DBG(3,fSCHED) DPrint("INFO: spanning job %s not considered for
partition scheduling\n",
- J->Name);
+ return(SUCCESS);
+ } /* END MQueueSelectJobs() */
- Reason[marPartitionAccess]++;
+/*
+ * Helper for MQueueSelectJobs: performs the single job evaluation.
+ * Returns SUCCESS if job can be queued and FAILURE otherwise.
+ */
+static int MQueueCheckSingleJob(
+ mjob_t *J,
+ int *Reason,
+ mpar_t *P,
+ mpar_t *GP,
+ int PLevel,
+ int MaxNC,
+ int MaxPC,
+ unsigned long MaxWCLimit,
+ int OrigPIndex,
+ mbool_t UpdateStats)
- continue;
- }
+ {
+ char DValue[MAX_MNAME];
+ enum MJobDependEnum DType;
- if ((P->Index > 0) && (MUBMCheck(P->Index,J->PAL) == FAILURE))
- {
- DBG(7,fSCHED) DPrint("INFO: job %s not considered for partition %s
(allowed %s)\n",
- J->Name,
- P->Name,
- MUListAttrs(ePartition,J->PAL[0]));
+ long PS;
- Reason[marPartitionAccess]++;
+ int PReason;
- continue;
- }
- } /* END if (OrigPIndex != -1) */
+ int PReq;
- /* check job state */
+ mreq_t *RQ;
- if ((J->State != mjsIdle) && (J->State != mjsSuspended))
- {
- DBG(6,fSCHED) DPrint("INFO: job %s rejected (job in non-idle state
'%s')\n",
- J->Name,
- MJobState[J->State]);
+ double PE;
- Reason[marState]++;
+ char tmpLine[MAX_MLINE];
- if ((MaxNC == MAX_MNODE) &&
- (MaxWCLimit == MAX_MTIME) &&
- (J->R != NULL))
- {
- if ((J->State != mjsStarting) && (J->State != mjsRunning))
- MResDestroy(&J->R);
- }
+ const char *FName = "MQueueCheckSingleJob";
- continue;
- }
+ RQ = J->Req[0]; /* FIXME */
- /* check if job has been previously scheduled or deferred */
+ /* if job removed */
- if ((J->EState != mjsIdle) && (J->EState != mjsSuspended))
+ if (J->Name[0] == '\0')
+ {
+ Reason[marCorruption]++;
+
+ return(FAILURE);
+ }
+
+ if (UpdateStats == TRUE)
+ {
+ J->BlockReason = 0;
+
+ if (J->State == mjsIdle)
+ MStat.IdleJobs++;
+ }
+
+ PReq = MJobGetProcCount(J);
+ /* XXX: PE is unused? */
+ MJobGetPE(J,P,&PE);
+ PS = (long)PReq * J->SpecWCLimit[0];
+
+ /* check partition */
+
+ if (OrigPIndex != -1)
+ {
+ if ((P->Index == 0) && !(J->Flags & (1 << mjfSpan)))
{
- DBG(6,fSCHED) DPrint("INFO: job %s rejected (job in non-idle
expected state: '%s')\n",
- J->Name,
- MJobState[J->EState]);
+ /* why? what does partition '0' mean in partition mode? */
- Reason[marEState]++;
+ DBG(3,fSCHED) DPrint("INFO: job %s not considered for spanning\n",
+ J->Name);
- if ((MaxNC == MAX_MNODE) && (MaxWCLimit == MAX_MTIME) && (J->R != NULL))
- {
- if ((J->EState != mjsStarting) && (J->EState != mjsRunning))
- MResDestroy(&J->R);
- }
+ Reason[marPartitionAccess]++;
- continue;
+ return(FAILURE);
}
+ else if ((P->Index != 0) && (J->Flags & (1 << mjfSpan)))
+ {
+ DBG(3,fSCHED) DPrint("INFO: spanning job %s not considered for
partition scheduling\n",
+ J->Name);
- /* check available procs */
+ Reason[marPartitionAccess]++;
+
+ return(FAILURE);
+ }
- if (PReq > P->CRes.Procs)
+ if ((P->Index > 0) && (MUBMCheck(P->Index,J->PAL) == FAILURE))
{
- DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds
configured procs: %d > %d)\n",
+ DBG(7,fSCHED) DPrint("INFO: job %s not considered for partition %s
(allowed %s)\n",
J->Name,
P->Name,
- PReq,
- P->CRes.Procs);
+ MUListAttrs(ePartition,J->PAL[0]));
- Reason[marNodeCount]++;
+ Reason[marPartitionAccess]++;
- if (P->Index <= 0)
- {
- if (J->R != NULL)
- MResDestroy(&J->R);
+ return(FAILURE);
+ }
+ } /* END if (OrigPIndex != -1) */
- if (J->Hold == 0)
- {
- MJobSetHold(
- J,
- (1 << mhDefer),
- MSched.DeferTime,
- mhrNoResources,
- "exceeds partition configured procs");
- }
- }
+ /* check job state */
- continue;
+ if ((J->State != mjsIdle) && (J->State != mjsSuspended))
+ {
+ DBG(6,fSCHED) DPrint("INFO: job %s rejected (job in non-idle state
'%s')\n",
+ J->Name,
+ MJobState[J->State]);
+
+ Reason[marState]++;
+
+ if ((MaxNC == MAX_MNODE) &&
+ (MaxWCLimit == MAX_MTIME) &&
+ (J->R != NULL))
+ {
+ if ((J->State != mjsStarting) && (J->State != mjsRunning))
+ MResDestroy(&J->R);
}
- /* check partition specific limits */
+ return(FAILURE);
+ }
- if (MJobCheckLimits(
- J,
- PLevel,
- P,
- (1 << mlSystem),
- tmpLine) == FAILURE)
+ /* check if job has been previously scheduled or deferred */
+
+ if ((J->EState != mjsIdle) && (J->EState != mjsSuspended))
+ {
+ DBG(6,fSCHED) DPrint("INFO: job %s rejected (job in non-idle expected
state: '%s')\n",
+ J->Name,
+ MJobState[J->EState]);
+
+ Reason[marEState]++;
+
+ if ((MaxNC == MAX_MNODE) && (MaxWCLimit == MAX_MTIME) && (J->R != NULL))
{
- DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (%s)\n",
- J->Name,
- P->Name,
- tmpLine);
+ if ((J->EState != mjsStarting) && (J->EState != mjsRunning))
+ MResDestroy(&J->R);
+ }
- Reason[marSystemLimits]++;
+ return(FAILURE);
+ }
- if (P->Index <= 0)
- {
- if (J->R != NULL)
- MResDestroy(&J->R);
+ /* check available procs */
+
+ if (PReq > P->CRes.Procs)
+ {
+ DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds
configured procs: %d > %d)\n",
+ J->Name,
+ P->Name,
+ PReq,
+ P->CRes.Procs);
+ Reason[marNodeCount]++;
+
+ if (P->Index <= 0)
+ {
+ if (J->R != NULL)
+ MResDestroy(&J->R);
+
+ if (J->Hold == 0)
+ {
MJobSetHold(
J,
(1 << mhDefer),
MSched.DeferTime,
- mhrSystemLimits,
- "exceeds system proc/job limit");
+ mhrNoResources,
+ "exceeds partition configured procs");
}
+ }
- continue;
- } /* END if (MJobCheckLimits() == FAILURE) */
-
- /* check job size */
-
- if (PReq > MaxPC)
- {
- DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds
window size: %d > %d)\n",
- J->Name,
- P->Name,
- PReq,
- MaxPC);
+ return(FAILURE);
+ }
- Reason[marNodeCount]++;
+ /* check partition specific limits */
- continue;
- }
+ if (MJobCheckLimits(
+ J,
+ PLevel,
+ P,
+ (1 << mlSystem),
+ tmpLine) == FAILURE)
+ {
+ DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (%s)\n",
+ J->Name,
+ P->Name,
+ tmpLine);
- /* check job duration */
+ Reason[marSystemLimits]++;
- if (J->SpecWCLimit[0] > MaxWCLimit)
+ if (P->Index <= 0)
{
- DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds
window time: %ld > %ld)\n",
- J->Name,
- P->Name,
- J->SpecWCLimit[0],
- MaxWCLimit);
-
- Reason[marTime]++;
+ if (J->R != NULL)
+ MResDestroy(&J->R);
- continue;
+ MJobSetHold(
+ J,
+ (1 << mhDefer),
+ MSched.DeferTime,
+ mhrSystemLimits,
+ "exceeds system proc/job limit");
}
- /* check partition class support */
-
- if (P->Index > 0)
- {
- if
(MUNumListGetCount(J->StartPriority,RQ->DRes.PSlot,P->CRes.PSlot,0,NULL) ==
FAILURE)
- {
- DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (classes
not supported '%s')\n",
- J->Name,
- P->Name,
- MUCAListToString(RQ->DRes.PSlot,P->CRes.PSlot,NULL));
+ return(FAILURE);
+ } /* END if (MJobCheckLimits() == FAILURE) */
- Reason[marClass]++;
+ /* check job size */
- if (J->R != NULL)
- MResDestroy(&J->R);
+ if (PReq > MaxPC)
+ {
+ DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds
window size: %d > %d)\n",
+ J->Name,
+ P->Name,
+ PReq,
+ MaxPC);
- continue;
- }
- } /* END if (PIndex) */
+ Reason[marNodeCount]++;
- if (MJobCheckDependency(J,&DType,DValue) == FAILURE)
- {
- DBG(6,fSCHED) DPrint("INFO: job %s rejected (dependent on job '%s'
%s)\n",
- J->Name,
- DValue,
- MJobDependType[DType]);
+ return(FAILURE);
+ }
- if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
- {
- J->SystemQueueTime = MSched.Time;
- }
+ /* check job duration */
- Reason[marDepend]++;
+ if (J->SpecWCLimit[0] > MaxWCLimit)
+ {
+ DBG(6,fSCHED) DPrint("INFO: job %s rejected in partition %s (exceeds
window time: %ld > %ld)\n",
+ J->Name,
+ P->Name,
+ J->SpecWCLimit[0],
+ MaxWCLimit);
- if ((MaxNC == MAX_MNODE) &&
- (MaxWCLimit == MAX_MTIME) &&
- (J->R != NULL))
- {
- MResDestroy(&J->R);
- }
+ Reason[marTime]++;
- continue;
- } /* END if (MJobCheckDependency(J,&JDepend) == FAILURE) */
+ return(FAILURE);
+ }
- /* check partition active job policies */
+ /* check partition class support */
- if (MJobCheckPolicies(
- J,
- PLevel,
- (1 << mlActive),
- P, /* NOTE: may set to &MPar[0] */
- &PReason,
- NULL,
- MAX_MTIME) == FAILURE)
+ if (P->Index > 0)
+ {
+ if
(MUNumListGetCount(J->StartPriority,RQ->DRes.PSlot,P->CRes.PSlot,0,NULL) ==
FAILURE)
{
- DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (policy
failure: '%s')\n",
+ DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (classes
not supported '%s')\n",
J->Name,
P->Name,
- MPolicyRejection[PReason]);
-
- if (PLevel == ptHARD)
- {
- if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
- {
- J->SystemQueueTime = MSched.Time;
- }
- }
+ MUCAListToString(RQ->DRes.PSlot,P->CRes.PSlot,NULL));
- Reason[marPolicy]++;
+ Reason[marClass]++;
- if ((MaxNC == MAX_MNODE) &&
- (MaxWCLimit == MAX_MTIME) &&
- (J->R != NULL))
- {
+ if (J->R != NULL)
MResDestroy(&J->R);
- }
- continue;
+ return(FAILURE);
}
+ } /* END if (PIndex) */
- J->Cred.U->MTime = MSched.Time;
- J->Cred.G->MTime = MSched.Time;
+ if (MJobCheckDependency(J,&DType,DValue) == FAILURE)
+ {
+ DBG(6,fSCHED) DPrint("INFO: job %s rejected (dependent on job '%s'
%s)\n",
+ J->Name,
+ DValue,
+ MJobDependType[DType]);
+
+ if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
+ {
+ J->SystemQueueTime = MSched.Time;
+ }
- if (J->Cred.A != NULL)
- J->Cred.A->MTime = MSched.Time;
+ Reason[marDepend]++;
- if (MPar[0].FSC.FSPolicy != fspNONE)
+ if ((MaxNC == MAX_MNODE) &&
+ (MaxWCLimit == MAX_MTIME) &&
+ (J->R != NULL))
{
- int OIndex;
+ MResDestroy(&J->R);
+ }
- if (MFSCheckCap(NULL,J,P,&OIndex) == FAILURE)
- {
- DBG(5,fSCHED) DPrint("INFO: job '%s' exceeds %s FS cap\n",
- J->Name,
- (OIndex > 0) ? MXO[OIndex] : "NONE");
-
- if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
- {
- J->SystemQueueTime = MSched.Time;
- }
-
- Reason[marFairShare]++;
+ return(FAILURE);
+ } /* END if (MJobCheckDependency(J,&JDepend) == FAILURE) */
- continue;
- }
- } /* END if (FS[0].FSPolicy != fspNONE) */
+ /* check partition active job policies */
- /* NOTE: idle queue policies handled in MQueueSelectAllJobs() */
+ if (MJobCheckPolicies(
+ J,
+ PLevel,
+ (1 << mlActive),
+ P, /* NOTE: may set to &MPar[0] */
+ &PReason,
+ NULL,
+ MAX_MTIME) == FAILURE)
+ {
+ DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (policy
failure: '%s')\n",
+ J->Name,
+ P->Name,
+ MPolicyRejection[PReason]);
- if (MLocalCheckFairnessPolicy(J,MSched.Time,NULL) == FAILURE)
+ if (PLevel == ptHARD)
{
- DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (violates
local fairness policy)\n",
- J->Name,
- P->Name);
-
- if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
+ if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
{
J->SystemQueueTime = MSched.Time;
}
+ }
- Reason[marPolicy]++;
+ Reason[marPolicy]++;
- continue;
+ if ((MaxNC == MAX_MNODE) &&
+ (MaxWCLimit == MAX_MTIME) &&
+ (J->R != NULL))
+ {
+ MResDestroy(&J->R);
}
- /* NOTE: effective queue duration not yet properly supported */
+ return(FAILURE);
+ }
- J->EffQueueDuration = (MSched.Time > J->SystemQueueTime) ?
- MSched.Time - J->SystemQueueTime : 0;
-
- /* add job to destination queue */
+ J->Cred.U->MTime = MSched.Time;
+ J->Cred.G->MTime = MSched.Time;
- DBG(5,fSCHED) DPrint("INFO: job '%s' added to queue at slot %d\n",
- J->Name,
- sindex);
+ if (J->Cred.A != NULL)
+ J->Cred.A->MTime = MSched.Time;
- DstQ[sindex++] = SrcQ[jindex];
- } /* END for (jindex) */
+ if (MPar[0].FSC.FSPolicy != fspNONE)
+ {
+ int OIndex;
- /* terminate list */
+ if (MFSCheckCap(NULL,J,P,&OIndex) == FAILURE)
+ {
+ DBG(5,fSCHED) DPrint("INFO: job '%s' exceeds %s FS cap\n",
+ J->Name,
+ (OIndex > 0) ? MXO[OIndex] : "NONE");
+
+ if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
+ {
+ J->SystemQueueTime = MSched.Time;
+ }
+
+ Reason[marFairShare]++;
- DstQ[sindex] = -1;
+ return(FAILURE);
+ }
+ } /* END if (FS[0].FSPolicy != fspNONE) */
- DBG(1,fSCHED)
+ /* NOTE: idle queue policies handled in MQueueSelectAllJobs() */
+
+ if (MLocalCheckFairnessPolicy(J,MSched.Time,NULL) == FAILURE)
{
- DBG(1,fSCHED) DPrint("INFO: total jobs selected in partition %s:
%d/%-d ",
- MAList[ePartition][PIndex],
- sindex,
- jindex);
+ DBG(6,fSCHED) DPrint("INFO: job %s rejected, partition %s (violates
local fairness policy)\n",
+ J->Name,
+ P->Name);
- for (index = 0;index < MAX_MREJREASON;index++)
+ if (GP->JobPrioAccrualPolicy == jpapFullPolicy)
{
- if (Reason[index] != 0)
- {
- fprintf(mlog.logfp,"[%s: %d]",
- MAllocRejType[index],
- Reason[index]);
- }
- } /* END for (index) */
+ J->SystemQueueTime = MSched.Time;
+ }
- fprintf(mlog.logfp,"\n");
- }
+ Reason[marPolicy]++;
- if (sindex == 0)
return(FAILURE);
+ }
return(SUCCESS);
- } /* END MQueueSelectJobs() */
+ } /* END MQueueCheckSingleJob() */
--
1.5.0.3-dirty
>From 25910ad71e0e77a419bffd584fba01d8b451aa2a Mon Sep 17 00:00:00 2001
From: Eygene Ryabinkin <[EMAIL PROTECTED]>
Date: Wed, 21 Mar 2007 10:57:59 +0300
Subject: [PATCH] Prepare the MSchedProcessJobs() for the two-pass scheduling.
Transformed part of the original MJobGetPAL() function into the
new public function MJobFindDefPart() that determines the default
partition for a job.
The MQueueSelectJobs() prototype was modified: the OnlyDefPart flag
was added. When it is TRUE, only the jobs whose default partition is
the passed partition are examined; all other jobs are skipped in the
selection process. When OnlyDefPart is FALSE the original behaviour
is kept: all jobs are examined.
The patch is a no-op from the functional point of view: the
OnlyDefPart argument to MQueueSelectJobs() is set to FALSE everywhere.
The patch was tested on the RRC-KI Grid cluster and has so far shown
no regressions in its daily operations.
Signed-off-by: Eygene Ryabinkin <[EMAIL PROTECTED]>
---
include/moab-proto.h | 3 +-
src/moab/MPar.c | 107 ++++++++++++++++++++++++++++++--------------------
src/moab/MPolicy.c | 13 ++++++-
src/moab/MQueue.c | 2 +
src/moab/MSched.c | 16 +++++--
src/server/UserI.c | 1 +
6 files changed, 92 insertions(+), 50 deletions(-)
diff --git a/include/moab-proto.h b/include/moab-proto.h
index e92b487..db3ff3a 100644
--- a/include/moab-proto.h
+++ b/include/moab-proto.h
@@ -396,6 +396,7 @@ int MJobSetState(mjob_t *,enum MJobStateEnum);
int MJobPreempt(mjob_t *,mjob_t **,enum MPreemptPolicyEnum,char *,int *);
int MJobResume(mjob_t *,char *,int *);
int MJobGetPAL(mjob_t *,int *,int *,mpar_t **);
+mpar_t *MJobFindDefPart(mjob_t *, mclass_t *, int *);
int MJobRemove(mjob_t *);
int MJobGetAccount(mjob_t *,mgcred_t **);
int MJobSetCreds(mjob_t *,char *,char *,char *);
@@ -491,7 +492,7 @@ int MQueueDiagnose(mjob_t **,int *,int,mpar_t *,char *,int);
int MQueueCheckStatus(void);
int MQueueGetRequeueValue(int *,long,long,double *);
int MQueueSelectAllJobs(mjob_t **,int,mpar_t *,int *,int,int,int,char *);
-int MQueueSelectJobs(int *,int *,int,int,int,unsigned long,int,int *,mbool_t);
+int MQueueSelectJobs(int *,int *,int,int,int,unsigned long,int,int
*,mbool_t,mbool_t);
int MQueueAddAJob(mjob_t *);
int MQueueRemoveAJob(mjob_t *,int);
int MQueueBackFill(int *,int,mpar_t *);
diff --git a/src/moab/MPar.c b/src/moab/MPar.c
index 6ba4e06..0df6f0d 100644
--- a/src/moab/MPar.c
+++ b/src/moab/MPar.c
@@ -347,52 +347,11 @@ int MJobGetPAL(
if (PAL != NULL)
MUBMCopy(PAL,tmpPAL,MAX_MPAR);
- /* determine allowed partition default (precedence: U,G,A,C,S,0) */
+ /* determine allowed partition default */
if (PDef != NULL)
{
- if ((J->Cred.U->F.PDef != NULL) &&
- (J->Cred.U->F.PDef != &MPar[0]) &&
- MUBMCheck(((mpar_t *)J->Cred.U->F.PDef)->Index,tmpPAL))
- {
- *PDef = (mpar_t *)J->Cred.U->F.PDef;
- }
- else if ((J->Cred.G->F.PDef != NULL) &&
- (J->Cred.G->F.PDef != &MPar[0]) &&
- MUBMCheck(((mpar_t *)J->Cred.G->F.PDef)->Index,tmpPAL))
- {
- *PDef = (mpar_t *)J->Cred.G->F.PDef;
- }
- else if ((J->Cred.A != NULL) &&
- (J->Cred.A->F.PDef != NULL) &&
- (J->Cred.A->F.PDef != &MPar[0]) &&
- MUBMCheck(((mpar_t *)J->Cred.A->F.PDef)->Index,tmpPAL))
- {
- *PDef = (mpar_t *)J->Cred.A->F.PDef;
- }
- else if ((C != NULL) &&
- (C->F.PDef != NULL) &&
- (C->F.PDef != &MPar[0]) &&
- MUBMCheck(((mpar_t *)C->F.PDef)->Index,tmpPAL))
- {
- *PDef = (mpar_t *)C->F.PDef;
- }
- else if ((J->Cred.Q != NULL) &&
- (J->Cred.Q->F.PDef != NULL) &&
- (J->Cred.Q->F.PDef != &MPar[0]) &&
- MUBMCheck(((mpar_t *)J->Cred.Q->F.PDef)->Index,tmpPAL))
- {
- *PDef = (mpar_t *)J->Cred.Q->F.PDef;
- }
- else if ((MPar[0].F.PDef != NULL) &&
- (MPar[0].F.PDef != &MPar[0]))
- {
- *PDef = (mpar_t *)MPar[0].F.PDef;
- }
- else
- {
- *PDef = &MPar[MDEF_SYSPDEF];
- }
+ *PDef = MJobFindDefPart(J, C, tmpPAL);
/* verify access to default partition */
@@ -439,7 +398,69 @@ int MJobGetPAL(
return(SUCCESS);
} /* END MJobGetPAL() */
+/*
+ * Determines default partition for a job (precedence: U,G,A,C,S,0)
+ * 'PAL' is consulted to determine partition access if it is not NULL.
+ * 'C' is consulted for the default partition if it is not NULL.
+ */
+mpar_t *MJobFindDefPart(
+ mjob_t *J, /* I: job */
+ mclass_t *C, /* I: job class */
+ int *PAL) /* I: partition access list */
+
+ {
+ mpar_t *PDef;
+
+ if ((J->Cred.U->F.PDef != NULL) &&
+ (J->Cred.U->F.PDef != &MPar[0]) &&
+ (PAL == NULL ||
+ MUBMCheck(((mpar_t *)J->Cred.U->F.PDef)->Index,PAL)))
+ {
+ PDef = (mpar_t *)J->Cred.U->F.PDef;
+ }
+ else if ((J->Cred.G->F.PDef != NULL) &&
+ (J->Cred.G->F.PDef != &MPar[0]) &&
+ (PAL == NULL ||
+ MUBMCheck(((mpar_t *)J->Cred.G->F.PDef)->Index,PAL)))
+ {
+ PDef = (mpar_t *)J->Cred.G->F.PDef;
+ }
+ else if ((J->Cred.A != NULL) &&
+ (J->Cred.A->F.PDef != NULL) &&
+ (J->Cred.A->F.PDef != &MPar[0]) &&
+ (PAL == NULL ||
+ MUBMCheck(((mpar_t *)J->Cred.A->F.PDef)->Index,PAL)))
+ {
+ PDef = (mpar_t *)J->Cred.A->F.PDef;
+ }
+ else if ((C != NULL) &&
+ (C->F.PDef != NULL) &&
+ (C->F.PDef != &MPar[0]) &&
+ (PAL == NULL ||
+ MUBMCheck(((mpar_t *)C->F.PDef)->Index,PAL)))
+ {
+ PDef = (mpar_t *)C->F.PDef;
+ }
+ else if ((J->Cred.Q != NULL) &&
+ (J->Cred.Q->F.PDef != NULL) &&
+ (J->Cred.Q->F.PDef != &MPar[0]) &&
+ (PAL == NULL ||
+ MUBMCheck(((mpar_t *)J->Cred.Q->F.PDef)->Index,PAL)))
+ {
+ PDef = (mpar_t *)J->Cred.Q->F.PDef;
+ }
+ else if ((MPar[0].F.PDef != NULL) &&
+ (MPar[0].F.PDef != &MPar[0]))
+ {
+ PDef = (mpar_t *)MPar[0].F.PDef;
+ }
+ else
+ {
+ PDef = &MPar[MDEF_SYSPDEF];
+ }
+ return PDef;
+ } /* END MJobFindDefPart() */
int MParFind(
diff --git a/src/moab/MPolicy.c b/src/moab/MPolicy.c
index bfca663..c60a435 100644
--- a/src/moab/MPolicy.c
+++ b/src/moab/MPolicy.c
@@ -171,7 +171,8 @@ int MQueueSelectJobs(
unsigned long MaxWCLimit, /* I */
int OrigPIndex, /* I */
int *FReason, /* O */
- mbool_t UpdateStats) /* I: (boolean) */
+ mbool_t UpdateStats, /* I: (boolean) */
+ mbool_t OnlyDefPart) /* I: (boolean) */
{
int index;
@@ -263,6 +264,16 @@ int MQueueSelectJobs(
continue;
}
+ if (OnlyDefPart == TRUE && MJobFindDefPart(J, NULL, NULL) != P)
+ {
+ DBG(7,fSCHED) DPrint("INFO: skipping job[%d] '%s', only default
partition check requested (and current partition is %s)\n",
+ jindex,
+ J->Name,
+ P->Name);
+
+ continue;
+ }
+
if (MQueueCheckSingleJob(J, Reason, P, GP, PLevel,
MaxNC, MaxPC, MaxWCLimit, OrigPIndex, UpdateStats) == FAILURE)
continue;
diff --git a/src/moab/MQueue.c b/src/moab/MQueue.c
index 106a012..aba2bbb 100644
--- a/src/moab/MQueue.c
+++ b/src/moab/MQueue.c
@@ -446,6 +446,7 @@ int MQueueBackFill(
AdjBFTime,
P->Index,
NULL,
+ FALSE,
FALSE) == FAILURE)
{
DBG(5,fSCHED) DPrint("INFO: no jobs meet BF window criteria in
partition %s\n",
@@ -1516,6 +1517,7 @@ int MQueueCheckStatus()
MAX_MTIME,
-1,
ReasonList,
+ FALSE,
FALSE) == FAILURE)
{
strcpy(DeferMessage,"SCHED_INFO: job cannot run. Reason: cannot
select job\n");
diff --git a/src/moab/MSched.c b/src/moab/MSched.c
index 8434272..92fbae0 100644
--- a/src/moab/MSched.c
+++ b/src/moab/MSched.c
@@ -6949,6 +6949,7 @@ int MSchedProcessJobs(
MAX_MTIME,
-1,
NULL,
+ FALSE,
FALSE) == SUCCESS)
{
memcpy(MFQ,tmpQ,sizeof(MFQ));
@@ -6971,7 +6972,8 @@ int MSchedProcessJobs(
MAX_MTIME,
-1,
NULL,
- TRUE);
+ TRUE,
+ FALSE);
/* schedule priority jobs */
@@ -6996,7 +6998,8 @@ int MSchedProcessJobs(
MAX_MTIME,
PIndex,
NULL,
- TRUE) == SUCCESS)
+ TRUE,
+ FALSE) == SUCCESS)
{
MQueueScheduleIJobs(tmpQ,&MPar[PIndex]);
@@ -7023,7 +7026,8 @@ int MSchedProcessJobs(
MAX_MTIME,
-1,
NULL,
- TRUE);
+ TRUE,
+ FALSE);
if (CurrentQ[0] != -1)
{
@@ -7055,7 +7059,8 @@ int MSchedProcessJobs(
MAX_MTIME,
PIndex,
NULL,
- TRUE) == SUCCESS)
+ TRUE,
+ FALSE) == SUCCESS)
{
MQueueBackFill(tmpQ,ptHARD,&MPar[PIndex]);
}
@@ -7097,7 +7102,8 @@ int MSchedProcessJobs(
MAX_MTIME,
-1,
NULL,
- TRUE);
+ TRUE,
+ FALSE);
/* must sort/order MUIQ */
diff --git a/src/server/UserI.c b/src/server/UserI.c
index 9bcd8da..c409c28 100644
--- a/src/server/UserI.c
+++ b/src/server/UserI.c
@@ -1790,6 +1790,7 @@ int UIJobShow(
MAX_MTIME,
P->Index,
Reason,
+ FALSE,
FALSE) == FAILURE) || (DstQ[0] == -1))
{
for (index = 0;index < MAX_MREJREASON;index++)
--
1.5.0.3-dirty
>From 39b7853f12e823389e8b90507cf5fed002b3b5db Mon Sep 17 00:00:00 2001
From: Eygene Ryabinkin <[EMAIL PROTECTED]>
Date: Wed, 21 Mar 2007 14:10:22 +0300
Subject: [PATCH] Fixed default partition handling by the two-pass scheduling.
MSchedProcessJobs() now uses two-pass scheduling: the first pass over
all partitions schedules the jobs that can be placed on their default
partitions, and the second pass schedules the rest of the jobs.
Backfilling is disabled on the first pass: we should first load the
queue with the eligible jobs and only then do the backfilling.
The patch was tested on the RRC-KI Grid cluster and has so far shown
no regressions in its daily operations. The default partition ('PDEF')
statement works as expected: jobs are first scheduled to the default
partition, and only once the default partition's nodes are busy do
they go to the rest of the partitions.
Signed-off-by: Eygene Ryabinkin <[EMAIL PROTECTED]>
---
src/moab/MSched.c | 81 ++++++++++++++++++++++++++++++----------------------
1 files changed, 47 insertions(+), 34 deletions(-)
diff --git a/src/moab/MSched.c b/src/moab/MSched.c
index 92fbae0..9ef5338 100644
--- a/src/moab/MSched.c
+++ b/src/moab/MSched.c
@@ -6977,44 +6977,57 @@ int MSchedProcessJobs(
/* schedule priority jobs */
+#ifdef M_SCHEDULE_ON_PARTITIONS
+#error Symbol M_SCHEDULE_ON_PARTITIONS is already defined. Fix me, please.
+#endif
+#define M_SCHEDULE_ON_PARTITIONS(OnlyDefPartArg, DoBackfillArg) \
+ do { \
+ for (PIndex = 0;PIndex < MAX_MPAR;PIndex++) \
+ { \
+ if (((PIndex == 0) && (MPar[2].ConfigNodes == 0)) || \
+ (MPar[PIndex].ConfigNodes == 0)) \
+ { \
+ continue; \
+ } \
+ \
+ MOQueueInitialize(tmpQ); \
+ \
+ if (MQueueSelectJobs( \
+ CurrentQ, \
+ tmpQ, \
+ ptSOFT, \
+ MAX_MNODE, \
+ MAX_MTASK, \
+ MAX_MTIME, \
+ PIndex, \
+ NULL, \
+ TRUE, \
+ (OnlyDefPartArg)) == SUCCESS) \
+ { \
+ MQueueScheduleIJobs(tmpQ,&MPar[PIndex]); \
+ \
+ if ((DoBackfillArg) == TRUE && MPar[PIndex].BFPolicy != ptOFF) \
+ { \
+ /* backfill jobs using 'soft' policy constraints */ \
+ \
+ MQueueBackFill(tmpQ,ptSOFT,&MPar[PIndex]); \
+ } \
+ } \
+ \
+ MOQueueDestroy(tmpQ,FALSE); \
+ } /* END for (PIndex) */ \
+ } while (0)
+
if (CurrentQ[0] != -1)
{
- for (PIndex = 0;PIndex < MAX_MPAR;PIndex++)
- {
- if (((PIndex == 0) && (MPar[2].ConfigNodes == 0)) ||
- (MPar[PIndex].ConfigNodes == 0))
- {
- continue;
- }
-
- MOQueueInitialize(tmpQ);
-
- if (MQueueSelectJobs(
- CurrentQ,
- tmpQ,
- ptSOFT,
- MAX_MNODE,
- MAX_MTASK,
- MAX_MTIME,
- PIndex,
- NULL,
- TRUE,
- FALSE) == SUCCESS)
- {
- MQueueScheduleIJobs(tmpQ,&MPar[PIndex]);
-
- if (MPar[PIndex].BFPolicy != ptOFF)
- {
- /* backfill jobs using 'soft' policy constraints */
-
- MQueueBackFill(tmpQ,ptSOFT,&MPar[PIndex]);
- }
- }
-
- MOQueueDestroy(tmpQ,FALSE);
- } /* END for (PIndex) */
+ /* pass 1: only jobs whose default partition is PIndex; no backfilling */
+ M_SCHEDULE_ON_PARTITIONS(TRUE, FALSE);
+ /* pass 2: all jobs on all partitions; backfill where BFPolicy != ptOFF */
+ M_SCHEDULE_ON_PARTITIONS(FALSE, TRUE);
} /* END if (GlobalSQ[0] != -1) */
+#undef M_SCHEDULE_ON_PARTITIONS
+
MOQueueDestroy(CurrentQ,TRUE);
MQueueSelectJobs(
--
1.5.0.3-dirty
--- End Message ---