Hi Maui folks,

GPU support in Maui is a long-standing problem. Last year Mariusz Mamoński
sent a patch [1] that is based on GRES parameters.
I have now got GPUs more or less working by enhancing that patch. Please find
attached the resulting patch, which works well with Maui 3.3.1.
It defines a special GRES named "gpu", which works as expected in my test cases.

Note that GRES behaviour seems rather inconsistent: GRES are sometimes
described as consumable. This patch removes that behaviour, for the needs of GPUs.

To use the patch:
Get the maui-3.3.1 sources and, from the top of the source tree, apply it:
patch -p1 < ../Patch-for-gpu-GRES.patch
Then compile as usual.

You have to configure the GPUs in maui.cfg:
NODECFG[nodename] GRES=gpu:2

Then when queuing jobs you can request GPUs with (Torque syntax):
qsub -W x=GRES:gpu@1

I hope this helps. Please test it and adapt it to your needs!

[1]
http://www.supercluster.org/pipermail/mauiusers/2011-April/004622.html

Regards,

PS. This is the second attempt to send the mail…

--
Jonathan Michalon
IT student in Strasbourg
>From 5a82ed4512eb26e94288e1a78e0f3f9f7e52b1f2 Mon Sep 17 00:00:00 2001
From: Jonathan Michalon <[email protected]>
Date: Wed, 18 Jan 2012 11:41:55 +0100
Subject: [PATCH 1/2] =?UTF-8?q?Patch=20from=20the=20Internet=20(Mariusz=20Mamo=C5=84ski=20<mamonski=20at=20man.poznan.pl>)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch parses GRES lines, adjusts the available resources on nodes,
and raises MAX_MGRES from 4 to 16.

Signed-off-by: Jonathan Michalon <[email protected]>
---
 include/moab.h          |    3 +-
 include/msched-common.h |    2 +-
 src/moab/MConst.c       |    1 +
 src/moab/MJob.c         |   48 +++++++++++++++++++++++++++++++++++++++++++++++
 src/moab/MNode.c        |    6 +++++
 5 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/include/moab.h b/include/moab.h
index dccc349..d68b462 100644
--- a/include/moab.h
+++ b/include/moab.h
@@ -1692,7 +1692,8 @@ enum MXAttrType {
   mxaSID,
   mxaSJID,
   mxaTPN,
-  mxaTRL };
+  mxaTRL,
+  mxaGRes };
 
 enum MauiAppSimCommandEnum {
   mascNONE,
diff --git a/src/moab/MConst.c b/src/moab/MConst.c
index b80776f..c66238f 100644
--- a/src/moab/MConst.c
+++ b/src/moab/MConst.c
@@ -938,6 +938,7 @@ const char *MRMXAttr[] = {
   "SJID",
   "TPN",
   "TRL",
+  "GRES",
   NULL };
 
 const char *MJobFlags[] = {
diff --git a/src/moab/MJob.c b/src/moab/MJob.c
index a98cff9..e2c0956 100644
--- a/src/moab/MJob.c
+++ b/src/moab/MJob.c
@@ -4072,7 +4072,55 @@ int MJobProcessExtensionString(
           RQ->TaskRequestList[2]);
  
         break;
+      case mxaGRes:
 
+        MUStrCpy(tmpLine,Value,sizeof(tmpLine));
+
+        /* FORMAT:  GRES:<RESTYPE>[@<COUNT>][:<RESTYPE>[@<COUNT>]] 
+	   GRES:tape:matlab@2
+	*/
+
+        ptr = MUStrTok(tmpLine,":",&TokPtr2);
+
+        while (ptr != NULL)
+          {
+          char *gresName = NULL;
+          char *p = NULL;
+          int gresCount = 1;
+          int rIndex = 0;
+ 
+          if ((p = strchr(ptr,'@')) != NULL)
+            {
+            *p = '\0';
+            gresCount = strtol(p+1, NULL, 10);
+            }
+          
+          gresName = ptr;
+
+          DBG(3,fCONFIG) DPrint("INFO:     GRES requested = %s@%d\n",
+            gresName,
+            gresCount);
+          
+          if ((rIndex = MUMAGetIndex(eGRes,gresName,mVerify)) == 0)
+            {
+            DBG(1,fPBS) DPrint("ALERT:  Unknown GRES '%s'\n",
+              gresName);
+            }
+          else if (gresCount <= 0)
+            {
+            DBG(1,fPBS) DPrint("ALERT:  Invalid GRES count %d\n",
+              gresCount);
+            }
+          else
+            {
+            RQ->DRes.GRes[rIndex].count = gresCount;
+            RQ->DRes.GRes[0].count +=gresCount; 
+            }
+
+          ptr = MUStrTok(NULL,":",&TokPtr2);
+          }
+
+        break;
       default:
 
         /* not handled */
diff --git a/src/moab/MNode.c b/src/moab/MNode.c
index 557c718..c37ccb4 100644
--- a/src/moab/MNode.c
+++ b/src/moab/MNode.c
@@ -4947,6 +4947,12 @@ int MNodeAdjustAvailResources(
       *ARes[rindex] = CRes[rindex];
       }
     }  /* END for (rindex) */
+  
+  /* Adjust GRES */
+  for (rindex = 0; rindex < MAX_MGRES; rindex++) 
+  {
+    N->ARes.GRes[rindex].count = MAX(0,(N->CRes.GRes[rindex].count - N->DRes.GRes[rindex].count));
+  }
 
   return(SUCCESS);
   }  /* END MNodeAdjustAvailResources() */
-- 
1.7.2.5


>From 63907f27ead94cfe4b229d7ad991cb465a2517e9 Mon Sep 17 00:00:00 2001
From: Jonathan Michalon <[email protected]>
Date: Tue, 24 Jan 2012 19:29:14 +0100
Subject: [PATCH 2/2] Hack to prevent Defer for GRES 'gpu'


Signed-off-by: Jonathan Michalon <[email protected]>
---
 src/moab/MJob.c |   19 +++++++++++++++++++
 1 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/src/moab/MJob.c b/src/moab/MJob.c
index e2c0956..ec4b9a5 100644
--- a/src/moab/MJob.c
+++ b/src/moab/MJob.c
@@ -3179,6 +3179,7 @@ int MJobSetHold(
  
   {
   char     Line[MAX_MLINE];
+  int      rIndex;
 
   const char *FName = "MJobSetHold";
  
@@ -3240,6 +3241,24 @@ int MJobSetHold(
  
     return(SUCCESS);
     }
+
+  /* workaround to avoid defer state for GRES "gpu": when they are all utilized
+   * a noResource is triggered, but we don't want a hold */
+  /* only if NoResources, other reasons may be valid */
+  if (HoldReason == mhrNoResources)
+    {
+    /* check whether GPUs are defined as GRES somewhere */
+    if ((rIndex = MUMAGetIndex(eGRes,"gpu",mVerify)) != 0)
+      {
+      /* check whether our job requires GPUs */
+      if (J->Req[0]->DRes.GRes[rIndex].count > 0)
+        {
+        DBG(2,fSCHED) DPrint("INFO:     defer disabled (hack for GPUs)\n");
+
+        return(SUCCESS);
+        }
+      }
+    }
  
   if (J->Hold & (1 << mhBatch))
     {
-- 
1.7.2.5

_______________________________________________
mauiusers mailing list
[email protected]
http://www.supercluster.org/mailman/listinfo/mauiusers

Reply via email to