The patch below proposes two changes to the code that detects an 
X server crash.  These changes make my not-so-reliable 
multi-server-seat setup a bit more workable.  

I posted a summary at 
http://bugs.freedesktop.org/show_bug.cgi?id=20546
but the text is actually longer than the patch itself.  

Since the code is self-contained, I was wondering if someone here 
would be willing to review the patch and perhaps support its 
inclusion in the X app/xdm tree.  If there are questions, 
just ask and I will describe the underlying logic.  

Changes:
1. The call to RemoveDisplay() is replaced by StopDisplay(), so that 
   the display is removed only after the exit of the server child 
   has been collected by waitpid().  
2. The resource reservAttempts is added to allow a few 
   successive crash-type manager exits before the display is stopped.  

diff --git a/dm.c b/dm.c
index 52b49eb..daec863 100644
--- a/dm.c
+++ b/dm.c
@@ -476,6 +476,7 @@ #endif
                break;
            case OBEYSESS_DISPLAY:
                d->startTries = 0;
+               d->reservTries = 0;
                Debug ("Display exited with OBEYSESS_DISPLAY\n");
                if (d->displayType.lifetime != Permanent ||
                    d->status == zombie)
@@ -512,24 +513,42 @@ #endif
                Debug ("Display exited with RESERVER_DISPLAY\n");
                if (d->displayType.origin == FromXDMCP || d->status == zombie)
                    StopDisplay(d);
-               else
-                   RestartDisplay (d, TRUE);
-               {
-                 Time_t Time;
-                 time(&Time);
-                 Debug("time %i %i\n",Time,d->lastCrash);
-                 if (d->lastCrash && 
-                     ((Time - d->lastCrash) < XDM_BROKEN_INTERVAL)) {
-                   Debug("Server crash frequency too high:"
-                         " removing display %s\n",d->name);
-                   LogError("Server crash rate too high:"
-                            " removing display %s\n",d->name);
+               else {
+                 Time_t now;
+                 int crash;
+
+                 time(&now);
+                 Debug("time %i %i try %i of %i\n", now, d->lastReserv,
+                       d->reservTries, d->reservAttempts);
+                 crash = d->lastReserv && 
+                   ((now - d->lastReserv) < XDM_BROKEN_INTERVAL);
+
+                 if (!crash) d->reservTries = 0;
+
+                 if (crash && ++d->reservTries >= d->reservAttempts) {
+                   char msg[]="Server crash frequency too high:"
+                     " stopping display";
+                   Debug("%s %s\n",msg,d->name);
+                   LogError("%s %s\n",msg,d->name);
 #if !defined(ARC4_RANDOM) && !defined(DEV_RANDOM)
                    AddTimerEntropy();
 #endif
-                   RemoveDisplay (d);
+                   /* For a local X server either:
+                    * 1. The server exit was returned by waitpid().  So
+                    *    serverPid==-1 => StopDisplay() calls RemoveDisplay()
+                    *
+                    * 2. The server is a zombie or still running.  So
+                    *    serverPid>1 => StopDisplay() 
+                    *                   - sets status=zombie, 
+                    *                   - kills the server.  
+                    *    The next waitpid() returns this zombie server pid 
+                    *    and the code below then calls RemoveDisplay().
+                    */
+                   StopDisplay(d);
                  } else 
-                   d->lastCrash = Time;
+                   RestartDisplay (d, TRUE);
+
+                 d->lastReserv = now;
                }
                break;
            case waitCompose (SIGTERM,0,0):
diff --git a/dm.h b/dm.h
index 5fa13e4..d472479 100644
--- a/dm.h
+++ b/dm.h
@@ -179,7 +179,8 @@ struct display {
        pid_t           serverPid;      /* process id of server (-1 if none) */
        FileState       state;          /* state during HUP processing */
        int             startTries;     /* current start try */
-        Time_t          lastCrash;      /* time of last crash */
+        Time_t          lastReserv;     /* time of last reserver crash */
+       int             reservTries;    /* current reserver try */
 #ifdef XDMCP
        /* XDMCP state */
        CARD32          sessionID;      /* ID of active session */
@@ -199,6 +200,7 @@ #endif
        int             openRepeat;     /* open attempts to make */
        int             openTimeout;    /* abort open attempt timeout */
        int             startAttempts;  /* number of attempts at starting */
+       int             reservAttempts; /* allowed fatal errors after start */
        int             pingInterval;   /* interval between XSync */
        int             pingTimeout;    /* timeout for XSync */
        int             terminateServer;/* restart for each session */
diff --git a/dpylist.c b/dpylist.c
index 6aca2ee..9c92294 100644
--- a/dpylist.c
+++ b/dpylist.c
@@ -245,7 +245,9 @@ NewDisplay (char *name, char *class)
     d->openTimeout = 0;
     d->startAttempts = 0;
     d->startTries = 0;
-    d->lastCrash = 0;
+    d->lastReserv = 0;
+    d->reservAttempts = 0;
+    d->reservTries = 0;
     d->terminateServer = 0;
     d->grabTimeout = 0;
 #ifdef XDMCP
diff --git a/resource.c b/resource.c
index d38fe07..aef7624 100644
--- a/resource.c
+++ b/resource.c
@@ -284,6 +284,8 @@ struct displayResource serverResources[]
                                "120" },
 { "startAttempts","StartAttempts",DM_INT,      boffset(startAttempts),
                                "4" },
+{ "reservAttempts","ReservAttempts",DM_INT,    boffset(reservAttempts),
+                               "2" },
 { "pingInterval","PingInterval",DM_INT,                boffset(pingInterval),
                                "5" },
 { "pingTimeout","PingTimeout", DM_INT,         boffset(pingTimeout),
diff --git a/xdm.man.cpp b/xdm.man.cpp
index ab511b7..18649b6 100644
--- a/xdm.man.cpp
+++ b/xdm.man.cpp
@@ -448,6 +448,7 @@ See the section
 .IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.openRepeat\fP"
 .IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.openTimeout\fP"
 .IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.startAttempts\fP"
+.IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.reservAttempts\fP"
 These numeric resources control the behavior of
 .I xdm
 when attempting to open intransigent servers.  \fBopenDelay\fP is
@@ -466,9 +467,10 @@ This
 process is repeated \fBstartAttempts\fP times, at which point the display is
 declared dead and disabled.  Although
 this behavior may seem arbitrary, it has been empirically developed and
-works quite well on most systems.  The default values are
-5 for \fBopenDelay\fP, 5 for \fBopenRepeat\fP, 30 for \fBopenTimeout\fP and
-4 for \fBstartAttempts\fP.
+works quite well on most systems.  \fBreservAttempts\fP is the number of 
+times a fatal error is allowed after connecting.  The default values are
+\fBopenDelay\fP: 15, \fBopenRepeat\fP: 5, \fBopenTimeout\fP: 120, 
+\fBstartAttempts\fP: 4 and \fBreservAttempts\fP: 2.
 .IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.pingInterval\fP"
 .IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.pingTimeout\fP"
 To discover when remote displays disappear,
_______________________________________________
xorg-devel mailing list
[email protected]
http://lists.x.org/mailman/listinfo/xorg-devel

Reply via email to