Hi All.   I'm working on a solution to this thread and have got the
following steps working in my code (diff attached):

1) OS sends SIGXFSZ to mythbackend
2) The backend captures said signal and squirrels it away in a global called
"LastSignal", so anyone who wants to can look for it. (Yes, I know globals
are bad, but signal handlers are worse.)
3) ThreadedFileWriter.cpp has an existing function called "safe_write" that
I've modified so that it checks for the signal (in the global) before trying
to write to any file.
4) safe_write: if a SIGXFSZ signal was received it "aborts" the in-progress
write (then-and-there, without flushing memory buffers to disk or anything),
returning an error.
5) safe_write is called from inside ThreadedFileWriter::DiskLoop.  The
return value of safe_write is tested in DiskLoop, and it causes both threads
(write and sync threads of ThreadedFileWriter) to be torn down, and the
ThreadedFileWriter to enter a state of "write error".
6) The next time the caller (RingBuffer.cpp) tries to call tfw->Write(...), it
fails, returning -1 up to the calling function (Write, in RingBuffer.cpp),
and the tfw is torn down, closing the open file handle and cleaning
up.
7) RingBuffer.cpp already had the capability to return -1 or other errors,
so it's been tweaked to look at the return status of the tfw->Write call
too, and pass the error up if it occurs.

...now, I'm not sure where to take it from here.  

The signal is definitely being captured, and the resulting error is being
passed all the way back up to the RingBuffer, so I know that part is working.
However, nothing else (backend and/or frontend) seems to recognise that the
recording failed. Should I go that far, or just barf the error message to the
log and leave it at that?

I.e.: How do I make everything else recognise that the recording of this file
has aborted/failed?

Am I doing the right thing here... Or is there an easier way?


Buzz.


> -----Original Message-----
> From: [EMAIL PROTECTED] 
> [mailto:[EMAIL PROTECTED] On Behalf Of Buzz
> Sent: Thursday, 19 January 2006 5:12 PM
> To: 'Development of mythtv'
> Subject: Re: [mythtv] Backend process dies at 4GB file limit? 
> -with backtrace as requested.
> 
> Mike,
> Thanks for the URL tip re gdb.   
> 
> In this case however, the BT I've given is enough to find the
> problem/solution:
> 
> *) The OS is sending a SIGXFSZ to the backend, and the backend is taking
> the default action, which is "coredump and exit".
> 
> Solution:
> 
> * capture SIGXFSZ, handle it gracefully.
> 
> Buzz
> 
> 
> > -----Original Message-----
> > From: [EMAIL PROTECTED]
> > [mailto:[EMAIL PROTECTED] On Behalf Of Michael T. Dean
> > Sent: Thursday, 19 January 2006 4:57 PM
> > To: Development of mythtv
> > Subject: Re: [mythtv] Backend process dies at 4GB file limit? 
> > - with backtrace as requested.
> > 
> > Buzz wrote:
> > > All of backend log is available on request, and gdb is
> > still sitting
> > > there (for a few hours anyway)
> > good
> > > , incase you want me to do anything else with it.
> > > :-)   I'm a novice with GDB, so be gentle with me. (I don't 
> > know anything
> > > more than how to do 'run','c' and 'bt'.)
> > >   
> > Please http://www.mythtv.org/docs/mythtv-HOWTO-22.html#ss22.2
> > 
> > Mike
> > 
> > _______________________________________________
> > mythtv-dev mailing list
> > [email protected]
> > http://mythtv.org/cgi-bin/mailman/listinfo/mythtv-dev
> > 
> 
> 
> _______________________________________________
> mythtv-dev mailing list
> [email protected]
> http://mythtv.org/cgi-bin/mailman/listinfo/mythtv-dev
> 
Index: libs/libmythtv/RingBuffer.cpp
===================================================================
--- libs/libmythtv/RingBuffer.cpp       (revision 8639)
+++ libs/libmythtv/RingBuffer.cpp       (working copy)
@@ -964,9 +964,16 @@
     pthread_rwlock_rdlock(&rwlock);
 
     ret = tfw->Write(buf, count);
-    writepos += ret;
+    if ( ret != -1 ) writepos += ret;
 
     pthread_rwlock_unlock(&rwlock);
+
+    if (ret == -1 ) { 
+       delete tfw ; 
+       tfw = NULL;
+        VERBOSE(VB_IMPORTANT, LOC_ERR + "Write Failed abnormally!");
+    }
+
     return ret;
 }
 
Index: libs/libmythtv/ThreadedFileWriter.cpp
===================================================================
--- libs/libmythtv/ThreadedFileWriter.cpp       (revision 8639)
+++ libs/libmythtv/ThreadedFileWriter.cpp       (working copy)
@@ -8,6 +8,7 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <signal.h>
 
 // MythTV headers
 #include "ThreadedFileWriter.h"
@@ -35,23 +36,32 @@
  *   to the stream.
  */
 
+int LastSignal = 0;
+
 /** \fn safe_write(int, const void*, uint)
  *  \brief Writes data to disk
  *
  *   This just uses the Standard C write() to write to disk.
  *   We retry forever on EAGAIN errors, and three times on
  *   any other error.
+ *  
+ *   If the OS signaled us of a "bigger" underlying problem,
+ *   then we give up immediately, returning -1.
  *
  *  \param fd   File descriptor
  *  \param data Pointer to data to write
  *  \param sz   Size of data to write in bytes
  */
-static uint safe_write(int fd, const void *data, uint sz)
+static int safe_write(int fd, const void *data, uint sz)
 {
     int ret;
     uint tot = 0;
     uint errcnt = 0;
+    extern int LastSignal;
 
+    // write nothing if system limit reached
+    if ( LastSignal == SIGXFSZ ) { LastSignal = 0; return -1; } 
+
     while (tot < sz)
     {
         ret = write(fd, (char *)data + tot, sz - tot);
@@ -90,8 +100,9 @@
 void *ThreadedFileWriter::boot_writer(void *wotsit)
 {
     ThreadedFileWriter *fw = (ThreadedFileWriter *)wotsit;
-    fw->DiskLoop();
-    return NULL;
+    void *err = (void *)fw->DiskLoop();
+    //at this point fw is either destructed, or about to be....
+    return err; // so the writer thread can be implicitly pthread_exit'd
 }
 
 /** \fn ThreadedFileWriter::boot_syncer(void*)
@@ -159,14 +170,18 @@
 
     if (fd >= 0)
     {
-        Flush();
-        in_dtor = true; /* tells child thread to exit */
+       /* flush, but only if other threads are sure to be still ok....*/
+        if (in_dtor != true ) { 
+           Flush(); 
+       } 
+        in_dtor = true; /* tells child threads to exit, if not already */
 
-        bufferSyncWait.wakeAll();
-        pthread_join(syncer, NULL);
+        /* wait till the child threads have died*/
+        bufferSyncWait.wakeAll(); //wake sync thread
+        pthread_join(syncer, NULL); //wait for it to die
+        bufferHasData.wakeAll();  //wake data thread
+        pthread_join(writer, NULL); //wait for it to die
 
-        bufferHasData.wakeAll();
-        pthread_join(writer, NULL);
         close(fd);
         fd = -1;
     }
@@ -182,11 +197,12 @@
  *  \brief Writes data to the end of the write buffer
  *
  *   NOTE: This blocks while buffer is in use by the write to disk thread.
+ *   NOTE2: returns -1 on write error.
  *
  *  \param data  pointer to data to write to disk
  *  \param count size of data in bytes
  */
-uint ThreadedFileWriter::Write(const void *data, uint count)
+int ThreadedFileWriter::Write(const void *data, uint count)
 {
     if (count == 0)
         return 0;
@@ -207,6 +223,10 @@
     if (!first)
         VERBOSE(VB_IMPORTANT, LOC_ERR + "Write() -- IOBOUND end");
 
+    // catastrophic write failure:
+    if (no_writes && in_dtor) 
+       return -1;
+
     if (no_writes)
         return 0;
 
@@ -266,7 +286,7 @@
 /** \fn ThreadedFileWriter::Sync(void)
  *  \brief flush data written to the file descriptor to disk.
  *
- *   NOTE: This doesn't even try flush our queue of data.
+ *   NOTE: This doesn't even try to flush our queue of data.
  *   This only ensures that data which has already been sent
  *   to the kernel for this file is written to disk. This 
  *   means that if this backend is writing the data over a 
@@ -335,11 +355,12 @@
 /** \fn ThreadedFileWriter::DiskLoop(void)
  *  \brief The thread run method that actually calls safe_write().
  */
-void ThreadedFileWriter::DiskLoop(void)
+int ThreadedFileWriter::DiskLoop(void)
 {
-    uint size = 0, written = 0;
+    int size = 0;
+    uint written = 0;
 
-    while (!in_dtor || BufUsed() > 0)
+    while (!no_writes && (!in_dtor || BufUsed() > 0 ))
     {
         size = BufUsed();
 
@@ -347,7 +368,7 @@
             bufferEmpty.wakeAll();
 
         if (!size || (!in_dtor && !flush &&
-            ((size < tfw_min_write_size) &&
+            (((uint)size < tfw_min_write_size) &&
              (written >= tfw_min_write_size))))
         {
             bufferHasData.wait(100);
@@ -358,19 +379,22 @@
            buffer is valid, and we try to write all of it at once which
            takes a long time. During this time, the other thread fills up
            the 10% that was free... */
-        size = (size > TFW_MAX_WRITE_SIZE) ? TFW_MAX_WRITE_SIZE : size;
+        size = ((uint)size > TFW_MAX_WRITE_SIZE) ? TFW_MAX_WRITE_SIZE : size;
 
         if ((rpos + size) > tfw_buf_size)
         {
             int first_chunk_size  = tfw_buf_size - rpos;
             int second_chunk_size = size - first_chunk_size;
             size = safe_write(fd, buf+rpos, first_chunk_size);
+           if ( size == -1 ) { no_writes = true; in_dtor = true; }//serious write failure abort now!
             if ((int)size == first_chunk_size)
                 size += safe_write(fd, buf, second_chunk_size);
+           if ( size == -1 ) { no_writes = true; in_dtor = true; }//serious write failure abort now
         }
         else
         {
             size = safe_write(fd, buf+rpos, size);
+           if ( size == -1 ) { no_writes = true; in_dtor = true; }//serious write failure abort now
         }
 
         if (written < tfw_min_write_size)
@@ -384,6 +408,9 @@
 
         bufferWroteData.wakeAll();
     }
+    // pass serious write error upstream, if it occurs....
+    if ( no_writes == true && in_dtor == true ) return -1;
+   return 0;
 }
 
 /** \fn ThreadedFileWriter::BufUsed(void)
Index: libs/libmythtv/ThreadedFileWriter.h
===================================================================
--- libs/libmythtv/ThreadedFileWriter.h (revision 8639)
+++ libs/libmythtv/ThreadedFileWriter.h (working copy)
@@ -7,6 +7,8 @@
 #include <qwaitcondition.h>
 #include <qstring.h>
 
+extern int LastSignal;
+
 class ThreadedFileWriter
 {
   public:
@@ -16,7 +18,7 @@
     bool Open(void);
 
     long long Seek(long long pos, int whence);
-    uint Write(const void *data, uint count);
+    int Write(const void *data, uint count);
 
     void SetWriteBufferSize(uint newSize = TFW_DEF_BUF_SIZE);
     void SetWriteBufferMinWriteSize(uint newMinSize = TFW_MIN_WRITE_SIZE);
@@ -29,7 +31,7 @@
 
   protected:
     static void *boot_writer(void *);
-    void DiskLoop(void);
+    int DiskLoop(void);
 
     static void *boot_syncer(void *);
     void SyncLoop(void);
Index: programs/mythbackend/mainserver.h
===================================================================
--- programs/mythbackend/mainserver.h   (revision 8639)
+++ programs/mythbackend/mainserver.h   (working copy)
@@ -22,6 +22,8 @@
 class HttpStatus;
 class ProcessRequestThread;
 
+extern int LastSignal;
+
 class MainServer : public QObject
 {
     Q_OBJECT
Index: programs/mythbackend/main.cpp
===================================================================
--- programs/mythbackend/main.cpp       (revision 8639)
+++ programs/mythbackend/main.cpp       (working copy)
@@ -42,6 +42,7 @@
 QString lockfile_location;
 HouseKeeper *housekeeping = NULL;
 QString logfile = "";
+//extern int LastSignal;
 
 bool setupTVs(bool ismaster, bool &error)
 {
@@ -182,6 +183,8 @@
     unlink(lockfile_location.ascii());
 
     signal(SIGHUP, SIG_DFL);
+
+    signal(SIGXFSZ,SIG_DFL);
 }
 
 int log_rotate(int report_error)
@@ -215,7 +218,17 @@
     log_rotate(0);
 }
 
+void file_size_limit_handler(int s)
+{
+       extern int LastSignal;
 
+       // some implementations reset signal handler to default after capture:
+       signal(SIGXFSZ,file_size_limit_handler);
+       // remember it, so others can handle it now!
+       LastSignal = s; 
+}
+
+
 int main(int argc, char **argv)
 {
     for(int i = 3; i < sysconf(_SC_OPEN_MAX) - 1; ++i)
@@ -376,6 +389,9 @@
 
     if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
         cerr << "Unable to ignore SIGPIPE\n";
+    
+    if (signal(SIGXFSZ, &file_size_limit_handler) == SIG_ERR) 
+        cerr << "Unable to set SIGXFSZ handler. Reaching ulimit/filesize limit will kill backend ungracefully.\n";
 
     if (daemonize)
         if (daemon(0, 1) < 0)
_______________________________________________
mythtv-dev mailing list
[email protected]
http://mythtv.org/cgi-bin/mailman/listinfo/mythtv-dev

Reply via email to