Andrew Dunstan <[EMAIL PROTECTED]> writes:
>> A small problem with it was reported to me a couple of days ago - 
>> a user had firewalled off all IPv6 traffic. The stats collector happily 
>> bound and connected to the socket, but all the packets fell into the 
>> bit bucket. They found it quite hard to diagnose the problem.

> Revised patch attached. I think this is about as much trouble as this 
> problem is worth ;-)

I thought the messages were a bit sloppy, which made the patch much less
useful than it should be: we are testing for a very specific failure
mode and we can give a very specific message.  Patch as-applied is
attached.

I don't have any really convenient way to set up a situation where this
failure can actually occur.  Does anyone want to verify that the patch
acts as intended?

                        regards, tom lane

*** src/backend/postmaster/pgstat.c.orig        Mon Mar 15 15:01:57 2004
--- src/backend/postmaster/pgstat.c     Mon Mar 22 18:55:29 2004
***************
*** 191,196 ****
--- 191,202 ----
                           *addr,
                                hints;
        int                     ret;
+       fd_set      rset;
+       struct timeval tv;
+       char        test_byte;
+       int         sel_res;
+ 
+ #define TESTBYTEVAL ((char) 199)
  
        /*
         * Force start of collector daemon if something to collect
***************
*** 303,308 ****
--- 309,393 ----
                        ereport(LOG,
                                        (errcode_for_socket_access(),
                                         errmsg("could not connect socket for 
statistics collector: %m")));
+                       closesocket(pgStatSock);
+                       pgStatSock = -1;
+                       continue;
+               }
+ 
+               /*
+                * Try to send and receive a one-byte test message on the socket.
+                * This is to catch situations where the socket can be created but
+                * will not actually pass data (for instance, because kernel packet
+                * filtering rules prevent it).
+                */
+               test_byte = TESTBYTEVAL;
+               if (send(pgStatSock, &test_byte, 1, 0) != 1)
+               {
+                       ereport(LOG,
+                                       (errcode_for_socket_access(),
+                                        errmsg("could not send test message on socket 
for statistics collector: %m")));
+                       closesocket(pgStatSock);
+                       pgStatSock = -1;
+                       continue;
+               }
+ 
+               /*
+                * There could possibly be a little delay before the message can be
+                * received.  We arbitrarily allow up to half a second before deciding
+                * it's broken.
+                */
+               for (;;)                                /* need a loop to handle EINTR 
*/
+               {
+                       FD_ZERO(&rset);
+                       FD_SET(pgStatSock, &rset);
+                       tv.tv_sec = 0;
+                       tv.tv_usec = 500000;
+                       sel_res = select(pgStatSock+1, &rset, NULL, NULL, &tv);
+                       if (sel_res >= 0 || errno != EINTR)
+                               break;
+               }
+               if (sel_res < 0)
+               {
+                       ereport(LOG,
+                                       (errcode_for_socket_access(),
+                                        errmsg("select() failed in statistics 
collector: %m")));
+                       closesocket(pgStatSock);
+                       pgStatSock = -1;
+                       continue;
+               }
+               if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset))
+               {
+                       /*
+                        * This is the case we actually think is likely, so take pains 
to
+                        * give a specific message for it.
+                        *
+                        * errno will not be set meaningfully here, so don't use it.
+                        */
+                       ereport(LOG,
+                                       (ERRCODE_CONNECTION_FAILURE,
+                                        errmsg("test message did not get through on 
socket for statistics collector")));
+                       closesocket(pgStatSock);
+                       pgStatSock = -1;
+                       continue;
+               }
+ 
+               test_byte++;                    /* just make sure variable is changed 
*/
+ 
+               if (recv(pgStatSock, &test_byte, 1, 0) != 1)
+               {
+                       ereport(LOG,
+                                       (errcode_for_socket_access(),
+                                        errmsg("could not receive test message on 
socket for statistics collector: %m")));
+                       closesocket(pgStatSock);
+                       pgStatSock = -1;
+                       continue;
+               }
+ 
+               if (test_byte != TESTBYTEVAL) /* strictly paranoia ... */
+               {
+                       ereport(LOG,
+                                       (ERRCODE_INTERNAL_ERROR,
+                                        errmsg("incorrect test message transmission 
on socket for statistics collector")));
                        closesocket(pgStatSock);
                        pgStatSock = -1;
                        continue;

---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster

Reply via email to