Author: atm Date: Fri May 17 00:06:56 2013 New Revision: 1483612 URL: http://svn.apache.org/r1483612 Log: HADOOP-9566. Performing direct read using libhdfs sometimes raises SIGPIPE (which in turn throws SIGABRT) causing client crashes. Contributed by Colin Patrick McCabe.
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1483612&r1=1483611&r2=1483612&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt (original) +++ hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt Fri May 17 00:06:56 2013 @@ -726,6 +726,10 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9563. Fix incompatibility introduced by HADOOP-9523. (Tian Hong Wang via suresh) + HADOOP-9566. Performing direct read using libhdfs sometimes raises SIGPIPE + (which in turn throws SIGABRT) causing client crashes. (Colin Patrick + McCabe via atm) + Release 2.0.4-alpha - 2013-04-25 INCOMPATIBLE CHANGES Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c?rev=1483612&r1=1483611&r2=1483612&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c (original) +++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c Fri May 17 00:06:56 2013 @@ -16,8 +16,7 @@ * limitations under the License. 
*/ -#define _GNU_SOURCE - +#include "config.h" #include "exception.h" #include "org/apache/hadoop/io/nativeio/file_descriptor.h" #include "org_apache_hadoop.h" @@ -31,6 +30,7 @@ #include <stdint.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <sys/ioctl.h> /* for FIONREAD */ #include <sys/socket.h> #include <sys/stat.h> @@ -47,6 +47,15 @@ #define DEFAULT_SEND_TIMEOUT 120000 #define LISTEN_BACKLOG 128 +/* In Linux, you can pass the MSG_NOSIGNAL flag to send, sendto, etc. to prevent + * those functions from generating SIGPIPE. See HDFS-4831 for details. + */ +#ifdef MSG_NOSIGNAL +#define PLATFORM_SEND_FLAGS MSG_NOSIGNAL +#else +#define PLATFORM_SEND_FLAGS 0 +#endif + /** * Can't pass more than this number of file descriptors in a single message. */ @@ -176,6 +185,19 @@ static jthrowable setup(JNIEnv *env, int "is %zd bytes.", sizeof(addr.sun_path) - 1); goto done; } +#ifdef SO_NOSIGPIPE + /* On MacOS and some BSDs, SO_NOSIGPIPE will keep send and sendto from raising + * SIGPIPE. Note: this will NOT help when using write or writev, only with + * send, sendto, sendmsg, etc. See HDFS-4831. + */ + ret = 1; + if (setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, (void *)&ret, sizeof(ret))) { + ret = errno; + jthr = newSocketException(env, ret, + "error setting SO_NOSIGPIPE on socket: error %s", terror(ret)); + goto done; + } +#endif if (doConnect) { RETRY_ON_EINTR(ret, connect(fd, (struct sockaddr*)&addr, sizeof(addr))); @@ -583,7 +605,7 @@ static jthrowable write_fully(JNIEnv *en int err, res; while (amt > 0) { - res = write(fd, buf, amt); + res = send(fd, buf, amt, PLATFORM_SEND_FLAGS); if (res < 0) { err = errno; if (err == EINTR) { @@ -685,7 +707,7 @@ jint offset, jint length) goto done; } } - RETRY_ON_EINTR(ret, sendmsg(fd, &socketMsg, 0)); + RETRY_ON_EINTR(ret, sendmsg(fd, &socketMsg, PLATFORM_SEND_FLAGS)); if (ret < 0) { ret = errno; jthr = newSocketException(env, ret, "sendmsg(2) error: %s", terror(ret));