When launching tasks on about 1000 nodes, I get the following error
message sometimes:
srun: error: io_init_msg_read timed out
srun: error: failed reading io init message
I found the problem in src/common/fd.c, where "select()" is used to check
whether a file descriptor is readable. Running the attached program
tsel.c shows that on RHEL 6.2 a file descriptor passed to "select()"
cannot exceed 1023 (i.e. FD_SETSIZE - 1), or "FD_ISSET()" will not
function correctly:
[root@ln0 select]# cat /etc/issue
Red Hat Enterprise Linux Server release 6.2 (Santiago)
Kernel \r on an \m
[root@ln0 select]# uname -a
Linux ln0 2.6.32-220.el6.x86_64 #1 SMP Wed Nov 9 08:03:13 EST 2011
x86_64 x86_64 x86_64 GNU/Linux
[root@ln0 select]# ./tsel 1023
dup2 returned 1023
file descriptor 1023 in readable set
file descriptor 1023 in exception set
select returned 1
file descriptor 1023 readable
[root@ln0 select]# ./tsel 1024
dup2 returned 1024
file descriptor 1024 in readable set
file descriptor 1024 in exception set
select returned 1
[root@ln0 select]# ./tsel 1027
dup2 returned 1027
file descriptor 1027 in readable set
file descriptor 1027 in exception set
select returned -1
failed to select:: Bad file descriptor
I changed "select()" to "poll()" to fix this problem.
>From ef51b8e20b9dd7215c7c2e5a3eaf69e2046a8fc9 Mon Sep 17 00:00:00 2001
From: Hongjia Cao <[email protected]>
Date: Tue, 12 Mar 2013 16:17:15 +0800
Subject: [PATCH] change select() to poll() in waiting for a socket to be
readable
select()/FD_ISSET() do not work for file descriptors larger than 1023.
---
src/common/fd.c | 31 ++++++++++++++++---------------
1 个文件被修改,插入 16 行(+),删除 15 行(-)
diff --git a/src/common/fd.c b/src/common/fd.c
index 04e1583..c43072d 100644
--- a/src/common/fd.c
+++ b/src/common/fd.c
@@ -44,6 +44,7 @@
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
+#include <poll.h>
#include "src/common/macros.h"
#include "src/common/fd.h"
@@ -280,30 +281,30 @@ ssize_t fd_read_line(int fd, void *buf, size_t maxlen)
* Return 0 when readable or -1 on error */
extern int wait_fd_readable(int fd, int time_limit)
{
- fd_set except_fds, read_fds;
- struct timeval timeout;
- int rc;
-
- FD_ZERO(&except_fds);
- FD_SET(fd, &except_fds);
- FD_ZERO(&read_fds);
- FD_SET(fd, &read_fds);
- timeout.tv_sec = time_limit;
- timeout.tv_usec = 0;
+ struct pollfd ufd;
+ time_t start;
+ int rc, time_left;
+
+ start = time(NULL);
+ time_left = time_limit;
+ ufd.fd = fd;
+ ufd.events = POLLIN;
+ ufd.revents = 0;
while (1) {
- rc = select(fd+1, &read_fds, NULL, &except_fds, &timeout);
-
+ rc = poll(&ufd, 1, time_left * 1000);
if (rc > 0) { /* activity on this fd */
- if (FD_ISSET(fd, &read_fds))
+ if (ufd.revents & POLLIN)
return 0;
else /* Exception */
return -1;
} else if (rc == 0) {
error("Timeout waiting for slurmstepd");
return -1;
- } else if (errno == EINTR) {
- error("select(): %m");
+ } else if (errno != EINTR) {
+ error("poll(): %m");
return -1;
+ } else {
+ time_left = time_limit - (time(NULL) - start);
}
}
}
--
1.7.10.4
#include <stdio.h>
#include <stdlib.h>
#include <sys/select.h>
#include <unistd.h>
/*
 * tsel - show how select()/FD_ISSET() behave for a chosen descriptor number.
 *
 * Usage: tsel <fileno>
 *
 * Duplicates stdin onto descriptor <fileno>, adds that descriptor to a read
 * set and an exception set, and calls select() with a 10 second timeout.
 * Each step is reported on stdout so the results for different descriptor
 * numbers can be compared.
 *
 * NOTE: no upper bound is enforced on <fileno> on purpose. Using FD_SET()
 * or FD_ISSET() with a descriptor >= FD_SETSIZE is undefined behaviour,
 * and provoking exactly that is what this program is for. Do not copy this
 * pattern into production code; use poll() there instead.
 *
 * Returns 0 after select() has been attempted (whatever its result), and
 * exits with status 1 on a usage error or if dup2() fails.
 */
int
main(int argc, char **argv)
{
	fd_set except_fds, read_fds;
	struct timeval timeout;
	int rc, fd;

	if (argc < 2) {
		fprintf(stderr, "please give me the fileno\n");
		exit(1);
	}
	fd = atoi(argv[1]);

	rc = dup2(fileno(stdin), fd);
	/* The original call omitted the argument for %d (undefined behaviour). */
	printf("dup2 returned %d\n", rc);
	if (rc < 0) {
		/*
		 * Without a valid descriptor the remaining steps would only
		 * report a misleading EBADF (and FD_SET() on a negative fd is
		 * itself undefined), so stop here.
		 */
		perror("failed to dup2:");
		exit(1);
	}

	FD_ZERO(&except_fds);
	FD_SET(fd, &except_fds);
	FD_ZERO(&read_fds);
	FD_SET(fd, &read_fds);

	timeout.tv_sec = 10;
	timeout.tv_usec = 0;

	/* Confirm that the bits are visible in the sets before select(). */
	if (FD_ISSET(fd, &read_fds)) {
		printf("file descriptor %d in readable set\n", fd);
	}
	if (FD_ISSET(fd, &except_fds)) {
		printf("file descriptor %d in exception set\n", fd);
	}

	rc = select(fd + 1, &read_fds, NULL, &except_fds, &timeout);
	printf("select returned %d\n", rc);
	if (rc > 0) {
		if (FD_ISSET(fd, &read_fds)) {
			printf("file descriptor %d readable\n", fd);
		} else if (FD_ISSET(fd, &except_fds)) {
			printf("file descriptor %d exception\n", fd);
		}
	} else if (rc < 0) {
		perror("failed to select:");
	} else {
		fprintf(stderr, "timed out\n");
	}

	return 0;
}