Hi List,

I was debugging a problem in my own program which sometimes received
'Address already in use' during creation of the netlink socket. It turned out
that udevd has the same bug. What actually happens is that udevd opens
the netlink socket and forks afterwards. That doesn't sound bad at all, but
the PID of udevd is stored inside the struct sockaddr_nl (nl_pid) of the socket. So after udevd
has forked and the parent has exited, the PID stored in the kernel no longer exists. If another process
is now started that wants to do netlink communication with the kernel and
has (by coincidence) the same PID, it will fail.

Example from my system running udev:

pid of udevd is 18921:

# pidof udevd
18921

Get the inode of the netlink socket held by PID 18921:

$ lsof -np 18921
COMMAND   PID USER   FD      TYPE     DEVICE SIZE/OFF    NODE NAME
udevd   18921 root  cwd       DIR      253,1     4096       2 /
udevd   18921 root  rtd       DIR      253,1     4096       2 /
udevd   18921 root  txt       REG      253,1   161776 7823451 /sbin/udevd
udevd   18921 root  mem       REG      253,1    47080 2122850 /lib/i386-linux-gnu/i686/cmov/libnss_files-2.17.so
udevd   18921 root  mem       REG      253,1    42668 2122852 /lib/i386-linux-gnu/i686/cmov/libnss_nis-2.17.so
udevd   18921 root  mem       REG      253,1    13856 2122844 /lib/i386-linux-gnu/i686/cmov/libdl-2.17.so
udevd   18921 root  mem       REG      253,1   125258 2122837 /lib/i386-linux-gnu/i686/cmov/libpthread-2.17.so
udevd   18921 root  mem       REG      253,1   255908  688195 /lib/i386-linux-gnu/libpcre.so.3.13.1
udevd   18921 root  mem       REG      253,1  1759012 2122841 /lib/i386-linux-gnu/i686/cmov/libc-2.17.so
udevd   18921 root  mem       REG      253,1    30696 2122856 /lib/i386-linux-gnu/i686/cmov/librt-2.17.so
udevd   18921 root  mem       REG      253,1   133088  658519 /lib/i386-linux-gnu/libselinux.so.1
udevd   18921 root  mem       REG      253,1    87940 2122847 /lib/i386-linux-gnu/i686/cmov/libnsl-2.17.so
udevd   18921 root  mem       REG      253,1    30560 2122848 /lib/i386-linux-gnu/i686/cmov/libnss_compat-2.17.so
udevd   18921 root  mem       REG      253,1   134376 8478759 /lib/i386-linux-gnu/ld-2.17.so
udevd   18921 root    0u      CHR        1,3      0t0    1029 /dev/null
udevd   18921 root    1u      CHR        1,3      0t0    1029 /dev/null
udevd   18921 root    2u      CHR        1,3      0t0    1029 /dev/null
udevd   18921 root    3u     unix 0xc019b940      0t0  784351 /run/udev/control
udevd   18921 root    4u  netlink                 0t0  784352 KOBJECT_UEVENT
udevd   18921 root    5u      REG       0,13        8  784354 /run/udev/queue.bin
udevd   18921 root    6r     0000        0,9        0    4048 anon_inode
udevd   18921 root    7u     0000        0,9        0    4048 anon_inode
udevd   18921 root    8u     unix 0xdf574040      0t0  788161 socket
udevd   18921 root    9u     unix 0xe3ddb4c0      0t0  788162 socket
udevd   18921 root   10u     0000        0,9        0    4048 anon_inode
udevd   18921 root   11u     unix 0xe3ddb940      0t0  788165 socket

-> 784352

check PID with /proc/net/netlink:

$ grep 784352 /proc/net/netlink
e70ad800 15  18920  00000001 0        0        0 2        0 784352

This shows 18920, which is the PID udevd had before the daemonize fork.

I'm using the following diff (fork before opening the netlink socket):

$ git diff
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
index 7c6c5d6..4e0a789 100644
--- a/src/udev/udevd.c
+++ b/src/udev/udevd.c
@@ -1003,6 +1003,7 @@ int main(int argc, char *argv[])
         /* before opening new files, make sure std{in,out,err} fds are in a sane state */
         if (daemonize) {
                 int fd;
+                pid_t pid;

                 fd = open("/dev/null", O_RDWR);
                 if (fd >= 0) {
@@ -1016,6 +1017,23 @@ int main(int argc, char *argv[])
                         fprintf(stderr, "cannot open /dev/null\n");
                         log_error("cannot open /dev/null\n");
                 }
+
+                pid = fork();
+                switch (pid) {
+                case 0:
+                        break;
+                case -1:
+                        log_error("fork of daemon failed: %m\n");
+                        rc = 4;
+                        goto exit;
+                default:
+                        rc = EXIT_SUCCESS;
+                        goto exit_daemonize;
+                }
+
+                setsid();
+
+                write_string_file("/proc/self/oom_score_adj", "-1000");
         }

         if (systemd_fds(udev, &fd_ctrl, &fd_netlink) >= 0) {
@@ -1081,28 +1099,8 @@ int main(int argc, char *argv[])
                 goto exit;
         }

-        if (daemonize) {
-                pid_t pid;
-
-                pid = fork();
-                switch (pid) {
-                case 0:
-                        break;
-                case -1:
-                        log_error("fork of daemon failed: %m\n");
-                        rc = 4;
-                        goto exit;
-                default:
-                        rc = EXIT_SUCCESS;
-                        goto exit_daemonize;
-                }
-
-                setsid();
-
-                write_string_file("/proc/self/oom_score_adj", "-1000");
-        } else {
+        if (!daemonize)
                 sd_notify(1, "READY=1");
-        }

         print_kmsg("starting version " VERSION "\n");

Regards,
Sven
_______________________________________________
systemd-devel mailing list
systemd-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/systemd-devel

Reply via email to