(following up on my message from last month)

Background: OpenAFS is vulnerable to crashing in the Linux kernel symlink code when running on kernel versions 2.6.10 through 2.6.12. This also includes all RHEL4 kernels, because RHEL4 includes the code from 2.6.10. The problem is that the symlink text caching API, page_follow_link() et al., is unsuitable for network filesystems where the page cache may be invalidated in parallel with a path lookup.

This crash can be triggered easily by doing a bunch of path lookups involving symlinks (e.g., stat() on various files pointed to through links), while simultaneously running 'fs flushvol' on the volume containing the symlinks.

The simplest way to fix this problem is to disable the use of symlink text caching when the kernel does not provide a usable symlink API.

----


My patch last month was buggy: it did not work under load because I copied and pasted the following code from
src/afs/LINUX/osi_vnodeops.c::afs_linux_follow_link:

        AFS_GLOCK();
        name = osi_Alloc(PATH_MAX + 1);
        ...
        ...


This code (from the former afs_linux_follow_link()) appears to be used only on Linux 2.2 and older kernels. There are two problems here. First, PATH_MAX is defined to be 4096 on Linux. Therefore, on most machines (e.g. i386 and amd64, where a page is 4096 bytes), osi_Alloc(PATH_MAX+1) wants to allocate more than a single page of memory. This forces the use of vmalloc(), which is something that should be avoided.

Secondly, the allocation was wrapped within AFS_GLOCK()/AFS_GUNLOCK(). The OpenAFS implementation of osi_Alloc() for the Linux kernel will do one of two things depending upon the requested allocation size:

        if (size <= PAGE_SIZE) {
                kmalloc(size, GFP_NOFS);
        } else {
                vmalloc(size);
        }

GFP_NOFS tells the allocator not to recurse back into the filesystem if it's necessary to free up memory. However, vmalloc() does not have such an option. Therefore, calling osi_Alloc() to request more than a page of memory may end up recursing back into AFS to try to free unused inodes or dentries.

In this case, what happened was that osi_Alloc() was called while AFS_GLOCK() was held; osi_Alloc() called vmalloc(), which tried to free dentry objects, which in turn called back into the AFS module. Unfortunately, the AFS global lock was already held, so we deadlocked.



I solved this by:

-       limiting symlink text to PATH_MAX instead of PATH_MAX+1 (this is
        what OpenAFS already did for 2.4+ kernels anyway via
        afs_symlink_filler()).  This avoids calling vmalloc().

-       removing the AFS_GLOCK()/AFS_GUNLOCK() wrappers around
        osi_Alloc().  I don't see any reason to hold the lock across
        the allocation.



An updated version of the patch (against openafs-stable-1_4_x) is attached. It should fix the symlink crashes on 2.6.10-2.6.12, as well as all RHEL4 kernels. No code changes are made unless compiled on one of these buggy kernels.

You can also download this patch from:

        
http://www-personal.engin.umich.edu/~wingc/openafs/patches/openafs-stable-1_4_x-20070418-symlink26.patch


-Chris Wing
[EMAIL PROTECTED]



diff -uNr openafs.orig/acinclude.m4 openafs/acinclude.m4
--- openafs.orig/acinclude.m4   2007-02-22 16:48:58.000000000 -0500
+++ openafs/acinclude.m4        2007-04-18 10:46:23.000000000 -0400
@@ -606,6 +606,7 @@
                 LINUX_IOP_I_CREATE_TAKES_NAMEIDATA
                 LINUX_IOP_I_LOOKUP_TAKES_NAMEIDATA
                 LINUX_IOP_I_PERMISSION_TAKES_NAMEIDATA
+                LINUX_IOP_I_PUT_LINK_TAKES_COOKIE
                 LINUX_DOP_D_REVALIDATE_TAKES_NAMEIDATA
                 LINUX_AOP_WRITEBACK_CONTROL
                 LINUX_FS_STRUCT_FOP_HAS_FLOCK
@@ -848,6 +849,11 @@
                 if test "x$ac_cv_linux_exports_tasklist_lock" = "xyes" ; then
                  AC_DEFINE(EXPORTED_TASKLIST_LOCK, 1, [define if tasklist_lock 
exported])
                 fi
+                if test "x$ac_cv_linux_kernel_page_follow_link" = "xyes" -o 
"x$ac_cv_linux_func_i_put_link_takes_cookie" = "xyes"; then
+                 AC_DEFINE(USABLE_KERNEL_PAGE_SYMLINK_CACHE, 1, [define if 
your kernel has a usable symlink cache API])
+                else
+                 AC_MSG_WARN([your kernel does not have a usable symlink cache 
API])
+                fi
                 :
                fi
 esac
diff -uNr openafs.orig/src/afs/LINUX/osi_vnodeops.c 
openafs/src/afs/LINUX/osi_vnodeops.c
--- openafs.orig/src/afs/LINUX/osi_vnodeops.c   2007-02-20 13:06:24.000000000 
-0500
+++ openafs/src/afs/LINUX/osi_vnodeops.c        2007-04-18 10:46:23.000000000 
-0400
@@ -1279,7 +1279,7 @@
        return -code;
 }

-#if !defined(AFS_LINUX24_ENV)
+#if !defined(USABLE_KERNEL_PAGE_SYMLINK_CACHE)
 /* afs_linux_readlink
  * Fill target (which is in user space) with contents of symlink.
  */
@@ -1299,6 +1299,36 @@
 /* afs_linux_follow_link
  * a file system dependent link following routine.
  */
+#if defined(AFS_LINUX24_ENV)
+static int afs_linux_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+    int code;
+    char *name;
+
+    name = osi_Alloc(PATH_MAX);
+    if (!name) {
+       return -EIO;
+    }
+
+    AFS_GLOCK();
+    code = afs_linux_ireadlink(dentry->d_inode, name, PATH_MAX - 1, 
AFS_UIOSYS);
+    AFS_GUNLOCK();
+
+    if (code < 0) {
+       goto out;
+    }
+
+    name[code] = '\0';
+    code = vfs_follow_link(nd, name);
+
+out:
+    osi_Free(name, PATH_MAX);
+
+    return code;
+}
+
+#else /* !defined(AFS_LINUX24_ENV) */
+
 static struct dentry *
 afs_linux_follow_link(struct dentry *dp, struct dentry *basep,
                      unsigned int follow)
@@ -1332,7 +1362,8 @@
     AFS_GUNLOCK();
     return res;
 }
-#endif
+#endif /* AFS_LINUX24_ENV */
+#endif /* USABLE_KERNEL_PAGE_SYMLINK_CACHE */

 /* afs_linux_readpage
  * all reads come through here. A strategy-like read call.
@@ -1697,7 +1728,7 @@
 /* We really need a separate symlink set of ops, since do_follow_link()
  * determines if it _is_ a link by checking if the follow_link op is set.
  */
-#if defined(AFS_LINUX24_ENV)
+#if defined(USABLE_KERNEL_PAGE_SYMLINK_CACHE)
 static int
 afs_symlink_filler(struct file *file, struct page *page)
 {
@@ -1732,10 +1763,10 @@
 static struct address_space_operations afs_symlink_aops = {
   .readpage =  afs_symlink_filler
 };
-#endif
+#endif /* USABLE_KERNEL_PAGE_SYMLINK_CACHE */

 static struct inode_operations afs_symlink_iops = {
-#if defined(AFS_LINUX24_ENV)
+#if defined(USABLE_KERNEL_PAGE_SYMLINK_CACHE)
   .readlink =          page_readlink,
 #if defined(HAVE_KERNEL_PAGE_FOLLOW_LINK)
   .follow_link =       page_follow_link,
@@ -1743,13 +1774,17 @@
   .follow_link =       page_follow_link_light,
   .put_link =           page_put_link,
 #endif
-  .setattr =           afs_notify_change,
-#else
+#else /* !defined(USABLE_KERNEL_PAGE_SYMLINK_CACHE) */
   .readlink =          afs_linux_readlink,
   .follow_link =       afs_linux_follow_link,
+#if !defined(AFS_LINUX24_ENV)
   .permission =                afs_linux_permission,
   .revalidate =                afs_linux_revalidate,
 #endif
+#endif /* USABLE_KERNEL_PAGE_SYMLINK_CACHE */
+#if defined(AFS_LINUX24_ENV)
+  .setattr =           afs_notify_change,
+#endif
 };

 void
@@ -1775,7 +1810,7 @@

     } else if (S_ISLNK(ip->i_mode)) {
        ip->i_op = &afs_symlink_iops;
-#if defined(AFS_LINUX24_ENV)
+#if defined(USABLE_KERNEL_PAGE_SYMLINK_CACHE)
        ip->i_data.a_ops = &afs_symlink_aops;
        ip->i_mapping = &ip->i_data;
 #endif
diff -uNr openafs.orig/src/cf/linux-test4.m4 openafs/src/cf/linux-test4.m4
--- openafs.orig/src/cf/linux-test4.m4  2007-02-26 12:53:33.000000000 -0500
+++ openafs/src/cf/linux-test4.m4       2007-04-18 10:46:23.000000000 -0400
@@ -644,6 +644,22 @@
   AC_MSG_RESULT($ac_cv_linux_func_i_permission_takes_nameidata)])


+AC_DEFUN([LINUX_IOP_I_PUT_LINK_TAKES_COOKIE], [
+  AC_MSG_CHECKING([whether inode_operations.put_link takes an opaque cookie])
+  AC_CACHE_VAL([ac_cv_linux_func_i_put_link_takes_cookie], [
+    AC_TRY_KBUILD(
+[#include <linux/fs.h>
+#include <linux/namei.h>],
+[struct inode _inode;
+struct dentry _dentry;
+struct nameidata _nameidata;
+void *cookie;
+(void)_inode.i_op->put_link(&_dentry, &_nameidata, cookie);],
+      ac_cv_linux_func_i_put_link_takes_cookie=yes,
+      ac_cv_linux_func_i_put_link_takes_cookie=no)])
+  AC_MSG_RESULT($ac_cv_linux_func_i_put_link_takes_cookie)])
+
+
 AC_DEFUN([LINUX_DOP_D_REVALIDATE_TAKES_NAMEIDATA], [
   AC_MSG_CHECKING([whether dentry_operations.d_revalidate takes a nameidata])
   AC_CACHE_VAL([ac_cv_linux_func_d_revalidate_takes_nameidata], [
_______________________________________________
OpenAFS-devel mailing list
[email protected]
https://lists.openafs.org/mailman/listinfo/openafs-devel

Reply via email to