[PATCH 1/5] fs: Introduce vfs_path_lookup

2007-05-23 Thread Josef 'Jeff' Sipek
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/namei.c|   32 
 include/linux/namei.h |2 ++
 2 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 580162b..a30efbc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1172,6 +1172,37 @@ int fastcall path_lookup(const char *name, unsigned int 
flags,
return do_path_lookup(AT_FDCWD, name, flags, nd);
 }
 
+/**
+ * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
+ * @dentry:  pointer to dentry of the base directory
+ * @mnt: pointer to vfs mount of the base directory
+ * @name: pointer to file name
+ * @flags: lookup flags
+ * @nd: pointer to nameidata
+ */
+int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
+   const char *name, unsigned int flags, 
+   struct nameidata *nd)
+{
+   int retval;
+
+   /* same as do_path_lookup */
+   nd->last_type = LAST_ROOT;
+   nd->flags = flags;
+   nd->depth = 0;
+
+   nd->mnt = mntget(mnt);
+   nd->dentry = dget(dentry);
+
+   retval = path_walk(name, nd);
+   if (unlikely(!retval && !audit_dummy_context() && nd->dentry &&
+   nd->dentry->d_inode))
+   audit_inode(name, nd->dentry->d_inode);
+
+   return retval;
+
+}
+
 static int __path_lookup_intent_open(int dfd, const char *name,
unsigned int lookup_flags, struct nameidata *nd,
int open_flags, int create_mode)
@@ -2775,6 +2806,7 @@ EXPORT_SYMBOL(__page_symlink);
 EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(page_symlink_inode_operations);
 EXPORT_SYMBOL(path_lookup);
+EXPORT_SYMBOL(vfs_path_lookup);
 EXPORT_SYMBOL(path_release);
 EXPORT_SYMBOL(path_walk);
 EXPORT_SYMBOL(permission);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 0ab27ba..4718788 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -70,6 +70,8 @@ extern int FASTCALL(__user_walk_fd(int dfd, const char __user 
*, unsigned, struc
 #define user_path_walk_link(name,nd) \
__user_walk_fd(AT_FDCWD, name, 0, nd)
 extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
+extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
+  const char *, unsigned int, struct nameidata *);
 extern int FASTCALL(path_walk(const char *, struct nameidata *));
 extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
 extern void path_release(struct nameidata *);
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/5] fs: Remove path_walk export

2007-05-23 Thread Josef 'Jeff' Sipek
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/namei.c|3 +--
 include/linux/namei.h |1 -
 2 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 50285a1..15f45ac 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1024,7 +1024,7 @@ static int fastcall link_path_walk(const char *name, 
struct nameidata *nd)
return result;
 }
 
-int fastcall path_walk(const char * name, struct nameidata *nd)
+static int fastcall path_walk(const char * name, struct nameidata *nd)
 {
current->total_link_count = 0;
return link_path_walk(name, nd);
@@ -2810,7 +2810,6 @@ EXPORT_SYMBOL(page_symlink_inode_operations);
 EXPORT_SYMBOL(path_lookup);
 EXPORT_SYMBOL(vfs_path_lookup);
 EXPORT_SYMBOL(path_release);
-EXPORT_SYMBOL(path_walk);
 EXPORT_SYMBOL(permission);
 EXPORT_SYMBOL(vfs_permission);
 EXPORT_SYMBOL(file_permission);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index d60a5eb..e378e1f 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -72,7 +72,6 @@ extern int FASTCALL(__user_walk_fd(int dfd, const char __user 
*, unsigned, struc
 extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
   const char *, unsigned int, struct nameidata *);
-extern int FASTCALL(path_walk(const char *, struct nameidata *));
 extern void path_release(struct nameidata *);
 extern void path_release_on_umount(struct nameidata *);
 
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/5] nfsctl: Use vfs_path_lookup

2007-05-23 Thread Josef 'Jeff' Sipek
use vfs_path_lookup instead of open-coding the necessary functionality.

Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
Acked-by: NeilBrown <[EMAIL PROTECTED]>
---
 fs/nfsctl.c |   16 ++--
 1 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index c043136..51f1b31 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -23,19 +23,15 @@
 static struct file *do_open(char *name, int flags)
 {
struct nameidata nd;
+   struct vfsmount *mnt;
int error;
 
-   nd.mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
+   mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
+   if (IS_ERR(mnt))
+   return (struct file *)mnt;
 
-   if (IS_ERR(nd.mnt))
-   return (struct file *)nd.mnt;
-
-   nd.dentry = dget(nd.mnt->mnt_root);
-   nd.last_type = LAST_ROOT;
-   nd.flags = 0;
-   nd.depth = 0;
-
-   error = path_walk(name, &nd);
+   error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd);
+   mntput(mnt);/* drop do_kern_mount reference */
if (error)
return ERR_PTR(error);
 
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/5] sunrpc: Use vfs_path_lookup

2007-05-23 Thread Josef 'Jeff' Sipek
use vfs_path_lookup instead of open-coding the necessary functionality.

Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
Acked-by: Trond Myklebust <[EMAIL PROTECTED]>
---
 net/sunrpc/rpc_pipe.c |   16 +++-
 1 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 5887457..db6f231 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -451,21 +451,19 @@ void rpc_put_mount(void)
 static int
 rpc_lookup_parent(char *path, struct nameidata *nd)
 {
+   struct vfsmount *mnt;
+
if (path[0] == '\0')
return -ENOENT;
-   nd->mnt = rpc_get_mount();
-   if (IS_ERR(nd->mnt)) {
+
+   mnt = rpc_get_mount();
+   if (IS_ERR(mnt)) {
printk(KERN_WARNING "%s: %s failed to mount "
   "pseudofilesystem \n", __FILE__, __FUNCTION__);
-   return PTR_ERR(nd->mnt);
+   return PTR_ERR(mnt);
}
-   mntget(nd->mnt);
-   nd->dentry = dget(rpc_mount->mnt_root);
-   nd->last_type = LAST_ROOT;
-   nd->flags = LOOKUP_PARENT;
-   nd->depth = 0;
 
-   if (path_walk(path, nd)) {
+   if (vfs_path_lookup(mnt->mnt_root, mnt, path, LOOKUP_PARENT, nd)) {
printk(KERN_WARNING "%s: %s failed to find path %s\n",
__FILE__, __FUNCTION__, path);
rpc_put_mount();
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/5] fs: Mark link_path_walk static

2007-05-23 Thread Josef 'Jeff' Sipek
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/namei.c|4 +++-
 include/linux/namei.h |1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index a30efbc..50285a1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -107,6 +107,8 @@
  * any extra contention...
  */
 
+static int fastcall link_path_walk(const char *name, struct nameidata *nd);
+
 /* In order to reduce some races, while at the same time doing additional
  * checking and hopefully speeding things up, we copy filenames to the
  * kernel data space before using them..
@@ -998,7 +1000,7 @@ return_err:
  * Retry the whole path once, forcing real lookup requests
  * instead of relying on the dcache.
  */
-int fastcall link_path_walk(const char *name, struct nameidata *nd)
+static int fastcall link_path_walk(const char *name, struct nameidata *nd)
 {
struct nameidata save = *nd;
int result;
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 4718788..d60a5eb 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -73,7 +73,6 @@ extern int FASTCALL(path_lookup(const char *, unsigned, 
struct nameidata *));
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
   const char *, unsigned int, struct nameidata *);
 extern int FASTCALL(path_walk(const char *, struct nameidata *));
-extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
 extern void path_release(struct nameidata *);
 extern void path_release_on_umount(struct nameidata *);
 
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/5] New path lookup function (V4)

2007-05-23 Thread Josef 'Jeff' Sipek
The only change since V3, is the fix for a vfsmount reference leak in the
nfsctl patch (pointed out by hch).

Stackable file systems, among others, frequently need to lookup paths or
path components starting from an arbitrary point in the namespace
(identified by a dentry and a vfsmount).  Currently, such file systems use
lookup_one_len, which is frowned upon [1] as it does not pass the lookup
intent along; not passing a lookup intent, for example, can trigger BUG_ON's
when stacking on top of NFSv4.

The first patch introduces a new lookup function to allow lookup starting
from an arbitrary point in the namespace.  This approach has been suggested
by Christoph Hellwig [2].

The second patch changes sunrpc to use vfs_path_lookup.

The third patch changes nfsctl.c to use vfs_path_lookup.

The fourth patch marks link_path_walk static.

The fifth, and last patch, unexports path_walk because it is no longer
unnecessary to call it directly, and using the new vfs_path_lookup is
cleaner.

For example, the following snippet of code, looks up "some/path/component"
in a directory pointed to by parent_{dentry,vfsmnt}:

err = vfs_path_lookup(parent_dentry, parent_vfsmnt,
  "some/path/component", 0, &nd);
if (!err) {
/* exits */

...

/* once done, release the references */
path_release(&nd);
} else if (err == -ENOENT) {
/* doesn't exist */
} else {
/* other error */
}

VFS functions such as lookup_create can be used on the nameidata structure
to pass the create intent to the file system.

Josef 'Jeff' Sipek.

[1] http://lkml.org/lkml/2007/3/9/95
[2] http://lkml.org/lkml/2007/5/4/51

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [AppArmor 01/41] Pass struct vfsmount to the inode_create LSM hook

2007-05-23 Thread James Morris
On Wed, 23 May 2007, Andreas Gruenbacher wrote:

> This is backwards from what AppArmor does. The policy defines which paths may 
> be accessed; all paths not explicitly listed are denied. If files are mounted 
> at multiple locations, then the policy may allow access to some locations but 
> not to others. That's not a hole.

I don't know what else you'd call it.

Would you mind providing some concrete examples of how such a model would 
be useful?


- James
-- 
James Morris
<[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


possible bug/oops in nfs_pageio_add_request (2.6.22-rc2)?

2007-05-23 Thread Erez Zadok
I've hit a NULL ptr deref on desc->pg_error below, triggered when mounting a
stackable file system on top of nfsv3:

// from file: nfs/pagelist.c
int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
   struct nfs_page *req)
{
while (!nfs_pageio_do_add_request(desc, req)) {
nfs_pageio_doio(desc);
if (desc->pg_error < 0)

Scenario:

2.6.22-rc2 with Unionfs 2.0 (release u2 for 2.6.22-rc2, which includes mmap
support).

I mount unionfs on top of nfs (v3).  I have one file in the nfs branch.  I
run a simple program through the union which mmap's the file, changes the
first byte of the file, calls msync(), and then closes.  This causes
unionfs_writepage to be invoked, which in turn calls the lower file system's
->writepage, here nfs_writepage.

The 'wbc' that's passed to unionfs_writepage from the VFS has this:

wbc->for_writepages = 1
wbc->fs_private = NULL

If you follow the logic, then nfs_writepage calls nfs_writepage_locked,
passing the same wbc.  nfs_writepage_locked does this:

if (wbc->for_writepages)
pgio = wbc->fs_private;
else {
nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc));
pgio = &mypgio;
}

which means that pgio is set to NULL from the caller's wbc.  Then
nfs_writepage_locked calls nfs_page_async_flush, passing it this pgio
(NULL).  nfs_page_async_flush invokes nfs_pageio_add_request, passing it
this NULL pgio.  Inside nfs_pageio_add_request the NULL is being
dereferenced as desc->pg_error and we get an oops.

As a workaround, in unionfs_writepage I tried this before calling the lower
file system's ->writepage (which was nfs_writepage):

struct writeback_control lower_wbc;
memcpy(&lower_wbc, wbc, sizeof(struct writeback_control));
if (lower_wbc.for_writepages && !lower_wbc.fs_private) {
printk("unionfs: setting wbc.for_writepages to 0\n");
lower_wbc.for_writepages = 0;
}

Then I passed &lower_wbc to the lower file system's writepage method
(nfs_writepage).  It works; no oops, and the file in question was sync'ed to
the backing f/s too.  But I'm not sure if it's the correct workaround and
whether it'd break things for other non-NFS file systems.

It's possible that I'm doing something wrong in unionfs's mmap code, which
indirectly results in a malformed wbc structure being passed to unionfs (by
malformed I mean that wbc->fs_private is NULL and wbc->for_writepages is set
to 1).  If such a wbc can be created by any other means and passed to NFS,
then nfs probably will continue to oops even w/o unionfs.

FWIW, I tried a similar scenario with eCryptfs (another stackable f/s in
2.6.22-rc2) on top of NFSv3, and got the same oops (sorry, Mike :-)

Any pointers would be appreciated.

Thanks,
Erez.
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/21] Unionfs: Don't leak resources when copyup fails partially

2007-05-23 Thread Josef 'Jeff' Sipek
Original-patch-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/copyup.c |   24 ++--
 1 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
index 0975b6e..a80ece6 100644
--- a/fs/unionfs/copyup.c
+++ b/fs/unionfs/copyup.c
@@ -632,7 +632,7 @@ static struct dentry *create_parents_named(struct inode 
*dir,
unsigned int childnamelen;
 
int nr_dentry;
-   int count;
+   int count = 0;
 
int old_bstart;
int old_bend;
@@ -660,7 +660,6 @@ static struct dentry *create_parents_named(struct inode 
*dir,
/* assume the negative dentry of unionfs as the parent dentry */
parent_dentry = dentry;
 
-   count = 0;
/*
 * This loop finds the first parent that exists in the given branch.
 * We start building the directory structure from there.  At the end
@@ -772,6 +771,23 @@ static struct dentry *create_parents_named(struct inode 
*dir,
 hidden_dentry);
unlock_dir(hidden_parent_dentry);
if (err) {
+   struct inode *inode = hidden_dentry->d_inode;
+   /*
+* If we get here, it means that we created a 
new
+* dentry+inode, but copying permissions failed.
+* Therefore, we should delete this inode and 
dput
+* the dentry so as not to leave cruft behind.
+*
+* XXX: call dentry_iput() instead, but then we 
have
+* to export that symbol.
+*/
+   if (hidden_dentry->d_op && 
hidden_dentry->d_op->d_iput)
+   
hidden_dentry->d_op->d_iput(hidden_dentry,
+   inode);
+   else
+   iput(inode);
+   hidden_dentry->d_inode = NULL;
+
dput(hidden_dentry);
hidden_dentry = ERR_PTR(err);
goto out;
@@ -786,6 +802,10 @@ static struct dentry *create_parents_named(struct inode 
*dir,
child_dentry = path[--count];
}
 out:
+   /* cleanup any leftover locks from the do/while loop above */
+   if (IS_ERR(hidden_dentry))
+   while (count)
+   unionfs_unlock_dentry(path[count--]);
kfree(path);
return hidden_dentry;
 }
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/21] Unionfs: Remove defunct unionfs_put_inode super op

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

Removed old workaround code that was needed to get mmap working, is no
longer needed with recent kernels.

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/super.c |   20 
 1 files changed, 0 insertions(+), 20 deletions(-)

diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
index 446faf8..bfd6c24 100644
--- a/fs/unionfs/super.c
+++ b/fs/unionfs/super.c
@@ -62,25 +62,6 @@ static void unionfs_read_inode(struct inode *inode)
inode->i_mapping->a_ops = &unionfs_empty_aops;
 }
 
-static void unionfs_put_inode(struct inode *inode)
-{
-   /*
-* This is really funky stuff:
-*
-* Basically, if i_count == 1, iput will then decrement it and this
-* inode will be destroyed.  It is currently holding a reference to
-* the hidden inode.  Therefore, it needs to release that reference
-* by calling iput on the hidden inode.  iput() _will_ do it for us
-* (by calling our clear_inode), but _only_ if i_nlink == 0.  The
-* problem is, NFS keeps i_nlink == 1 for silly_rename'd files.  So
-* we must force our i_nlink to 0 here to trick iput() into calling
-* our clear_inode.
-*/
-
-   if (atomic_read(&inode->i_count) == 1)
-   inode->i_nlink = 0;
-}
-
 /*
  * we now define delete_inode, because there are two VFS paths that may
  * destroy an inode: one of them calls clear inode before doing everything
@@ -938,7 +919,6 @@ out:
 
 struct super_operations unionfs_sops = {
.read_inode = unionfs_read_inode,
-   .put_inode  = unionfs_put_inode,
.delete_inode   = unionfs_delete_inode,
.put_super  = unionfs_put_super,
.statfs = unionfs_statfs,
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/21] Unionfs: Added numerous comments

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

Audited entire code for documentation.  Added comments at top of functions
where it felt necessary (i.e., function's name and size don't make it clear
what it may be doing precisely).  Reformatted some long comments.  Fixed a
few comment typos and spelling errors.

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/commonfops.c |7 ---
 fs/unionfs/copyup.c |   22 --
 fs/unionfs/dentry.c |5 ++---
 fs/unionfs/file.c   |1 +
 fs/unionfs/lookup.c |3 ++-
 fs/unionfs/main.c   |8 +++-
 fs/unionfs/rename.c |   13 +
 fs/unionfs/subr.c   |   19 ++-
 fs/unionfs/super.c  |   15 +--
 9 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index 666b3c7..a57471e 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -240,6 +240,7 @@ out:
return err;
 }
 
+/* perform a delayed copyup of a read-write file on a read-only branch */
 static int do_delayed_copyup(struct file *file, struct dentry *dentry)
 {
int bindex, bstart, bend, err = 0;
@@ -312,9 +313,9 @@ int unionfs_file_revalidate(struct file *file, int 
willwrite)
 
/*
 * There are two cases we are interested in.  The first is if the
-* generation is lower than the super-block.  The second is if someone
-* has copied up this file from underneath us, we also need to refresh
-* things.
+* generation is lower than the super-block.  The second is if
+* someone has copied up this file from underneath us, we also need
+* to refresh things.
 */
if (!d_deleted(dentry) &&
(sbgen > fgen || dbstart(dentry) != fbstart(file))) {
diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
index 1495778..4924685 100644
--- a/fs/unionfs/copyup.c
+++ b/fs/unionfs/copyup.c
@@ -18,6 +18,12 @@
 
 #include "union.h"
 
+/*
+ * For detailed explanation of copyup see:
+ * Documentation/filesystems/unionfs/concepts.txt
+ */
+
+/* forward definitions */
 static int copyup_named_dentry(struct inode *dir, struct dentry *dentry,
   int bstart, int new_bindex, const char *name,
   int namelen, struct file **copyup_file,
@@ -26,11 +32,6 @@ static struct dentry *create_parents_named(struct inode *dir,
   struct dentry *dentry,
   const char *name, int bindex);
 
-/*
- * For detailed explanation of copyup see:
- * Documentation/filesystems/unionfs/concepts.txt
- */
-
 #ifdef CONFIG_UNION_FS_XATTR
 /* copyup all extended attrs for a given dentry */
 static int copyup_xattrs(struct dentry *old_hidden_dentry,
@@ -487,9 +488,9 @@ out:
 }
 
 /*
- * This function creates a copy of a file represented by 'file' which currently
- * resides in branch 'bstart' to branch 'new_bindex.'  The copy will be named
- * "name".
+ * This function creates a copy of a file represented by 'file' which
+ * currently resides in branch 'bstart' to branch 'new_bindex.'  The copy
+ * will be named "name".
  */
 int copyup_named_file(struct inode *dir, struct file *file, char *name,
  int bstart, int new_bindex, loff_t len)
@@ -509,8 +510,8 @@ int copyup_named_file(struct inode *dir, struct file *file, 
char *name,
 }
 
 /*
- * This function creates a copy of a file represented by 'file' which currently
- * resides in branch 'bstart' to branch 'new_bindex'.
+ * This function creates a copy of a file represented by 'file' which
+ * currently resides in branch 'bstart' to branch 'new_bindex'.
  */
 int copyup_file(struct inode *dir, struct file *file, int bstart,
int new_bindex, loff_t len)
@@ -539,6 +540,7 @@ struct dentry *create_parents(struct inode *dir, struct 
dentry *dentry,
return create_parents_named(dir, dentry, dentry->d_name.name, bindex);
 }
 
+/* purge a dentry's lower-branch states (dput/mntput, etc.) */
 static void __cleanup_dentry(struct dentry * dentry, int bindex,
 int old_bstart, int old_bend)
 {
diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
index 46a52f7..95cea3b 100644
--- a/fs/unionfs/dentry.c
+++ b/fs/unionfs/dentry.c
@@ -18,7 +18,6 @@
 
 #include "union.h"
 
-
 /*
  * Revalidate a single dentry.
  * Assume that dentry's info node is locked.
@@ -61,8 +60,8 @@ static int __unionfs_d_revalidate_one(struct dentry *dentry,
sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
/*
 * If we are working on an unconnected dentry, then there is no
-* revalidation to be done, because this file does not exist within the
-* namespace, and Unionfs operates on the namespace, not data.
+* revalidation to be done, because this file does not exist within
+* th

[PATCH 14/21] Unionfs: Call realloc unconditionally

2007-05-23 Thread Josef 'Jeff' Sipek
krealloc already checks if the new size is greater than the old size.
Therefore, we can call realloc unconditionally - making the code simpler and
cleaner.

Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/lookup.c |   26 --
 1 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
index cf78c46..758c813 100644
--- a/fs/unionfs/lookup.c
+++ b/fs/unionfs/lookup.c
@@ -451,17 +451,17 @@ void free_dentry_private_data(struct unionfs_dentry_info 
*udi)
 /* allocate new dentry private data, free old one if necessary */
 int new_dentry_private_data(struct dentry *dentry)
 {
-   int new_size;
int size;
struct unionfs_dentry_info *info = UNIONFS_D(dentry);
+   void *p;
int unlock_on_err = 0;
 
spin_lock(&dentry->d_lock);
if (!info) {
dentry->d_fsdata = kmem_cache_alloc(unionfs_dentry_cachep,
GFP_ATOMIC);
+   
info = UNIONFS_D(dentry);
-
if (!info)
goto out;
 
@@ -470,9 +470,7 @@ int new_dentry_private_data(struct dentry *dentry)
unlock_on_err = 1;
 
info->lower_paths = NULL;
-   size = 0;
-   } else
-   size = sizeof(struct path) * info->bcount;
+   }
 
info->bstart = -1;
info->bend = -1;
@@ -481,21 +479,13 @@ int new_dentry_private_data(struct dentry *dentry)
atomic_set(&info->generation,
   atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
 
-   new_size = sizeof(struct path) * sbmax(dentry->d_sb);
-
-   /*
-* Don't reallocate when we already have enough space.
-*/
-   if (new_size > size) {
-   void *p;
+   size = sizeof(struct path) * sbmax(dentry->d_sb);
 
-   p = krealloc(info->lower_paths, new_size, GFP_ATOMIC);
-   if (!p)
-   goto out_free;
+   p = krealloc(info->lower_paths, size, GFP_ATOMIC);
+   if (!p)
+   goto out_free;
 
-   info->lower_paths = p;
-   size = new_size;
-   }
+   info->lower_paths = p;
memset(info->lower_paths, 0, size);
 
spin_unlock(&dentry->d_lock);
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/21] Unionfs: Cleanup of strings and comments

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

Includes:
- consistent style for multi-line comments
- spell-check of all strings and comments

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/stack.c   |6 --
 fs/unionfs/branchman.c   |3 ++-
 fs/unionfs/commonfops.c  |   26 +-
 fs/unionfs/copyup.c  |   46 +-
 fs/unionfs/dentry.c  |6 --
 fs/unionfs/dirfops.c |6 --
 fs/unionfs/dirhelper.c   |3 ++-
 fs/unionfs/file.c|2 +-
 fs/unionfs/inode.c   |   45 +
 fs/unionfs/lookup.c  |   39 +--
 fs/unionfs/main.c|   24 
 fs/unionfs/rdstate.c |   15 ++-
 fs/unionfs/rename.c  |   27 +--
 fs/unionfs/sioq.c|3 ++-
 fs/unionfs/sioq.h|2 +-
 fs/unionfs/subr.c|   15 ++-
 fs/unionfs/super.c   |   21 +
 fs/unionfs/union.h   |   17 +++--
 fs/unionfs/xattr.c   |   12 
 include/linux/fs_stack.h |3 ++-
 20 files changed, 207 insertions(+), 114 deletions(-)

diff --git a/fs/stack.c b/fs/stack.c
index 1f5b161..4368d4b 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -13,7 +13,8 @@
 #include 
 #include 
 
-/* does _NOT_ require i_mutex to be held.
+/*
+ * does _NOT_ require i_mutex to be held.
  *
  * This function cannot be inlined since i_size_{read,write} is rather
  * heavy-weight on 32-bit systems
@@ -25,7 +26,8 @@ void fsstack_copy_inode_size(struct inode *dst, const struct 
inode *src)
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
 
-/* copy all attributes; get_nlinks is optional way to override the i_nlink
+/*
+ * copy all attributes; get_nlinks is optional way to override the i_nlink
  * copying
  */
 void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
diff --git a/fs/unionfs/branchman.c b/fs/unionfs/branchman.c
index eba2221..4545e18 100644
--- a/fs/unionfs/branchman.c
+++ b/fs/unionfs/branchman.c
@@ -18,7 +18,8 @@
 
 #include "union.h"
 
-/* return to userspace the branch indices containing the file in question
+/*
+ * return to user-space the branch indices containing the file in question
  *
  * We use fd_set and therefore we are limited to the number of the branches
  * to FD_SETSIZE, which is currently 1024 - plenty for most people
diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index 778901f..666b3c7 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -18,7 +18,8 @@
 
 #include "union.h"
 
-/* 1) Copyup the file
+/*
+ * 1) Copyup the file
  * 2) Rename the file to '.unionfs' - obviously
  * stolen from NFS's silly rename
  */
@@ -111,7 +112,8 @@ static int find_new_branch_index(struct file *file, int 
bindex,
return -1;
 }
 
-/* put all references held by upper struct file and free lower file pointer
+/*
+ * put all references held by upper struct file and free lower file pointer
  * array
  */
 static void cleanup_file(struct file *file)
@@ -129,7 +131,7 @@ static void cleanup_file(struct file *file)
int i;  /* holds (possibly) updated branch index */
i = find_new_branch_index(file, bindex, sb);
if (i < 0)
-   printk(KERN_ERR "unionfs: no supberlock for "
+   printk(KERN_ERR "unionfs: no superblock for "
   "file %p\n", file);
else {
unionfs_read_lock(sb);
@@ -308,7 +310,8 @@ int unionfs_file_revalidate(struct file *file, int 
willwrite)
 
BUG_ON(sbgen > dgen);
 
-   /* There are two cases we are interested in.  The first is if the
+   /*
+* There are two cases we are interested in.  The first is if the
 * generation is lower than the super-block.  The second is if someone
 * has copied up this file from underneath us, we also need to refresh
 * things.
@@ -397,7 +400,8 @@ static int __open_dir(struct inode *inode, struct file 
*file)
 
unionfs_set_lower_file_idx(file, bindex, hidden_file);
 
-   /* The branchget goes after the open, because otherwise
+   /*
+* The branchget goes after the open, because otherwise
 * we would miss the reference on release.
 */
unionfs_read_lock(inode->i_sb);
@@ -422,11 +426,13 @@ static int __open_file(struct inode *inode, struct file 
*file)
bstart = fbstart(file) = dbstart(file->f_dentry);
bend = fbend(file) = dbend(file->f_dentry);
 
-   /* check for the permission for hidden file.  If the error is
+   /*
+* check for the permission for hidden file.  If the error is
 

[PATCH 08/21] Unionfs: Rename Unionfs's double_lock_dentry to avoid confusion

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

To avoid potential confusion with a VFS function, rename our version of
double_lock_dentry to unionfs_double_lock_dentry.

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/inode.c  |2 +-
 fs/unionfs/rename.c |2 +-
 fs/unionfs/union.h  |3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
index c54b290..627c2a7 100644
--- a/fs/unionfs/inode.c
+++ b/fs/unionfs/inode.c
@@ -265,7 +265,7 @@ static int unionfs_link(struct dentry *old_dentry, struct 
inode *dir,
BUG_ON(!is_valid_dentry(new_dentry));
BUG_ON(!is_valid_dentry(old_dentry));
 
-   double_lock_dentry(new_dentry, old_dentry);
+   unionfs_double_lock_dentry(new_dentry, old_dentry);
 
hidden_new_dentry = unionfs_lower_dentry(new_dentry);
 
diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
index 231866e..0e1e71a 100644
--- a/fs/unionfs/rename.c
+++ b/fs/unionfs/rename.c
@@ -400,7 +400,7 @@ int unionfs_rename(struct inode *old_dir, struct dentry 
*old_dentry,
BUG_ON(!is_valid_dentry(old_dentry));
BUG_ON(!is_valid_dentry(new_dentry));
 
-   double_lock_dentry(old_dentry, new_dentry);
+   unionfs_double_lock_dentry(old_dentry, new_dentry);
 
if (!S_ISDIR(old_dentry->d_inode->i_mode))
err = unionfs_partial_lookup(old_dentry);
diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
index 0ce3a27..fee4f88 100644
--- a/fs/unionfs/union.h
+++ b/fs/unionfs/union.h
@@ -223,7 +223,8 @@ static inline off_t rdstate2offset(struct unionfs_dir_state 
*buf)
 #define unionfs_write_lock(sb)  down_write(&UNIONFS_SB(sb)->rwsem)
 #define unionfs_write_unlock(sb) up_write(&UNIONFS_SB(sb)->rwsem)
 
-static inline void double_lock_dentry(struct dentry *d1, struct dentry *d2)
+static inline void unionfs_double_lock_dentry(struct dentry *d1,
+ struct dentry *d2)
 {
if (d2 < d1) {
struct dentry *tmp = d1;
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/21] Unionfs: Prefix external functions with 'extern' properly

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/sioq.h  |1 +
 fs/unionfs/union.h |   50 +-
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h
index 5a96f66..55b781e 100644
--- a/fs/unionfs/sioq.h
+++ b/fs/unionfs/sioq.h
@@ -74,6 +74,7 @@ struct sioq_args {
};
 };
 
+/* Extern definitions for SIOQ functions */
 extern int __init init_sioq(void);
 extern __exit void stop_sioq(void);
 extern void run_sioq(work_func_t func, struct sioq_args *args);
diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
index dab408f..5376b76 100644
--- a/fs/unionfs/union.h
+++ b/fs/unionfs/union.h
@@ -183,26 +183,26 @@ struct unionfs_dir_state {
 #include "fanout.h"
 #include "sioq.h"
 
-/* Cache creation/deletion routines. */
-void unionfs_destroy_filldir_cache(void);
-int unionfs_init_filldir_cache(void);
-int unionfs_init_inode_cache(void);
-void unionfs_destroy_inode_cache(void);
-int unionfs_init_dentry_cache(void);
-void unionfs_destroy_dentry_cache(void);
+/* externs for cache creation/deletion routines */
+extern void unionfs_destroy_filldir_cache(void);
+extern int unionfs_init_filldir_cache(void);
+extern int unionfs_init_inode_cache(void);
+extern void unionfs_destroy_inode_cache(void);
+extern int unionfs_init_dentry_cache(void);
+extern void unionfs_destroy_dentry_cache(void);
 
 /* Initialize and free readdir-specific  state. */
-int init_rdstate(struct file *file);
-struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex);
-struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos);
-void free_rdstate(struct unionfs_dir_state *state);
-int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
-int namelen, int bindex, int whiteout);
-struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
-  const char *name, int namelen);
-
-struct dentry **alloc_new_dentries(int objs);
-struct unionfs_data *alloc_new_data(int objs);
+extern int init_rdstate(struct file *file);
+extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int 
bindex);
+extern struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t 
fpos);
+extern void free_rdstate(struct unionfs_dir_state *state);
+extern int add_filldir_node(struct unionfs_dir_state *rdstate, const char 
*name,
+   int namelen, int bindex, int whiteout);
+extern struct filldir_node *find_filldir_node(struct unionfs_dir_state 
*rdstate,
+ const char *name, int namelen);
+
+extern struct dentry **alloc_new_dentries(int objs);
+extern struct unionfs_data *alloc_new_data(int objs);
 
 /* We can only use 32-bits of offset for rdstate --- blech! */
 #define DIREOF (0xf)
@@ -236,8 +236,8 @@ static inline void unionfs_double_lock_dentry(struct dentry 
*d1,
 }
 
 extern int new_dentry_private_data(struct dentry *dentry);
-void free_dentry_private_data(struct unionfs_dentry_info *udi);
-void update_bstart(struct dentry *dentry);
+extern void free_dentry_private_data(struct unionfs_dentry_info *udi);
+extern void update_bstart(struct dentry *dentry);
 
 /*
  * EXTERNALS:
@@ -246,6 +246,7 @@ void update_bstart(struct dentry *dentry);
 /* replicates the directory structure up to given dentry in given branch */
 extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
 int bindex);
+extern int make_dir_opaque(struct dentry *dir, int bindex);
 
 /* partial lookup */
 extern int unionfs_partial_lookup(struct dentry *dentry);
@@ -304,10 +305,11 @@ extern long unionfs_ioctl(struct file *file, unsigned int 
cmd,
 /* Inode operations */
 extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
  struct inode *new_dir, struct dentry *new_dentry);
-int unionfs_unlink(struct inode *dir, struct dentry *dentry);
-int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
+extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
+extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
 
-int __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd);
+extern int __unionfs_d_revalidate_chain(struct dentry *dentry,
+   struct nameidata *nd);
 
 /* The values for unionfs_interpose's flag. */
 #define INTERPOSE_DEFAULT  0
@@ -454,8 +456,6 @@ static inline void unlock_dir(struct dentry *dir)
dput(dir);
 }
 
-extern int make_dir_opaque(struct dentry *dir, int bindex);
-
 static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
  int bindex)
 {
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
th

[PATCH 10/21] Unionfs: Move unionfs_query_file to commonfops.c

2007-05-23 Thread Josef 'Jeff' Sipek
Moved unionfs_query_file closer to its one user in commonfops.c.
Additionally, it can now become static, and branchman.c can be removed as it
is empty.

Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/Makefile |4 +-
 fs/unionfs/branchman.c  |   60 ---
 fs/unionfs/commonfops.c |   41 
 fs/unionfs/union.h  |6 
 4 files changed, 43 insertions(+), 68 deletions(-)
 delete mode 100644 fs/unionfs/branchman.c

diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile
index e6b2e0c..6986d79 100644
--- a/fs/unionfs/Makefile
+++ b/fs/unionfs/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_UNION_FS) += unionfs.o
 
 unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
-   branchman.o rdstate.o copyup.o dirhelper.o rename.o \
-   unlink.o lookup.o commonfops.o dirfops.o sioq.o
+   rdstate.o copyup.o dirhelper.o rename.o unlink.o \
+   lookup.o commonfops.o dirfops.o sioq.o
 
 unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
diff --git a/fs/unionfs/branchman.c b/fs/unionfs/branchman.c
deleted file mode 100644
index 4545e18..000
--- a/fs/unionfs/branchman.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2003-2007 Erez Zadok
- * Copyright (c) 2003-2006 Charles P. Wright
- * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
- * Copyright (c) 2005-2006 Junjiro Okajima
- * Copyright (c) 2005  Arun M. Krishnakumar
- * Copyright (c) 2004-2006 David P. Quigley
- * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
- * Copyright (c) 2003  Puja Gupta
- * Copyright (c) 2003  Harikesavan Krishnan
- * Copyright (c) 2003-2007 Stony Brook University
- * Copyright (c) 2003-2007 The Research Foundation of SUNY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "union.h"
-
-/*
- * return to user-space the branch indices containing the file in question
- *
- * We use fd_set and therefore we are limited to the number of the branches
- * to FD_SETSIZE, which is currently 1024 - plenty for most people
- */
-int unionfs_ioctl_queryfile(struct file *file, unsigned int cmd,
-   unsigned long arg)
-{
-   int err = 0;
-   fd_set branchlist;
-
-   int bstart = 0, bend = 0, bindex = 0;
-   struct dentry *dentry, *hidden_dentry;
-
-   dentry = file->f_dentry;
-   unionfs_lock_dentry(dentry);
-   if ((err = unionfs_partial_lookup(dentry)))
-   goto out;
-   bstart = dbstart(dentry);
-   bend = dbend(dentry);
-
-   FD_ZERO(&branchlist);
-
-   for (bindex = bstart; bindex <= bend; bindex++) {
-   hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
-   if (!hidden_dentry)
-   continue;
-   if (hidden_dentry->d_inode)
-   FD_SET(bindex, &branchlist);
-   }
-
-   err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
-   if (err)
-   err = -EFAULT;
-
-out:
-   unionfs_unlock_dentry(dentry);
-   return err < 0 ? err : bend;
-}
diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index a57471e..69d9c87 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -632,6 +632,47 @@ out:
return err;
 }
 
+/*
+ * return to user-space the branch indices containing the file in question
+ *
+ * We use fd_set and therefore we are limited to the number of the branches
+ * to FD_SETSIZE, which is currently 1024 - plenty for most people
+ */
+static int unionfs_ioctl_queryfile(struct file *file, unsigned int cmd,
+  unsigned long arg)
+{
+   int err = 0;
+   fd_set branchlist;
+
+   int bstart = 0, bend = 0, bindex = 0;
+   struct dentry *dentry, *hidden_dentry;
+
+   dentry = file->f_dentry;
+   unionfs_lock_dentry(dentry);
+   if ((err = unionfs_partial_lookup(dentry)))
+   goto out;
+   bstart = dbstart(dentry);
+   bend = dbend(dentry);
+
+   FD_ZERO(&branchlist);
+
+   for (bindex = bstart; bindex <= bend; bindex++) {
+   hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
+   if (!hidden_dentry)
+   continue;
+   if (hidden_dentry->d_inode)
+   FD_SET(bindex, &branchlist);
+   }
+
+   err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
+   if (err)
+   err = -EFAULT;
+
+out:
+   unionfs_unlock_dentry(dentry);
+   return err < 0 ? err : bend;
+}
+
 long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
long err;
diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
index fee4f88..dab408f 100644
--- a/fs/unionfs/union.h
+++ b/fs/unionfs/union.h
@@ -319,12 +319,6 @@ int __unionfs_d_revalidate_chain(st

[PATCH 11/21] Unionfs: Combine unionfs_write with __unionfs_write.

2007-05-23 Thread Josef 'Jeff' Sipek
The __unionfs_write helper function was used only by unionfs_write, and
there is really no reason why they should not be combined.

Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/file.c |   30 ++
 1 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
index 1dfcfcb..2e5ec42 100644
--- a/fs/unionfs/file.c
+++ b/fs/unionfs/file.c
@@ -77,17 +77,20 @@ out:
return err;
 }
 
-/* helper function to unionfs_write */
-static ssize_t __unionfs_write(struct file *file, const char __user *buf,
-  size_t count, loff_t *ppos)
+static ssize_t unionfs_write(struct file *file, const char __user *buf,
+size_t count, loff_t *ppos)
 {
-   int err = -EINVAL;
+   int err;
struct file *hidden_file = NULL;
struct inode *inode;
struct inode *hidden_inode;
loff_t pos = *ppos;
int bstart, bend;
 
+   unionfs_read_lock(file->f_dentry->d_sb);
+   if ((err = unionfs_file_revalidate(file, 1)))
+   goto out;
+
inode = file->f_dentry->d_inode;
 
bstart = fbstart(file);
@@ -98,8 +101,10 @@ static ssize_t __unionfs_write(struct file *file, const 
char __user *buf,
hidden_file = unionfs_lower_file(file);
hidden_inode = hidden_file->f_dentry->d_inode;
 
-   if (!hidden_file->f_op || !hidden_file->f_op->write)
+   if (!hidden_file->f_op || !hidden_file->f_op->write) {
+   err = -EINVAL;
goto out;
+   }
 
/* adjust for append -- seek to the end of the file */
if (file->f_flags & O_APPEND)
@@ -120,21 +125,6 @@ static ssize_t __unionfs_write(struct file *file, const 
char __user *buf,
if (pos > inode->i_size)
inode->i_size = pos;
 out:
-   return err;
-}
-
-static ssize_t unionfs_write(struct file *file, const char __user *buf,
-size_t count, loff_t *ppos)
-{
-   int err = 0;
-
-   unionfs_read_lock(file->f_dentry->d_sb);
-   if ((err = unionfs_file_revalidate(file, 1)))
-   goto out;
-
-   err = __unionfs_write(file, buf, count, ppos);
-
-out:
unionfs_read_unlock(file->f_dentry->d_sb);
return err;
 }
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/21] Unionfs: Consistent pointer declaration spacing

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

Change instances of "foo * var" to "foo *var" for consistency.

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/copyup.c |6 +++---
 fs/unionfs/file.c   |   14 +++---
 fs/unionfs/inode.c  |2 +-
 fs/unionfs/super.c  |2 +-
 fs/unionfs/xattr.c  |4 ++--
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
index 4924685..0975b6e 100644
--- a/fs/unionfs/copyup.c
+++ b/fs/unionfs/copyup.c
@@ -541,7 +541,7 @@ struct dentry *create_parents(struct inode *dir, struct 
dentry *dentry,
 }
 
 /* purge a dentry's lower-branch states (dput/mntput, etc.) */
-static void __cleanup_dentry(struct dentry * dentry, int bindex,
+static void __cleanup_dentry(struct dentry *dentry, int bindex,
 int old_bstart, int old_bend)
 {
int loop_start;
@@ -592,7 +592,7 @@ static void __cleanup_dentry(struct dentry * dentry, int 
bindex,
 }
 
 /* set lower inode ptr and update bstart & bend if necessary */
-static void __set_inode(struct dentry * upper, struct dentry * lower,
+static void __set_inode(struct dentry *upper, struct dentry *lower,
int bindex)
 {
unionfs_set_lower_inode_idx(upper->d_inode, bindex,
@@ -605,7 +605,7 @@ static void __set_inode(struct dentry * upper, struct 
dentry * lower,
 }
 
 /* set lower dentry ptr and update bstart & bend if necessary */
-static void __set_dentry(struct dentry * upper, struct dentry * lower,
+static void __set_dentry(struct dentry *upper, struct dentry *lower,
 int bindex)
 {
unionfs_set_lower_dentry_idx(upper, bindex, lower);
diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
index 57f2bdd..1dfcfcb 100644
--- a/fs/unionfs/file.c
+++ b/fs/unionfs/file.c
@@ -53,8 +53,8 @@ out:
return err;
 }
 
-static ssize_t unionfs_read(struct file * file, char __user * buf,
-   size_t count, loff_t * ppos)
+static ssize_t unionfs_read(struct file *file, char __user *buf,
+   size_t count, loff_t *ppos)
 {
struct file *hidden_file;
loff_t pos = *ppos;
@@ -78,8 +78,8 @@ out:
 }
 
 /* helper function to unionfs_write */
-static ssize_t __unionfs_write(struct file * file, const char __user * buf,
-  size_t count, loff_t * ppos)
+static ssize_t __unionfs_write(struct file *file, const char __user *buf,
+  size_t count, loff_t *ppos)
 {
int err = -EINVAL;
struct file *hidden_file = NULL;
@@ -123,8 +123,8 @@ out:
return err;
 }
 
-static ssize_t unionfs_write(struct file * file, const char __user * buf,
-size_t count, loff_t * ppos)
+static ssize_t unionfs_write(struct file *file, const char __user *buf,
+size_t count, loff_t *ppos)
 {
int err = 0;
 
@@ -145,7 +145,7 @@ static int unionfs_file_readdir(struct file *file, void 
*dirent,
return -ENOTDIR;
 }
 
-static unsigned int unionfs_poll(struct file *file, poll_table * wait)
+static unsigned int unionfs_poll(struct file *file, poll_table *wait)
 {
unsigned int mask = DEFAULT_POLLMASK;
struct file *hidden_file = NULL;
diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
index 2d0822a..c54b290 100644
--- a/fs/unionfs/inode.c
+++ b/fs/unionfs/inode.c
@@ -768,7 +768,7 @@ out:
return err;
 }
 
-static int unionfs_readlink(struct dentry *dentry, char __user * buf,
+static int unionfs_readlink(struct dentry *dentry, char __user *buf,
int bufsiz)
 {
int err;
diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
index 39939ba..b7f0b45 100644
--- a/fs/unionfs/super.c
+++ b/fs/unionfs/super.c
@@ -824,7 +824,7 @@ static void unionfs_destroy_inode(struct inode *inode)
 }
 
 /* unionfs inode cache constructor */
-static void init_once(void *v, struct kmem_cache * cachep, unsigned long flags)
+static void init_once(void *v, struct kmem_cache *cachep, unsigned long flags)
 {
struct unionfs_inode_info *i = v;
 
diff --git a/fs/unionfs/xattr.c b/fs/unionfs/xattr.c
index 4dc8ada..12d618b 100644
--- a/fs/unionfs/xattr.c
+++ b/fs/unionfs/xattr.c
@@ -51,7 +51,7 @@ void unionfs_xattr_free(void *ptr, size_t size)
  * BKL held by caller.
  * dentry->d_inode->i_mutex locked
  */
-ssize_t unionfs_getxattr(struct dentry * dentry, const char *name, void *value,
+ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
 size_t size)
 {
struct dentry *hidden_dentry = NULL;
@@ -115,7 +115,7 @@ int unionfs_removexattr(struct dentry *dentry, const char 
*name)
  * BKL held by caller.
  * dentry->d_inode->i_mutex locked
  */
-ssize_t unionfs_listxattr(struct dentry * dentry, char *list, size_t size)
+ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
 {
 

[PATCH 16/21] Unionfs: Disallow setting leftmost branch to readonly

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

Unionfs requires the leftmost branch to be writeable for copyup to work
properly and simply.  If, through branch-management commands (add, delete,
or mode change), the leftmost branch will becomes readonly, then return an
error (and tell the user to use "remount,ro" if they want a readonly union).

[jsipek: fixed up to apply cleanly]
Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/super.c |   16 
 1 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
index 3dee863..446faf8 100644
--- a/fs/unionfs/super.c
+++ b/fs/unionfs/super.c
@@ -629,12 +629,20 @@ static int unionfs_remount_fs(struct super_block *sb, int 
*flags,
 out_no_change:
 
/**
-* WE'RE ALMOST DONE: see if we need to allocate a small-sized new
-* vector, copy the vectors to their correct place, release the
-* refcnt of the older ones, and return.
-* Also handle invalidating any pages that will have to be re-read.
+* WE'RE ALMOST DONE: check if leftmost branch might be read-only,
+* see if we need to allocate a small-sized new vector, copy the
+* vectors to their correct place, release the refcnt of the older
+* ones, and return.  Also handle invalidating any pages that will
+* have to be re-read.
 ***/
 
+   if (!(tmp_data[0].branchperms & MAY_WRITE)) {
+   printk("unionfs: leftmost branch cannot be read-only "
+  "(use \"remount,ro\" to create a read-only union)\n");
+   err = -EINVAL;
+   goto out_release;
+   }
+
/* (re)allocate space for new pointers to hidden dentry */
size = new_branches * sizeof(struct unionfs_data);
new_data = krealloc(tmp_data, size, GFP_KERNEL);
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/21] Unionfs: Rename our "do_rename" to __unionfs_rename

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

To avoid confusion with the VFS function do_rename, and to help ctags,
rename our utility (static) function do_rename to __unionfs_rename.

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/rename.c |   21 +++--
 1 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
index 0e1e71a..edc5a5c 100644
--- a/fs/unionfs/rename.c
+++ b/fs/unionfs/rename.c
@@ -18,9 +18,9 @@
 
 #include "union.h"
 
-static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
-struct inode *new_dir, struct dentry *new_dentry,
-int bindex, struct dentry **wh_old)
+static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+   struct inode *new_dir, struct dentry *new_dentry,
+   int bindex, struct dentry **wh_old)
 {
int err = 0;
struct dentry *hidden_old_dentry;
@@ -144,7 +144,7 @@ out:
 /*
  * Main rename code.  This is sufficienly complex, that it's documented in
  * Docmentation/filesystems/unionfs/rename.txt.  This routine calls
- * do_rename() above to perform some of the work.
+ * __unionfs_rename() above to perform some of the work.
  */
 static int do_unionfs_rename(struct inode *old_dir,
 struct dentry *old_dentry,
@@ -171,8 +171,8 @@ static int do_unionfs_rename(struct inode *old_dir,
new_bend = dbend(new_dentry);
 
/* Rename source to destination. */
-   err = do_rename(old_dir, old_dentry, new_dir, new_dentry, old_bstart,
-   &wh_old);
+   err = __unionfs_rename(old_dir, old_dentry, new_dir, new_dentry,
+  old_bstart, &wh_old);
if (err) {
if (!IS_COPYUP_ERR(err))
goto out;
@@ -230,8 +230,9 @@ static int do_unionfs_rename(struct inode *old_dir,
if (!err) {
dput(wh_old);
bwh_old = bindex;
-   err = do_rename(old_dir, old_dentry, new_dir,
-   new_dentry, bindex, &wh_old);
+   err = __unionfs_rename(old_dir, old_dentry,
+  new_dir, new_dentry,
+  bindex, &wh_old);
break;
}
}
@@ -306,8 +307,8 @@ revert:
goto revert_out;
}
 
-   local_err = do_rename(new_dir, new_dentry,
- old_dir, old_dentry, old_bstart, NULL);
+   local_err = __unionfs_rename(new_dir, new_dentry,
+old_dir, old_dentry, old_bstart, NULL);
 
/* If we can't fix it, then we cop-out with -EIO. */
if (local_err) {
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 20/21] Unionfs: Removed a trailing whitespace

2007-05-23 Thread Josef 'Jeff' Sipek
From: Yiannis Pericleous <[EMAIL PROTECTED]>

Signed-off-by: Yiannis Pericleous <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/super.c |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
index bfd6c24..fe02941 100644
--- a/fs/unionfs/super.c
+++ b/fs/unionfs/super.c
@@ -699,10 +699,12 @@ out_no_change:
i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
-   if (!(*flags & MS_SILENT)) 
-   printk("unionfs: new generation number %d\n", i);
+
err = 0;/* reset to success */
 
+   if (!(*flags & MS_SILENT))
+   printk("unionfs: new generation number %d\n", i);
+
/*
 * The code above falls through to the next label, and releases the
 * refcnts of the older ones (stored in tmp_*): if we fell through
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 19/21] Unionfs: Actually catch bad use of unionfs_mnt{get,put}

2007-05-23 Thread Josef 'Jeff' Sipek
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/union.h |   34 +++---
 1 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
index 5376b76..335d579 100644
--- a/fs/unionfs/union.h
+++ b/fs/unionfs/union.h
@@ -459,38 +459,18 @@ static inline void unlock_dir(struct dentry *dir)
 static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
  int bindex)
 {
-   struct vfsmount *mnt;
+   BUG_ON(!dentry || bindex < 0);
 
-   if (!dentry) {
-   if (bindex < 0)
-   return NULL;
-   BUG_ON(bindex < 0);
-   }
-   mnt = unionfs_lower_mnt_idx(dentry, bindex);
-   if (!mnt) {
-   if (bindex < 0)
-   return NULL;
-   BUG_ON(mnt && bindex < 0);
-   }
-   mnt = mntget(mnt);
-   return mnt;
+   return mntget(unionfs_lower_mnt_idx(dentry, bindex));
 }
 
 static inline void unionfs_mntput(struct dentry *dentry, int bindex)
 {
-   struct vfsmount *mnt;
+   if (!dentry)
+   return;
 
-   if (!dentry) {
-   if (bindex < 0)
-   return;
-   BUG_ON(dentry && bindex < 0);
-   }
-   mnt = unionfs_lower_mnt_idx(dentry, bindex);
-   if (!mnt) {
-   if (bindex < 0)
-   return;
-   BUG_ON(mnt && bindex < 0);
-   }
-   mntput(mnt);
+   BUG_ON(bindex < 0);
+
+   mntput(unionfs_lower_mnt_idx(dentry, bindex));
 }
 #endif /* not _UNION_H_ */
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/21] Unionfs: Add missing copyright notices

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/stack.c   |   11 +++
 fs/unionfs/sioq.c|5 -
 fs/unionfs/sioq.h|   13 +
 include/linux/fs_stack.h |   11 +++
 4 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/fs/stack.c b/fs/stack.c
index 9aee8fc..1f5b161 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -1,3 +1,14 @@
+/*
+ * Copyright (c) 2006-2007 Erez Zadok
+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2006-2007 Stony Brook University
+ * Copyright (c) 2006-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
 #include 
 #include 
 #include 
diff --git a/fs/unionfs/sioq.c b/fs/unionfs/sioq.c
index a1e8ffa..34e25b0 100644
--- a/fs/unionfs/sioq.c
+++ b/fs/unionfs/sioq.c
@@ -1,13 +1,8 @@
 /*
  * Copyright (c) 2003-2007 Erez Zadok
- * Copyright (c) 2003-2006 Charles P. Wright
  * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
  * Copyright (c) 2005-2006 Junjiro Okajima
- * Copyright (c) 2005  Arun M. Krishnakumar
  * Copyright (c) 2004-2006 David P. Quigley
- * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
- * Copyright (c) 2003  Puja Gupta
- * Copyright (c) 2003  Harikesavan Krishnan
  * Copyright (c) 2003-2007 Stony Brook University
  * Copyright (c) 2003-2007 The Research Foundation of SUNY
  *
diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h
index 4827514..dd6c44b 100644
--- a/fs/unionfs/sioq.h
+++ b/fs/unionfs/sioq.h
@@ -1,3 +1,16 @@
+/*
+ * Copyright (c) 2003-2007 Erez Zadok
+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2005-2006 Junjiro Okajima
+ * Copyright (c) 2004-2006 David P. Quigley
+ * Copyright (c) 2003-2007 Stony Brook University
+ * Copyright (c) 2003-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
 #ifndef _SIOQ_H
 #define _SIOQ_H
 
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index 2fe2387..9a896d6 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -1,3 +1,14 @@
+/*
+ * Copyright (c) 2006-2007 Erez Zadok
+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2006-2007 Stony Brook University
+ * Copyright (c) 2006-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
 #ifndef _LINUX_FS_STACK_H
 #define _LINUX_FS_STACK_H
 
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 15/21] Unionfs: Use krealloc instead of open-coding the functionality

2007-05-23 Thread Josef 'Jeff' Sipek
Change the branch management code to use krealloc instead of playing tricks
with kmalloc/memcpy/kfree.

Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/super.c |   56 +---
 1 files changed, 14 insertions(+), 42 deletions(-)

diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
index b7f0b45..3dee863 100644
--- a/fs/unionfs/super.c
+++ b/fs/unionfs/super.c
@@ -427,6 +427,7 @@ static int unionfs_remount_fs(struct super_block *sb, int 
*flags,
struct unionfs_data *new_data = NULL, *tmp_data = NULL;
struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
int new_high_branch_id; /* new high branch ID */
+   int size;   /* memory allocation size, temp var */
 
unionfs_write_lock(sb);
 
@@ -634,49 +635,20 @@ out_no_change:
 * Also handle invalidating any pages that will have to be re-read.
 ***/
 
-   /*
-* Allocate space for actual pointers, if needed.  By the time we
-* finish this block of code, new_branches and new_lower_paths will
-* have the correct size.  None of this code below would be needed
-* if the kernel had a realloc() function, at least one capable of
-* shrinking/truncating an allocation's size (hint, hint).
-*/
-   if (new_branches < max_branches) {
+   /* (re)allocate space for new pointers to hidden dentry */
+   size = new_branches * sizeof(struct unionfs_data);
+   new_data = krealloc(tmp_data, size, GFP_KERNEL);
+   if (!new_data) {
+   err = -ENOMEM;
+   goto out_release;
+   }
 
-   /* allocate space for new pointers to hidden dentry */
-   new_data = kcalloc(new_branches,
-  sizeof(struct unionfs_data), GFP_KERNEL);
-   if (!new_data) {
-   err = -ENOMEM;
-   goto out_release;
-   }
-   /* allocate space for new pointers to lower paths */
-   new_lower_paths = kcalloc(new_branches,
- sizeof(struct path), GFP_KERNEL);
-   if (!new_lower_paths) {
-   err = -ENOMEM;
-   goto out_release;
-   }
-   /*
-* copy current info into new placeholders, incrementing
-* refcounts.
-*/
-   memcpy(new_data, tmp_data,
-  new_branches * sizeof(struct unionfs_data));
-   memcpy(new_lower_paths, tmp_lower_paths,
-  new_branches * sizeof(struct path));
-   /*
-* Since we already hold various refcnts on the objects, we
-* don't need to redo it here.  Just free the older memory
-* and re-point the pointers.
-*/
-   kfree(tmp_data);
-   kfree(tmp_lower_paths);
-   /* no need to nullify pointers here */
-   } else {
-   /* number of branches didn't change, no need to re-alloc */
-   new_data = tmp_data;
-   new_lower_paths = tmp_lower_paths;
+   /* allocate space for new pointers to lower paths */
+   size = new_branches * sizeof(struct path);
+   new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
+   if (!new_lower_paths) {
+   err = -ENOMEM;
+   goto out_release;
}
 
/*
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 21/21] Unionfs: Correctly decrement refcounts of mnt's upon branch management

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

The old logic was broken in one place, which another place tried to "fix"
incorrectly.  Also added detailed comments to explain the new/correct logic.

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/commonfops.c |   17 -
 fs/unionfs/dentry.c |   18 +-
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index 69d9c87..83001aa 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -135,14 +135,21 @@ static void cleanup_file(struct file *file)
   "file %p\n", file);
else {
unionfs_read_lock(sb);
+   /* decrement count of open files */
branchput(sb, i);
unionfs_read_unlock(sb);
-   /* XXX: is it OK to use sb->s_root here? */
-   unionfs_mntput(sb->s_root, i);
-   /* mntget b/c fput below will call mntput */
-   unionfs_mntget(sb->s_root, bindex);
+   /*
+* fput will perform an mntput for us on the
+* correct branch.  Although we're using the
+* file's old branch configuration, bindex,
+* which is the old index, correctly points
+* to the right branch in the file's branch
+* list.  In other words, we're going to
+* mntput the correct branch even if
+* branches have been added/removed.
+*/
+   fput(unionfs_lower_file_idx(file, bindex));
}
-   fput(unionfs_lower_file_idx(file, bindex));
}
}
 
diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
index 95cea3b..db0ef43 100644
--- a/fs/unionfs/dentry.c
+++ b/fs/unionfs/dentry.c
@@ -258,14 +258,22 @@ out_this:
/* finally, lock this dentry and revalidate it */
verify_locked(dentry);
dgen = atomic_read(&UNIONFS_D(dentry)->generation);
-   saved_bstart = dbstart(dentry);
-   saved_bend = dbend(dentry);
valid = __unionfs_d_revalidate_one(dentry, nd);
 
-   if (valid && chain_len > 0 && sbgen != dgen) {
-   for (bindex = saved_bstart; bindex <= saved_bend; bindex++)
+   /*
+* If __unionfs_d_revalidate_one() succeeded above, then it will
+* have incremented the refcnt of the mnt's, but also the branch
+* indices of the dentry will have been updated (to take into
+* account any branch insertions/deletion.  So the current
+* dbstart/dbend match the current, and new, indices of the mnts
+* which __unionfs_d_revalidate_one has incremented.  Note: the "if"
+* test below does not depend on whether chain_len was 0 or greater.
+*/
+   if (valid && sbgen != dgen)
+   for (bindex = dbstart(dentry);
+bindex <= dbend(dentry);
+bindex++)
unionfs_mntput(dentry, bindex);
-   }
 
 out_free:
/* unlock/dput all dentries in chain and return status */
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 17/21] Unionfs: Documentation update regarding overlapping branches and new lookup code

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

Added detailed comment and updated documentation to explain why overlapping
branches are disallowed, and better explain the cache coherency issues.

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 Documentation/filesystems/unionfs/issues.txt |   16 
 fs/unionfs/main.c|   16 +++-
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/Documentation/filesystems/unionfs/issues.txt 
b/Documentation/filesystems/unionfs/issues.txt
index a434fee..c634604 100644
--- a/Documentation/filesystems/unionfs/issues.txt
+++ b/Documentation/filesystems/unionfs/issues.txt
@@ -5,14 +5,14 @@ KNOWN Unionfs 2.0 ISSUES:
This means we can't reliably detect a read-only NFS export.
 
 2. Modifying a Unionfs branch directly, while the union is mounted, is
-   currently unsupported.  We have tested Unionfs under such conditions, and
-   fixed any bugs we found (Unionfs comes with an extensive regression test
-   suite).  However, it may still be possible that changes made to lower
-   branches directly could cause cache incoherency which, in the worst case,
-   may case an oops.  We are currently addressing this problem for Unionfs
-   and also generically for all stackable file systems, by handing mmap and
-   introducing small VFS/MM changes that would allow a file system to handle
-   cache coherency correctly.
+   currently unsupported, because it could cause a cache incoherency between
+   the union layer and the lower file systems (for that reason, Unionfs
+   currently prohibits using branches which overlap with each other, even
+   partially).  We have tested Unionfs under such conditions, and fixed any
+   bugs we found (Unionfs comes with an extensive regression test suite).
+   However, it may still be possible that changes made to lower branches
+   directly could cause cache incoherency which, in the worst case, may case
+   an oops.
 
Unionfs 2.0 has a temporary workaround for this.  You can force Unionfs
to increase the superblock generation number, and hence purge all cached
diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c
index 84d3bf5..a9ad445 100644
--- a/fs/unionfs/main.c
+++ b/fs/unionfs/main.c
@@ -351,7 +351,21 @@ static int parse_dirs_option(struct super_block *sb, 
struct unionfs_dentry_info
 
BUG_ON(branches != (hidden_root_info->bend + 1));
 
-   /* ensure that no overlaps exist in the branches */
+   /*
+* Ensure that no overlaps exist in the branches.
+*
+* This test is required because the Linux kernel has no support
+* currently for ensuring coherency between stackable layers and
+* branches.  If we were to allow overlapping branches, it would be
+* possible, for example, to delete a file via one branch, which
+* would not be reflected in another branch.  Such incoherency could
+* lead to inconsistencies and even kernel oopses.  Rather than
+* implement hacks to work around some of these cache-coherency
+* problems, we prevent branch overlapping, for now.  A complete
+* solution will involve proper kernel/VFS support for cache
+* coherency, at which time we could safely remove this
+* branch-overlapping test.
+*/
for (i = 0; i < branches; i++) {
for (j = i + 1; j < branches; j++) {
dent1 = hidden_root_info->lower_paths[i].dentry;
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/21] Unionfs: Every printk should prefix with "unionfs: " consistently

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 fs/unionfs/commonfops.c |7 ---
 fs/unionfs/copyup.c |2 +-
 fs/unionfs/dentry.c |8 +---
 fs/unionfs/inode.c  |   15 ---
 fs/unionfs/lookup.c |5 +++--
 fs/unionfs/main.c   |8 
 fs/unionfs/rdstate.c|4 ++--
 fs/unionfs/rename.c |   30 +++---
 fs/unionfs/sioq.c   |2 +-
 fs/unionfs/subr.c   |4 ++--
 fs/unionfs/super.c  |8 
 11 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index 9cf6b81..778901f 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -355,8 +355,8 @@ int unionfs_file_revalidate(struct file *file, int 
willwrite)
if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
!IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
is_robranch(dentry)) {
-   printk(KERN_DEBUG "Doing delayed copyup of a read-write "
-  "file on a read-only branch.\n");
+   printk(KERN_DEBUG "unionfs: Doing delayed copyup of a "
+  "read-write file on a read-only branch.\n");
err = do_delayed_copyup(file, dentry);
}
 
@@ -576,7 +576,8 @@ int unionfs_file_release(struct inode *inode, struct file 
*file)
 
if (fileinfo->rdstate) {
fileinfo->rdstate->access = jiffies;
-   printk(KERN_DEBUG "Saving rdstate with cookie %u [%d.%lld]\n",
+   printk(KERN_DEBUG "unionfs: saving rdstate with cookie "
+  "%u [%d.%lld]\n",
   fileinfo->rdstate->cookie,
   fileinfo->rdstate->bindex,
   (long long)fileinfo->rdstate->dirpos);
diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
index 502a40f..8fae308 100644
--- a/fs/unionfs/copyup.c
+++ b/fs/unionfs/copyup.c
@@ -183,7 +183,7 @@ static int __copyup_ndentry(struct dentry 
*old_hidden_dentry,
run_sioq(__unionfs_create, &args);
err = args.err;
} else {
-   printk(KERN_ERR "Unknown inode type %d\n",
+   printk(KERN_ERR "unionfs: unknown inode type %d\n",
   old_mode);
BUG();
}
diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
index 067732c..463cf4c 100644
--- a/fs/unionfs/dentry.c
+++ b/fs/unionfs/dentry.c
@@ -48,7 +48,8 @@ static int __unionfs_d_revalidate_one(struct dentry *dentry,
 
/* if the dentry is unhashed, do NOT revalidate */
if (d_deleted(dentry)) {
-   printk(KERN_DEBUG "unhashed dentry being revalidated: %*s\n",
+   printk(KERN_DEBUG "unionfs: unhashed dentry being "
+  "revalidated: %*s\n",
   dentry->d_name.len, dentry->d_name.name);
goto out;
}
@@ -297,12 +298,13 @@ static void unionfs_d_release(struct dentry *dentry)
 
/* this could be a negative dentry, so check first */
if (!UNIONFS_D(dentry)) {
-   printk(KERN_DEBUG "dentry without private data: %.*s",
+   printk(KERN_DEBUG "unionfs: dentry without private data: %.*s",
   dentry->d_name.len, dentry->d_name.name);
goto out;
} else if (dbstart(dentry) < 0) {
/* this is due to a failed lookup */
-   printk(KERN_DEBUG "dentry without hidden dentries : %.*s",
+   printk(KERN_DEBUG "unionfs: dentry without hidden "
+  "dentries: %.*s",
   dentry->d_name.len, dentry->d_name.name);
goto out_free;
}
diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
index 0b9cb29..f0616ed 100644
--- a/fs/unionfs/inode.c
+++ b/fs/unionfs/inode.c
@@ -463,8 +463,9 @@ static int unionfs_symlink(struct inode *dir, struct dentry 
*dentry,
if (IS_ERR(hidden_dentry))
err = PTR_ERR(hidden_dentry);
 
-   printk(KERN_DEBUG "hidden dentry NULL (or 
error)"
-  "for bindex = %d\n", bindex);
+   printk(KERN_DEBUG "unionfs: hidden dentry "
+  "NULL (or error) for bindex = %d\n",
+  bindex);
continue;
}
}
@@ -585,8 +586,8 @@ static int unionfs_mkdir(struct inode *parent, struct 
dentry *dentry, int mode)
if (!hidden_dentry) {
hidden_dentry = create_parents(parent, dentry, bindex);
if (!hidden_dentry || IS_ERR(hidden_dentry)) {
-   printk(KERN_DEBUG "hidden dentry NULL for "
-

[GIT PULL -mm] Unionfs cleanups and fixes

2007-05-23 Thread Josef 'Jeff' Sipek
The following patches consist of mostly cleanups of the Unionfs code, as
well as fixes for several harder to hit bugs (resource/memory leaks).

As before, there is a git repo at:

git://git.kernel.org/pub/scm/linux/kernel/git/jsipek/unionfs.git

(master.kernel.org:/pub/scm/linux/kernel/git/jsipek/unionfs.git)

There are 21 new commits:

Unionfs: Correctly decrement refcounts of mnt's upon branch management
Unionfs: Removed a trailing whitespace
Unionfs: Actually catch bad use of unionfs_mnt{get,put}
Unionfs: Remove defunct unionfs_put_inode super op
Unionfs: Documentation update regarding overlapping branches and new 
lookup code
Unionfs: Disallow setting leftmost branch to readonly
Unionfs: Use krealloc instead of open-coding the functionality
Unionfs: Call realloc unconditionally
Unionfs: Don't leak resources when copyup fails partially
Unionfs: Prefix external functions with 'extern' properly
Unionfs: Combine unionfs_write with __unionfs_write.
Unionfs: Move unionfs_query_file to commonfops.c
Unionfs: Rename our "do_rename" to __unionfs_rename
Unionfs: Rename Unionfs's double_lock_dentry to avoid confusion
Unionfs: Consistent pointer declaration spacing
Unionfs: Added numerous comments
Unionfs: Cleanup of strings and comments
Unionfs: Add missing copyright notices
Unionfs: Every printk should prefix with "unionfs: " consistently
Unionfs: Coding style fixes
Unionfs: Tiny documentation fixups

Thanks,

Josef 'Jeff' Sipek <[EMAIL PROTECTED]>

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/21] Unionfs: Tiny documentation fixups

2007-05-23 Thread Josef 'Jeff' Sipek
From: Erez Zadok <[EMAIL PROTECTED]>

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
Signed-off-by: Josef 'Jeff' Sipek <[EMAIL PROTECTED]>
---
 Documentation/filesystems/unionfs/usage.txt |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/filesystems/unionfs/usage.txt 
b/Documentation/filesystems/unionfs/usage.txt
index 13fbcea..1c7554b 100644
--- a/Documentation/filesystems/unionfs/usage.txt
+++ b/Documentation/filesystems/unionfs/usage.txt
@@ -81,10 +81,10 @@ CACHE CONSISTENCY
 If you modify any file on any of the lower branches directly, while there is
 a Unionfs 2.0 mounted above any of those branches, you should tell Unionfs
 to purge its caches and re-get the objects.  To do that, you have to
-incremenet the generation number of the superblock using the following
+increment the generation number of the superblock using the following
 command:
 
-# mount -t unionfs -o remount,remount,incgen none MOUNTPOINT
+# mount -t unionfs -o remount,incgen none MOUNTPOINT
 
 
 For more information, see .
-- 
1.5.2.rc1.165.gaf9b

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [AppArmor 01/41] Pass struct vfsmount to the inode_create LSM hook

2007-05-23 Thread Andreas Gruenbacher
On Thursday 12 April 2007 12:12, Al Viro wrote:
> On Thu, Apr 12, 2007 at 02:08:10AM -0700, [EMAIL PROTECTED] wrote:
> > This is needed for computing pathnames in the AppArmor LSM.
>
> Which is an argument against said LSM in current form.

The fundamental model of AppArmor is to perform access checks based on the 
location of a file in the filesystem namespace, i.e., the pathname, and this 
can only be done with both the dentry and vfsmount. My understanding was that 
there was agreement at the last kernel summit that pathname based approaches 
should be allowed.

> > -   error = security_inode_create(dir, dentry, mode);
> > +   error = security_inode_create(dir, dentry, nd ? nd->mnt : NULL, mode);
>
> is a clear sign that interface is wrong.

No. There are callers of vfs_create() that use a NULL nameidata, and that's 
what causes the problem here. Struct nameidata is pretty big, and so we don't 
want to allocate temporary nameidata objects all over the place. So to me the 
above NULL check seems the lesser evil.

One way to deal with the nameidata size problem is to split it up into one 
part for the real path lookups, and a much smaller part that only contains 
the dentry, vfsmount, and intent flags. This would allow to pass around the 
smaller nameidata much more consistently.

John has posted patches for that on May 14; subject [RFD Patch 0/4]. Feedback 
appreciated.

In several places, the NULL nameidata is wrong because we can't check the 
vfsmount flags or intent. Not having this information is causing problems for 
nfs too for example -- it's not an AppArmor specific problem.

> Leaving aside the general idiocy of "we prohibit to do something with file
> if mounted here, but if there's another mountpoint, well, we just miss", an
> API of that kind is just plain wrong.  Either you can live without seeing
> vfsmount in that method (in which case you shouldn't pass it at all), or you
> have a hole.

This is backwards from what AppArmor does. The policy defines which paths may 
be accessed; all paths not explicitly listed are denied. If files are mounted 
at multiple locations, then the policy may allow access to some locations but 
not to others. That's not a hole.

In fact this is not much different from traditional permissions on parent 
directories: even if the same files are mounted at several locations, parent 
directory permissions may allow accessing only some of those locations.

Thanks,
Andreas
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC 5/5] inode reservation v0.1 (next step working)

2007-05-23 Thread coly
>From the benchmark and the experimental inores patch on ext4, it can be
found that inode reservation on ext4 is a good idea to be tried. 

One of the original idea of inode reservation is NOT modifying on-disk
format. Current magic inode can make it, but use a magic inodes list to
link each reserved inode block together is not a good idea. Indeed, this
is a performance killer.

1, Therefor, group descriptor has to be modified to add a new member ---
this new member records the lastest reserved inode in inode table of
each block group.

2, Use rest room in magic inode (sizeof(ext4_inode) -
sizeof(ext4_magic_inode)) to record other reserved inode blocks. This
method can reduce number of magic inodes, which can minimize I/O for
magic inodes.

3, Use magic inode cache. Most of the magic inode accessings are reading
(not writing), therefore, caching can help to reduce most of the read
I/O for magic inode.

4, Modify mke2fs to support new on-disk layout for inode reservation.


Coly

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC 4/5] inode reservation v0.1 (benchmark result)

2007-05-23 Thread coly
Current patch avoids inodes from different directories mixed together in
the inode table. Therefore the benchmakr emulate a situation that mixes
inodes of different sub-directories together. and record the time on
removing them all.

In the first part, reserving 16 inodes for each new created directory.
Therefore 14 files can only use 1 reserved block for each directory in
inode table, obviously, the result of benchmark is the best case :-)

Enviornment:
1) create 9890 directory, create files in each directory alternatively
2) kernel version 2.6.20-mm, the ext4 subdir-inode-reservation is
patched based on 2.6.20-mm
3) 14 files in each subdirectory. 9890 sub directories in
mount_point/mailbox/
4) mount with option data=writeback
5) each operation followed by a reboot
6) EXT4_INIT_RESERVE_INODES = 16

= data=writeback 
remove directories and files by rm -rf:
* ext3
read 16m56.979s
user 0m0.156s
sys 0m21.449s

* ext4org
real 18m38.809s
user 0m0.636s
sys 0m37.422s

* ext4inores
real 7m57.437s
user 0m0.452s
sys 0m34.698s


= data=ordered 
remove directories and files by rm -rf:
* ext3
real 17m23.435s
user 0m0.140s
sys 0m21.709s

* ext4org
real 17m39.515s
user 0m0.120s
sys 0.22.097s

* ext4inores
real 7m41.365s
user 0m0.196s
sys 0m24.210s

= data=journal 
remove directories and files by rm -rf:
* ext3
real 12m50.545s
user 0m0.152s
sys 0m22.725s

* ext4org
real 13m43.910s
user 0m0.196s
sys 0m23.161s

* ext4inores
real 7m49.915s
user 0m0.168s
sys 0m23.633s


Due to the bad design of magic inode and the on-disk layout of magic
inode. When 30 files created alternatively in each directory, no
performance advantage exists. When 50 files created alternatively in
each directory, the patched ext4 will use double time on removing all
the files and directories.

Therefore, in the next version a new on-disk layout will be used.

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC 3/5] inode reservation v0.1 (e2fsprogs patch)

2007-05-23 Thread coly
This patch only makes mke2fs support on-disk layout for inode
reservation. Just for experiment. e2fsck and other utils can not work
with magic inode yet.



diff -u -r
e2fsprogs-1.39-tyt1/debugfs/debugfs.c ../e2fsprogs/debugfs/debugfs.c
--- e2fsprogs-1.39-tyt1/debugfs/debugfs.c 2006-10-07 11:42:54.0
+0800
+++ ../e2fsprogs/debugfs/debugfs.c 2007-03-29 23:05:05.0 +0800
@@ -1215,7 +1215,8 @@
} else
mode = 010755;

- retval = ext2fs_new_inode(current_fs, dir, mode, 0, &free_inode);
+ retval = ext2fs_new_inode(current_fs, dir, mode, 0, &free_inode,
+ LINUX_S_ISDIR(mode) ? 1 : 0);
if (retval)
com_err("ext2fs_new_inode", retval, 0);
else
@@ -1294,7 +1295,7 @@
return;
}

- retval = ext2fs_new_inode(current_fs, cwd, 010755, 0, &newfile);
+ retval = ext2fs_new_inode(current_fs, cwd, 010755, 0, &newfile, 0);
if (retval) {
com_err(argv[0], retval, 0);
close(fd);
@@ -1384,7 +1385,7 @@
goto usage;
if (check_fs_read_write(argv[0]))
return;
- retval = ext2fs_new_inode(current_fs, cwd, 010755, 0, &newfile);
+ retval = ext2fs_new_inode(current_fs, cwd, 010755, 0, &newfile, 0);
if (retval) {
com_err(argv[0], retval, 0);
return;
diff -u -r
e2fsprogs-1.39-tyt1/debugfs/util.c ../e2fsprogs/debugfs/util.c
--- e2fsprogs-1.39-tyt1/debugfs/util.c 2006-10-07 11:40:28.0
+0800
+++ ../e2fsprogs/debugfs/util.c 2007-03-31 02:07:10.0 +0800
@@ -119,7 +119,7 @@

retval = ext2fs_namei(current_fs, root, cwd, str, &ino);
if (retval) {
- com_err(str, retval, "");
+ com_err(str, retval, " ");
return 0;
}
return ino;
diff -u -r
e2fsprogs-1.39-tyt1/e2fsck/pass1.c ../e2fsprogs/e2fsck/pass1.c
--- e2fsprogs-1.39-tyt1/e2fsck/pass1.c 2006-10-07 11:42:54.0
+0800
+++ ../e2fsprogs/e2fsck/pass1.c 2007-03-31 02:05:25.0 +0800
@@ -248,7 +248,7 @@
struct ext2_super_block *sb = ctx->fs->super;
struct ext2_inode_large *inode;
struct ext2_ext_attr_entry *entry;
- char *start, *end, *name;
+ char *start, *end;
int storage_size, remain, offs;
int problem = 0;

@@ -329,7 +329,7 @@

/* simple remove all possible EA(s) */
*((__u32 *)start) = 0UL;
- e2fsck_write_inode_full(ctx, pctx->ino, inode,
+ e2fsck_write_inode_full(ctx, pctx->ino, (struct ext2_inode *)inode,
EXT2_INODE_SIZE(sb), "pass1");
}

@@ -919,7 +919,6 @@

if (ctx->flags & E2F_FLAG_RESIZE_INODE) {
ext2fs_block_bitmap save_bmap;
- errcode_t retval;

save_bmap = fs->block_map;
fs->block_map = ctx->block_found_map;
diff -u -r
e2fsprogs-1.39-tyt1/e2fsck/pass3.c ../e2fsprogs/e2fsck/pass3.c
--- e2fsprogs-1.39-tyt1/e2fsck/pass3.c 2006-10-07 11:40:28.0
+0800
+++ ../e2fsprogs/e2fsck/pass3.c 2007-03-29 23:05:05.0 +0800
@@ -436,7 +436,7 @@
* Next find a free inode.
*/
retval = ext2fs_new_inode(fs, EXT2_ROOT_INO, 040700,
-   ctx->inode_used_map, &ino);
+   ctx->inode_used_map, &ino, 1);
if (retval) {
pctx.errcode = retval;
fix_problem(ctx, PR_3_ERR_LPF_NEW_INODE, &pctx);
diff -u -r
e2fsprogs-1.39-tyt1/lib/ext2fs/alloc.c ../e2fsprogs/lib/ext2fs/alloc.c
--- e2fsprogs-1.39-tyt1/lib/ext2fs/alloc.c 2006-10-07 11:40:28.0
+0800
+++ ../e2fsprogs/lib/ext2fs/alloc.c 2007-04-30 15:21:01.0 +0800
@@ -26,6 +26,192 @@
#include "ext2_fs.h"
#include "ext2fs.h"

+
+static errcode_t ext2fs_reserve_inodes_area(ext2_filsys fs, 
+ struct ext2_magic_inode * prev_link_minode)
+{
+ struct ext2_magic_inode lastres_minode;
+ struct ext2_magic_inode new_link_minode;
+ ext2_ino_t reserv_size;
+ ext2_ino_t new_link_ino;
+ ext2_ino_t lastres_ino;
+ ext2_ino_t dir_group;
+ ext2_ino_t itable_idx;
+ int find = 0;
+ errcode_t retval; 
+ 
+ reserv_size = prev_link_minode->mi_next_ressize;
+ if(reserv_size > EXT2_INODES_PER_GROUP(fs->super))
+ return EXT2_ET_MAGIC_INODE_CORRUPT;
+
+ for(dir_group = 0; dir_group < fs->group_desc_count; dir_group ++)
+ {
+ lastres_ino = ext2fs_get_group_lastres_ino(fs, dir_group);
+ retval = ext2fs_read_magic_inode(fs, lastres_ino,
+ &lastres_minode);
+ if(retval)
+ return retval;
+ retval = ext2fs_check_magic_inode(&lastres_minode,
+ EXT2_MINODE_TYPE_LASTRES);
+ if(retval)
+ return retval;
+ itable_idx = lastres_minode.mi_lastres_ino %
+ EXT2_INODES_PER_GROUP(fs->super);
+ if((EXT2_INODES_PER_GROUP(fs->super) - itable_idx) >=
+ reserv_size) {
+ find = 1;
+ break;
+ }
+ }
+ if (!find)
+ return EXT2_ET_DIR_NO_SPACE;
+ new_link_ino = lastres_minode.mi_lastres_ino + reserv_size;
+ if(ext2fs_test_inode_bitmap(fs->inode_map, new_link_ino))
+ return EXT2_ET_MAGIC_INODE_CORRUPT;
+ ext2fs_inode_alloc_stats2(fs, new_link_ino, +1, 0);
+ ext2fs_setup_magic_inode(&new_link_minode, 
+ EXT2_MINODE_TYPE_LINK);
+ new_link_minode.mi_next_ino = 0;
+ new_link_minode.mi_parent_ino = prev_link_minode->mi_parent_ino;
+ new_link_minode.mi_current_ressize = reserv_size;
+ reserv_size *= 2;
+ if(reserv_size > EXT2_INODES_PER_GROUP(fs->super))
+ reserv_size = EXT2_INODES_PER_GROUP(fs->super);
+ new_link_minode.mi_next_ressize = reserv_size;
+ retval = ext2fs_write_magic_inode(fs, new_link_ino, &new_link_minode);
+ if(retval)
+ return retval;
+ lastres_minode.mi_lastre

[RFC 2/5] inode reservation v0.1 (ext4 kernel patch)

2007-05-23 Thread coly
The patch is generated based on 2.6.20-ext4-2 branch. you can find the
benchmark from other email.

DO NOT waste time on reading the patch :-) I post this patch here is to
show that I really spent time on it and the patch can work (even not
well).


diff --git a/Makefile b/Makefile
index 7e2750f..21d21e4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 20
-EXTRAVERSION =
-NAME = Homicidal Dwarf Hamster
+EXTRAVERSION = inores

# *DOCUMENTATION*
# To see a list of typical targets execute "make help"
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 11e93c1..daf88b4 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -30,3 +30,29 @@ unsigned long ext4_count_free (struct buffer_head *
map, unsigned int numchars)

#endif  /*  EXT4FS_DEBUG  */

+/*
+ * Read the inode allocation bitmap for a given block_group, reading
+ * into the specified slot in the superblock's bitmap cache.
+ *
+ * Return buffer_head of bitmap on success or NULL.
+ */
+struct buffer_head *
+read_inode_bitmap(struct super_block * sb, unsigned long block_group)
+{
+ struct ext4_group_desc *desc;
+ struct buffer_head *bh = NULL;
+
+ desc = ext4_get_group_desc(sb, block_group, NULL);
+ if (!desc)
+ goto error_out;
+
+ bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+ if (!bh)
+ ext4_error(sb, "read_inode_bitmap",
+ "Cannot read inode bitmap - "
+ "block_group = %lu, inode_bitmap = %llu",
+ block_group, ext4_inode_bitmap(sb, desc));
+error_out:
+ return bh;
+}
+
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 427f830..bb83112 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -45,32 +45,6 @@


/*
- * Read the inode allocation bitmap for a given block_group, reading
- * into the specified slot in the superblock's bitmap cache.
- *
- * Return buffer_head of bitmap on success or NULL.
- */
-static struct buffer_head *
-read_inode_bitmap(struct super_block * sb, unsigned long block_group)
-{
- struct ext4_group_desc *desc;
- struct buffer_head *bh = NULL;
-
- desc = ext4_get_group_desc(sb, block_group, NULL);
- if (!desc)
- goto error_out;
-
- bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
- if (!bh)
- ext4_error(sb, "read_inode_bitmap",
- "Cannot read inode bitmap - "
- "block_group = %lu, inode_bitmap = %llu",
- block_group, ext4_inode_bitmap(sb, desc));
-error_out:
- return bh;
-}
-
-/*
  * NOTE! When we get the inode, we're the only people
  * that have access to it, and as such there are no
  * race conditions we have to worry about. The inode
@@ -288,6 +262,12 @@ static int find_group_orlov(struct super_block *sb,
struct inode *parent)
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc (sb, group, &bh);
+ if (test_opt(sb, INORES) &&
+ (ext4_unreserved_inodes(sb, group) < 
+ EXT4_INIT_RESERVE_INODES)) {
+ printk(KERN_DEBUG "no enough reserved inodes in group %d\n", group);
+ continue;
+ }
if (!desc || !desc->bg_free_inodes_count)
continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
@@ -323,6 +303,12 @@ static int find_group_orlov(struct super_block *sb,
struct inode *parent)
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc (sb, group, &bh);
+ if (test_opt(sb, INORES) &&
+ (ext4_unreserved_inodes(sb, group) < 
+ EXT4_INIT_RESERVE_INODES)) {
+ printk(KERN_DEBUG "no enough reserved inodes in group %d\n", group);
+ continue;
+ }
if (!desc || !desc->bg_free_inodes_count)
continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
@@ -335,6 +321,9 @@ static int find_group_orlov(struct super_block *sb,
struct inode *parent)
}

fallback:
+ printk(KERN_DEBUG "reach fallback, disable INORES\n");
+ return -1; /* for test */
+ clear_opt(sbi->s_mount_opt, INORES);
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc (sb, group, &bh);
@@ -414,6 +403,598 @@ static int find_group_other(struct super_block
*sb, struct inode *parent)
return -1;
}

+
+static int ext4_inores_newdir_ino(handle_t * handle,
+   struct inode * dir, 
+   time_t ctime,
+   unsigned long * ino)
+{
+ struct super_block * sb;
+ struct ext4_sb_info * sbi;
+ int group;
+ struct buffer_head * bitmap_bh = NULL, * bh2;
+ unsigned long lastres_ino, start_ino, end_ino;
+ struct ext4_magic_inode * link_minode, * lastres_minode;
+ struct ext4_iloc link_iloc, lastres_iloc;
+ struct ext4_group_desc * gdp = NULL;
+ int itable_offset;
+ int ret = 0;
+
+ sb = dir->i_sb;
+ sbi = EXT4_SB(sb);
+
+find_group_again:
+ group = find_group_orlov(sb, dir);
+ 
+ if (group == -1) {
+ printk("no space in find_group_orlove.\n");
+ return -ENOSPC;
+ }
+ if (!test_opt (sb, INORES)) {
+ printk(KERN_DEBUG "INORES is not set, return 0.\n");
+ * ino = 0;
+ return 0;
+ }
+ 
+ /* 
+ * the corresponded block is already loaded into memory in 
+ * find_group_orlov(), this lock will not hurt performance 
+ * in common case.
+ */
+ spin_lock(sb_bgl_lock(sbi, group));
+ if (ext4_unreserved_inodes

[RFC 1/5] inode reservation v0.1

2007-05-23 Thread coly
This patch is only experimental, just have a try whether the
subdirectory inode reservation idea works. Now the answer is it works,
and I am working on an improved version for this patch.

The basic idea of subdirecctory inode reservation is to avoid
unnecessary redundant meta data writing and hard disk seeking. Detailed
idea can be found here:http://lkml.org/lkml/2007/3/26/180

This email has another 4 extra parts:
1) ext4 kernel patch
2) e2fsprogs patch
3) current benchmark
4) next step working

The reason to release this unstable patch is:
* I want others to know I am working on it.
* The result of benchmark indecates that this work maybe is valuable.
* Hope others can give me feedback at very basic phase of this patch.

I should thank Adreas Dilger, who firstly introduces this idea to me.
Also I should thank other friends in #linuxfs at irc.oftc.org, great
helps to me.

In the next several months, I will try best to write a usable patch :-)


Coly

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


NCPFS and extended characters (Charcode >=128)

2007-05-23 Thread Chris Malton

Hello,
	NCPFS has an undesirable bug (IMHO) where any character above charcode 
127 will prevent the directory listing from showing.


Could somebody either work out how to fix it, or better still, fix it 
and commit?


Thank you in advance,

Chris


smime.p7s
Description: S/MIME Cryptographic Signature


Re: [AppArmor 38/45] AppArmor: Module and LSM hooks

2007-05-23 Thread Andreas Gruenbacher
On Tuesday 15 May 2007 11:14, Pavel Machek wrote:
> Why is this configurable? 

The maximum length of a pathname is an arbitrary limit: we don't want to 
allocate arbitrary amounts of of kernel memory for pathnames so we introduce 
this limit and set it to a reasonable value. In the unlikely case that 
someone uses insanely long pathnames, this limit can be increased.

Andreas
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> I hope to get some breathing space next week, then I'll get back to
> VFS work.

Great.

> I'd rather do that one myself,

Sure, don't want to rob you of any fun stuff ;)

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Review status (Re: [PATCH] LogFS take three)

2007-05-23 Thread Evgeniy Polyakov
On Wed, May 23, 2007 at 05:14:04PM +0200, Jörn Engel ([EMAIL PROTECTED]) wrote:
> > > I'm just a German.  Forgive me if I drink lesser beverages.
> > 
> > You should definitely change that.
> 
> Change being German?  Not a bad idea, actually.

You cook up really tasty shnaps, in small quantities it is good for
health in infinite volumes.

> > Btw, what about this piece:
> > 
> > int logfs_erase_segment(struct super_block *sb, u32 index)
> > {
> > struct logfs_super *super = LOGFS_SUPER(sb);
> > 
> > super->s_gec++;
> > 
> > return mtderase(sb, index << super->s_segshift, super->s_segsize);
> > }
> > 
> > index << super->s_segshift might overflow, mtderase expects loff_t
> > there, since index can be arbitrary segment number, is it possible, that
> > overflow really occurs?
> 
> Indeed it is.  You just earned your second beer^Wvodka.

Actually this code looks less encrypted than ext2 for, which definitely
a good sign from reviewer's point of view.

> Jörn

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 05:25:49PM +0200, Miklos Szeredi wrote:
> > > How will this work with copy_tree() and namespace duplication, which
> > > currently walk the tree with only namespace_sem held?
> > 
> > Easy - grab namespace_sem, grab vfsmount_lock, walk the subtree and bump
> > mnt_busy on everything (by 1 + number of non-busy children).  Then drop
> > vfsmount_lock and do as usual, dropping references in tree being copied
> > as you go.  Nothing will get attached or detached due to namespace_sem,
> > nothing will get evicted by anybody other than you since you've got all
> > that stuff pinned down.  End of story...
> 
> Right.
> 
> Do you have some code?
> 
> Should I try to code something up?

I hope to get some breathing space next week, then I'll get back to
VFS work.  I'd rather do that one myself, since it'll be a long series
of equivalent transformations - debugging such rewrite of refcounting
done as a single patch is going to be hell.  And yes, refcounting rewrite
is near the top of the list (another thing is wading through several
threads from hell and reviewing unionfs ;-/)
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> > How will this work with copy_tree() and namespace duplication, which
> > currently walk the tree with only namespace_sem held?
> 
> Easy - grab namespace_sem, grab vfsmount_lock, walk the subtree and bump
> mnt_busy on everything (by 1 + number of non-busy children).  Then drop
> vfsmount_lock and do as usual, dropping references in tree being copied
> as you go.  Nothing will get attached or detached due to namespace_sem,
> nothing will get evicted by anybody other than you since you've got all
> that stuff pinned down.  End of story...

Right.

Do you have some code?

Should I try to code something up?

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Review status (Re: [PATCH] LogFS take three)

2007-05-23 Thread Jörn Engel
On Wed, 23 May 2007 19:07:32 +0400, Evgeniy Polyakov wrote:
> On Wed, May 23, 2007 at 02:58:41PM +0200, Jörn Engel ([EMAIL PROTECTED]) 
> wrote:
> > On Sun, 20 May 2007 21:30:52 +0400, Evgeniy Polyakov wrote:
> 
> And what if it is 33 bits? Or it is not allowed?

Not allowed.  Both number and size of segments may never exceed 32bit.

> > > segsize is long, but should be u64 I think.
> > 
> > It could be s32 as well.
> 
> It is a matter of definition - if segment size is allowed to be more
> than 32 bits, then below transformation is not correct, otherwise
> segment size should not use additional 32bits on 64bit platform, since
> it is long.

I guess I could save 4 Bytes there.

> > I'm just a German.  Forgive me if I drink lesser beverages.
> 
> You should definitely change that.

Change being German?  Not a bad idea, actually.

> Btw, what about this piece:
> 
> int logfs_erase_segment(struct super_block *sb, u32 index)
> {
>   struct logfs_super *super = LOGFS_SUPER(sb);
> 
>   super->s_gec++;
> 
>   return mtderase(sb, index << super->s_segshift, super->s_segsize);
> }
> 
> index << super->s_segshift might overflow, mtderase expects loff_t
> there, since index can be arbitrary segment number, is it possible, that
> overflow really occurs?

Indeed it is.  You just earned your second beer^Wvodka.

Jörn

-- 
The wise man seeks everything in himself; the ignorant man tries to get
everything from somebody else.
-- unknown
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Review status (Re: [PATCH] LogFS take three)

2007-05-23 Thread Evgeniy Polyakov
On Wed, May 23, 2007 at 02:58:41PM +0200, Jörn Engel ([EMAIL PROTECTED]) wrote:
> On Sun, 20 May 2007 21:30:52 +0400, Evgeniy Polyakov wrote:
> > 
> > In that case segment size must be more than 32 bits, or below
> > transformation will not be correct?
> 
> Must it?  If segment size is just 20bit then the filesystem may only be
> 52bit.  Or 51bit when using signed values.

And what if it is 33 bits? Or it is not allowed?

> > segsize is long, but should be u64 I think.
> 
> It could be s32 as well.

It is a matter of definition - if segment size is allowed to be more
than 32 bits, then below transformation is not correct, otherwise
segment size should not use additional 32bits on 64bit platform, since
it is long.

> > static void fixup_from_wbuf(struct super_block *sb, struct logfs_area
> > *area, void *read, u64 ofs, size_t readlen)
> > 
> > u32 read_start = ofs & (super->s_segsize - 1);
> > u32 read_end = read_start + readlen;
> > 
> > And this can overflow, since readlen is size_t.
> 
> Theoretically yes.  Practically readlen is bounded to sb->blocksize plus
> one header.  I'll start worrying about that when blocksize approaches
> 32bit limit.
> 
> > > If anyone can find similar bugs, the bounty is a beer or non-alcoholic
> > > beverage of choice. :)
> > 
> > Stop kiling your kidneys, your health and promote such antisocial style
> > of life, start drinking vodka instead.
> 
> I'm just a German.  Forgive me if I drink lesser beverages.

You should definitely change that.


Btw, what about this piece:

int logfs_erase_segment(struct super_block *sb, u32 index)
{
struct logfs_super *super = LOGFS_SUPER(sb);

super->s_gec++;

return mtderase(sb, index << super->s_segshift, super->s_segsize);
}

index << super->s_segshift might overflow, mtderase expects loff_t
there, since index can be arbitrary segment number, is it possible, that
overflow really occurs?

> Jörn
> 
> -- 
> Eighty percent of success is showing up.
> -- Woody Allen

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 04:32:37PM +0200, Miklos Szeredi wrote:
> > Umm...  It is related to detached subtrees, but I'm not sure if it is what
> > you are thinking about.
> 
> I was thinking of a similar one by Mike Waychison.  It had the problem
> of requiring a spinlock for mntget/mntput.  It was also different in
> that it did not gradually dissolve detached trees, but kept them as
> whole blobs until the last ref went away.
 
Here the spinlock is needed only when mnt_busy goes to 0, so presumably
it won't be a serious problem on more or less common setups; however,
it certainly would need serious profiling.

> How will this work with copy_tree() and namespace duplication, which
> currently walk the tree with only namespace_sem held?

Easy - grab namespace_sem, grab vfsmount_lock, walk the subtree and bump
mnt_busy on everything (by 1 + number of non-busy children).  Then drop
vfsmount_lock and do as usual, dropping references in tree being copied
as you go.  Nothing will get attached or detached due to namespace_sem,
nothing will get evicted by anybody other than you since you've got all
that stuff pinned down.  End of story...
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> > >   * invalidation on unlink is still an open problem.
> > >   * locking in final mntput() doesn't look nice; we probably need
> > > a new refcounting scheme for vfsmounts to make that work.  I have a 
> > > variant
> > > that might work here (and make life much easier for expiry logics in
> > > automount/shared trees, which is what it had been initially proposed for),
> > 
> > Which variant?  We had that "detached subtrees" thing, is that it?
> 
> Umm...  It is related to detached subtrees, but I'm not sure if it is what
> you are thinking about.

I was thinking of a similar one by Mike Waychison.  It had the problem
of requiring a spinlock for mntget/mntput.  It was also different in
that it did not gradually dissolve detached trees, but kept them as
whole blobs until the last ref went away.

> Short version of the story: new counter (mnt_busy) that would be defined
> in the following way: the number of external references (not due to the
> vfsmount tree structure or from namespace to root) + the number of
> children that have non-zero ->mnt_busy.  And a per-vfsmount flag ("goner").
> 
> The rules for handling ->mnt_busy:
>   * duplicating external reference: increment m->mnt_busy
>   * getting from m to child: increment child->mnt_busy, if it went
> from 0 to non-zero - increment m->mnt_busy as well (that's done under
> vfsmount_lock, so we can safely check for zero here).
>   * getting from m to parent: increment parent->mnt_busy.
>   * dropping external reference: decrement m->mnt_busy; if it's still
> non-zero, we are done.  If it's zero, we are in for some work (and had
> acquired vfsmount_lock by atomic_dec_and_lock()).  Here's what we do:
>   * go through ancestors, decrementing ->mnt_busy, until we
> hit the root or get to one with ->mnt_busy staying
> non-zero.
>   * find the most remote ancestor that has zero ->mnt_busy
> and is marked as goner (might be m itself).
>   * if no such beast exists, we are done.
>   * otherwise, detach the subtree rooted in that ancestor
> from its parent (if any) and unhash its root (if hashed).

How will this work with copy_tree() and namespace duplication, which
currently walk the tree with only namespace_sem held?

> Now there is no external references to any vfsmount in that
> subtree.
>   * now we can kill all vfsmounts in that subtree.
>   * detaching m from parent: nothing; we trade a busy child of parent
> for new external reference to parent.
>   * lazy umount: in addition to detaching everything from parents
> and dropping resulting external references to parents, mark everything
> in the subtree as goners.
>   * normal umount: check ->mnt_busy *and* lack of children, detach,
> mark as goner, drop resulting external reference to parent.
>   * fun new stuff - umount of intact subtree: detach the subtree from
> parent, do *not* dissolve it, mark everything in subtree as goners.  If
> something we mark as goner is not busy, we can kill it and all its 
> descendents.
> The subtree will be shrinking as its pieces lose external references.
>   * check for expirability: "we hold an external reference to m and
> m->mnt_busy is 1".  No need to look into children, etc.
>   * your vfsmounts: simply mark them goners from the very beginning.
>  
> > > but it still doesn't kill the need to deal with invalidation.  And
> > > yes, NFS still needs it (and so do all network filesystems, really).
> > > The question of caching is related to that.
> > 
> > So what's so special about invalidation?  Why not just treat
> > dir-on-file mounts the same as any other ref on the dentry?
> 
> Because of the case of having something mounted in that subtree.  The
> current code doesn't even try to evict such stuff.  NFS *does*, but
> it's not in position to do that decently (not NFS fault, it's just that
> we don't have the data needed for it).
> 
> Note that one problem we used to have back then is gone - namely, 
> per-namespace
> semaphores.  It's a global semaphore now, so we *can* do cross-namespace
> rogering of mount trees without that kind of locking horrors.
> 
> What we really need is "go through dentry subtree, try to evict everything
> we can, for anything that has stuff mounted on it go through all such
> vfsmounts and kick them and all their descendents out".  That's what should
> happen on invalidation.  From generic code, so that NFS wouldn't have to
> bother.
> 
> And _that_ is what we could call from ->unlink() on your inode - would take
> care of submounts.

OK, I'll digest this info.

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 03:23:54PM +0200, Ph. Marek wrote:
> How about some special node in eg. /proc (or a new filesystem)?
> Eg.
>/fileAsDir/etc/passwd/owner ...
> would work for all *files*. For directories we do not know whether we're 
> still 
> climbing the hierarchy or would like to see meta-data.

So we need to make *anything* done anywhere in the namespace to modify
the dentry tree on that fs.  Could you spell "fuck, NO"?
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Ph. Marek
On Mittwoch, 23. Mai 2007, Al Viro wrote:
> Then I do not understand what this mechanism could be used for, other
> than an odd way to twist POSIX behaviour and see how much of the userland
> would survive that.
I have some similar considerations about how userspace should deal with that.

The behaviour of simply "cd file/" are not that robust, I fear ...


> Certainly not useful for your "look into tarball 
> as a tree", unless you seriously want to scan the entire damn fs for
> tarballs at mount time and set up a superblock for each.  And for per-file
> extended attributes/forks/whatever-you-call-that-abomination it also
> obviously doesn't help, since you lose them for directories.
Well, *use cases* I can see. I'd like to use that - for loop mounting, 
archives, possibly using symlinks to remote filesystems "symlink1 => 
ssh:[EMAIL PROTECTED]" (although that's possible with FUSE anyway - but would 
be 
possibly within a .zip, too), ...


But I'm not sure how to do the presentation to userspace *right*.


How about some special node in eg. /proc (or a new filesystem)?
Eg.
   /fileAsDir/etc/passwd/owner ...
would work for all *files*. For directories we do not know whether we're still 
climbing the hierarchy or would like to see meta-data.

Some way like a ".this" entry is not the Right Way IMO ...
Well, I cannot imagine a real good way to tell where I'd like to stop 
following the "normal" filesystem and go into the "generated" hierarchy ...

   /fileAsDir/level-3/usr/local/bin/owner
is not nice.


Regards,

Phil
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 03:01:38PM +0200, Miklos Szeredi wrote:
> Someone might think of a way to make those work with directories.
> Invisible directory entries, anyone?  

Not unless you manage to get working union-mount [*NOT* unionfs]
 
> > * invalidation on unlink is still an open problem.
> > * locking in final mntput() doesn't look nice; we probably need
> > a new refcounting scheme for vfsmounts to make that work.  I have a variant
> > that might work here (and make life much easier for expiry logics in
> > automount/shared trees, which is what it had been initially proposed for),
> 
> Which variant?  We had that "detached subtrees" thing, is that it?

Umm...  It is related to detached subtrees, but I'm not sure if it is what
you are thinking about.

Short version of the story: new counter (mnt_busy) that would be defined
in the following way: the number of external references (not due to the
vfsmount tree structure or from namespace to root) + the number of
children that have non-zero ->mnt_busy.  And a per-vfsmount flag ("goner").

The rules for handling ->mnt_busy:
* duplicating external reference: increment m->mnt_busy
* getting from m to child: increment child->mnt_busy, if it went
from 0 to non-zero - increment m->mnt_busy as well (that's done under
vfsmount_lock, so we can safely check for zero here).
* getting from m to parent: increment parent->mnt_busy.
* dropping external reference: decrement m->mnt_busy; if it's still
non-zero, we are done.  If it's zero, we are in for some work (and had
acquired vfsmount_lock by atomic_dec_and_lock()).  Here's what we do:
* go through ancestors, decrementing ->mnt_busy, until we
  hit the root or get to one with ->mnt_busy staying
  non-zero.
* find the most remote ancestor that has zero ->mnt_busy
  and is marked as goner (might be m itself).
* if no such beast exists, we are done.
* otherwise, detach the subtree rooted in that ancestor
  from its parent (if any) and unhash its root (if hashed).
  Now there is no external references to any vfsmount in that
  subtree.
* now we can kill all vfsmounts in that subtree.
* detaching m from parent: nothing; we trade a busy child of parent
for new external reference to parent.
* lazy umount: in addition to detaching everything from parents
and dropping resulting external references to parents, mark everything
in the subtree as goners.
* normal umount: check ->mnt_busy *and* lack of children, detach,
mark as goner, drop resulting external reference to parent.
* fun new stuff - umount of intact subtree: detach the subtree from
parent, do *not* dissolve it, mark everything in subtree as goners.  If
something we mark as goner is not busy, we can kill it and all its descendents.
The subtree will be shrinking as its pieces lose external references.
* check for expirability: "we hold an external reference to m and
m->mnt_busy is 1".  No need to look into children, etc.
* your vfsmounts: simply mark them goners from the very beginning.
 
> > but it still doesn't kill the need to deal with invalidation.  And
> > yes, NFS still needs it (and so do all network filesystems, really).
> > The question of caching is related to that.
> 
> So what's so special about invalidation?  Why not just treat
> dir-on-file mounts the same as any other ref on the dentry?

Because of the case of having something mounted in that subtree.  The
current code doesn't even try to evict such stuff.  NFS *does*, but
it's not in position to do that decently (not NFS fault, it's just that
we don't have the data needed for it).

Note that one problem we used to have back then is gone - namely, per-namespace
semaphores.  It's a global semaphore now, so we *can* do cross-namespace
rogering of mount trees without that kind of locking horrors.

What we really need is "go through dentry subtree, try to evict everything
we can, for anything that has stuff mounted on it go through all such
vfsmounts and kick them and all their descendents out".  That's what should
happen on invalidation.  From generic code, so that NFS wouldn't have to
bother.

And _that_ is what we could call from ->unlink() on your inode - would take
care of submounts.

Note that I'm not all that happy about this scheme; we might make it work,
but I still want to see a good use scenario for that kind of stuff.
Invalidation logics is a separate story - it's simply needed for existing
stuff; that area sucks *badly*, regardless of adding these hybrid objects.
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Jaroslav Sykora
Hello,

On Tuesday May 22, 2007, Miklos Szeredi wrote:
> Why do we want this?
> 
> 
> That depends on who you ask.  My answer is this:
> 
>   'foo.tar.gz/foo/bar' or
>   'foo.tar.gz/contents/foo/bar'
> 
> or something similar.
> 

I work for a similir goal in my bachelor's theses. But my approach is 
a little bit different. Instead of:

>   'foo.tar.gz/foo/bar' or
>   'foo.tar.gz/contents/foo/bar'

I do:
   'foo.zip^/foo/bar' or
   'foo.zip^/contents/foo/bar'

where foo.zip is a ZIP file. See the little '^' in the pathname: it's an
escape character. I have a kernel patch which modifies a lookup 
resolution function and when a normal lookup fails ('foo.zip^/foo/bar'
dosn't exist) and the pathname contains '^' it *redirects* the lookup to 
a FUSE mount.

So say we have a FUSE vfs server (called 'RheaVFS') on '/tmp/shadow'. 
When a process tries to access '/home/xx/foo.zip^/foo/bar' 
it is in-kernel transparently redirected to 
'/tmp/shadow/home/xx/foo.zip^/foo/bar' and the vfs server handles all the
extraction/compresion/semi-mounting/semi-umounting/whatsoever...

Advantages:
* 99.9% imho backward compatible. No problems with clever programs 
  doing stat() before open()/opendir().
* you can easily and transparently stack filesystems one on top of another
  with a clear semantic. Say we have 'foo.tar.gz'; then:
'foo.tar.gz^' is a decompressed TAR *file*;
'foo.tar.gz^^' is a directory
* you can pass additional parameters to the vfs server after the '^', 
  eg. 'foo.zip^compresslevel=1/foo/bar'
* works with symlinks too

Drawbacks:
* users must/should be aware of the special escape char '^'
* usually only single vfs server per user handles all "virtual"
  directories --> single point of failure. (But I implemented a quirk
  which allows restarting the FUSE vfs server with only minor
  problems)
* probably tons of others I don't know

The project tarball is at:

http://veverka.sh.cvut.cz/~sykora/prj/rheavfs-20070523-1239.tar.gz

The kernel patch is in the tarball and for your viewing pleasure 
I've attached it to this email.
The patch is againts 2.6.20.1 and works with 2.6.21.1 too.
There are two minor failed hunks for 2.6.22-rc2 which I hadn't time to correct.

My project is not completed, there's almost no documentation etc.
Maybe I will put together some simple README/HOWTO in a few days.
I wouldn't present the project at this time, but seeing your post
I've thought my approach might be interesting for the discussion.


Jara

-- 
I find television very educating. Every time somebody turns on the set, 
I go into the other room and read a book.
--- orig/fs/file_table.c	2007-02-20 07:34:32.0 +0100
+++ new/fs/file_table.c	2007-05-09 20:49:52.0 +0200
@@ -152,8 +152,8 @@ EXPORT_SYMBOL(fput);
  */
 void fastcall __fput(struct file *file)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct vfsmount *mnt = file->f_path.mnt;
+	struct dentry *dentry = file->f_path.dentry, *s_dentry = file->f_shdw;
+	struct vfsmount *mnt = file->f_path.mnt, *s_mnt = file->f_shdwmnt;
 	struct inode *inode = dentry->d_inode;
 
 	might_sleep();
@@ -178,15 +178,21 @@ void fastcall __fput(struct file *file)
 	file_kill(file);
 	file->f_path.dentry = NULL;
 	file->f_path.mnt = NULL;
+	file->f_shdw = NULL;
+	file->f_shdwmnt = NULL;
 	file_free(file);
 	dput(dentry);
 	mntput(mnt);
+	if (s_dentry) {
+		/* NOTE: if s_dentry == NULL then s_mnt may be ERR_PTR */
+		dput(s_dentry);
+		mntput(s_mnt);
+	}
 }
 
-struct file fastcall *fget(unsigned int fd)
+struct file fastcall *__fget(struct files_struct *files, unsigned int fd)
 {
 	struct file *file;
-	struct files_struct *files = current->files;
 
 	rcu_read_lock();
 	file = fcheck_files(files, fd);
@@ -202,6 +208,11 @@ struct file fastcall *fget(unsigned int 
 	return file;
 }
 
+struct file fastcall *fget(unsigned int fd)
+{
+	return __fget(current->files, fd);
+}
+
 EXPORT_SYMBOL(fget);
 
 /*
--- orig/fs/open.c	2007-02-20 07:34:32.0 +0100
+++ new/fs/open.c	2007-05-09 21:01:24.0 +0200
@@ -413,13 +413,51 @@ asmlinkage long sys_access(const char __
 	return sys_faccessat(AT_FDCWD, filename, mode);
 }
 
+static inline int read_fs_flags(void)
+{
+	int res;
+	read_lock(¤t->fs->lock);
+	res = current->fs->flags;
+	read_unlock(¤t->fs->lock);
+	return res;
+}
+
+void set_fs_shdwpwd(struct fs_struct *fs,
+			struct vfsmount *mnt, struct dentry *dentry)
+{
+	struct dentry *old_dentry;
+	struct vfsmount *old_mnt;
+
+	BUG_ON(dentry != NULL && mnt == NULL);
+	write_lock(&fs->lock);
+	/* set shadow pwd */
+	old_dentry = fs->shdwpwd;
+	old_mnt = fs->shdwpwdmnt;
+	fs->

Re: [RFC][PATCH 5/14] Introduce union stack

2007-05-23 Thread Serge E. Hallyn
Quoting Paul Dickson ([EMAIL PROTECTED]):
> On Mon, 14 May 2007 13:23:06 -0700, Badari Pulavarty wrote:
> 
> > > + while (fs) {
> > > + locked = union_trylock(fs->root);
> > > + if (!locked)
> > > + goto loop1;
> > > + locked = union_trylock(fs->altroot);
> > > + if (!locked)
> > > + goto loop2;
> > > + locked = union_trylock(fs->pwd);
> > > + if (!locked)
> > > + goto loop3;
> > > + break;
> > > + loop3:
> > > + union_unlock(fs->altroot);
> > > + loop2:
> > > + union_unlock(fs->root);
> > > + loop1:
> > > + read_unlock(&fs->lock);
> > > + UM_DEBUG_LOCK("Failed to get all semaphores in fs_struct!\n");
> > > + cpu_relax();
> > > + read_lock(&fs->lock);
> > > + continue;
> > 
> > Nit.. why "continue" ?
> > 
> > > + }
> > > + BUG_ON(!fs);
> 
> How about getting rid of the gotos:
> 
>   while (fs) {
>   locked = union_trylock(fs->root);
>   if (locked) {
>   locked = union_trylock(fs->altroot);
>   if (locked) {
>   locked = union_trylock(fs->pwd);
>   if (locked)
>   break;
>   else {
>   union_unlock(fs->altroot);
>   union_unlock(fs->root);
>   }
>   else
>   union_unlock(fs->root);
>   }
>   }
>   read_unlock(&fs->lock);
>   UM_DEBUG_LOCK("Failed to get all semaphores in fs_struct!\n");
>   cpu_relax();
>   read_lock(&fs->lock);
>   }
>   BUG_ON(!fs);
> 
> It's the same number of lines.  Shorter if you get rid of the "locked"
> variable.

I dunno, I thought the goto versoin was cleaner and easier to tell that
the right locks are getting unlocked.  The worst part in the second
version is the break in the middle!

-serge
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Review status (Re: [PATCH] LogFS take three)

2007-05-23 Thread Jörn Engel
On Sun, 20 May 2007 21:30:52 +0400, Evgeniy Polyakov wrote:
> 
> In that case segment size must be more than 32 bits, or below
> transformation will not be correct?

Must it?  If segment size is just 20bit then the filesystem may only be
52bit.  Or 51bit when using signed values.

> segsize is long, but should be u64 I think.

It could be s32 as well.

> static void fixup_from_wbuf(struct super_block *sb, struct logfs_area
>   *area, void *read, u64 ofs, size_t readlen)
> 
> u32 read_start = ofs & (super->s_segsize - 1);
> u32 read_end = read_start + readlen;
> 
> And this can overflow, since readlen is size_t.

Theoretically yes.  Practically readlen is bounded to sb->blocksize plus
one header.  I'll start worrying about that when blocksize approaches
32bit limit.

> > If anyone can find similar bugs, the bounty is a beer or non-alcoholic
> > beverage of choice. :)
> 
> Stop kiling your kidneys, your health and promote such antisocial style
> of life, start drinking vodka instead.

I'm just a German.  Forgive me if I drink lesser beverages.

Jörn

-- 
Eighty percent of success is showing up.
-- Woody Allen
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> On Wed, May 23, 2007 at 12:39:25PM +0100, Al Viro wrote:
> > Then I do not understand what this mechanism could be used for, other
> > than an odd way to twist POSIX behaviour and see how much of the userland
> > would survive that.  Certainly not useful for your "look into tarball
> > as a tree", unless you seriously want to scan the entire damn fs for
> > tarballs at mount time and set up a superblock for each.  And for per-file
> > extended attributes/forks/whatever-you-call-that-abomination it also
> > obviously doesn't help, since you lose them for directories.

Someone might think of a way to make those work with directories.
Invisible directory entries, anyone?  

> > IOW, what uses do you have in mind?  Complete scenario, please...
> 
> Ah... After rereading the thread you've mentioned in the very beginning,
> I think I understand what you are driving at.  However, in that case
>   * I really don't see why bother with returning vfsmount at all.
> dentry alone is enough to create a new vfsmount, all in fs/namei.c.
>   * the lifetime rules look fscking scary.  You call that ->enter()
> on nearly every damn lookup.  OK, so you'll recreate equivalent vfsmount,
> but...  That's a lot of allocations/freeing.  Can we do some caching and
> deal with it on memory pressure?

Hmm.

>   * invalidation on unlink is still an open problem.
>   * locking in final mntput() doesn't look nice; we probably need
> a new refcounting scheme for vfsmounts to make that work.  I have a variant
> that might work here (and make life much easier for expiry logics in
> automount/shared trees, which is what it had been initially proposed for),

Which variant?  We had that "detached subtrees" thing, is that it?

> but it still doesn't kill the need to deal with invalidation.  And
> yes, NFS still needs it (and so do all network filesystems, really).
> The question of caching is related to that.

So what's so special about invalidation?  Why not just treat
dir-on-file mounts the same as any other ref on the dentry?

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 08:34:42AM -0400, Trond Myklebust wrote:
> On Wed, 2007-05-23 at 08:36 +0100, Al Viro wrote:
> > On Wed, May 23, 2007 at 09:19:17AM +0200, Miklos Szeredi wrote:
> > > > Eh...  Arbitrary limitations are fun, aren't they?
> > > 
> > > But these mounts _are_ special.  There is really no point in moving or
> > > pivoting them.
> > 
> > pivoting - probably true, moving... why not?
> 
> Moving would be an implementation artefact that doesn't really
> correspond to any useful operation on the filesyst
> 
> AFAIK, most filesystems that have implemented subfiles (excepting
> Reiser4 of course) do not allow you to rename or move the subfile
> directory or its contents from one parent file to another.

If that's about xattr and nothing else, colour me thoroughly uninterested.
If it might have other interesting uses, OTOH...
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Trond Myklebust
On Wed, 2007-05-23 at 08:36 +0100, Al Viro wrote:
> On Wed, May 23, 2007 at 09:19:17AM +0200, Miklos Szeredi wrote:
> > > Eh...  Arbitrary limitations are fun, aren't they?
> > 
> > But these mounts _are_ special.  There is really no point in moving or
> > pivoting them.
> 
> pivoting - probably true, moving... why not?

Moving would be an implementation artefact that doesn't really
correspond to any useful operation on the filesyst

AFAIK, most filesystems that have implemented subfiles (excepting
Reiser4 of course) do not allow you to rename or move the subfile
directory or its contents from one parent file to another.

Trond

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 12:39:25PM +0100, Al Viro wrote:
> Then I do not understand what this mechanism could be used for, other
> than an odd way to twist POSIX behaviour and see how much of the userland
> would survive that.  Certainly not useful for your "look into tarball
> as a tree", unless you seriously want to scan the entire damn fs for
> tarballs at mount time and set up a superblock for each.  And for per-file
> extended attributes/forks/whatever-you-call-that-abomination it also
> obviously doesn't help, since you lose them for directories.
> 
> IOW, what uses do you have in mind?  Complete scenario, please...

Ah... After rereading the thread you've mentioned in the very beginning,
I think I understand what you are driving at.  However, in that case
* I really don't see why bother with returning vfsmount at all.
dentry alone is enough to create a new vfsmount, all in fs/namei.c.
* the lifetime rules look fscking scary.  You call that ->enter()
on nearly every damn lookup.  OK, so you'll recreate equivalent vfsmount,
but...  That's a lot of allocations/freeing.  Can we do some caching and
deal with it on memory pressure?
* invalidation on unlink is still an open problem.
* locking in final mntput() doesn't look nice; we probably need
a new refcounting scheme for vfsmounts to make that work.  I have a variant
that might work here (and make life much easier for expiry logics in
automount/shared trees, which is what it had been initially proposed for),
but it still doesn't kill the need to deal with invalidation.  And yes,
NFS still needs it (and so do all network filesystems, really).  The question
of caching is related to that.
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Jan Engelhardt

On May 22 2007 20:48, Miklos Szeredi wrote:
>Why do we want this?
>
>
>That depends on who you ask.  My answer is this:
>
>  'foo.tar.gz/foo/bar' or
>  'foo.tar.gz/contents/foo/bar'
>
>or something similar.

Stole reiser4 an idea.
These semantics are quite fragile. Until now, chdir is only possible
for directories (otherwise, -ENOTDIR), and opening a directory without
O_DIRECTORY gives -EISDIR. You can't just change semantics.

That said, with FUSE, something like this should already be possible,
should not it?

And looking at your example of foo.tar.gz/foo/bar,the tar.gz needs to
be read at least once to get at foo/bar.


Jan
-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
> When the real superblock is created.  It could even be the _same_
> super block as the real one.  There'd be just the problem of anchoring
> the dir-on-file dentries somewhere...
> 
> Or with fuse the dir-on-file mount can just come from any mounted
> filesystem, again possibly the same one as the parent.  I do actually
> test with this.  The userspace filesystem supplies a file descriptor,
> from which the struct path is extracted and returned from ->enter().

Then I do not understand what this mechanism could be used for, other
than an odd way to twist POSIX behaviour and see how much of the userland
would survive that.  Certainly not useful for your "look into tarball
as a tree", unless you seriously want to scan the entire damn fs for
tarballs at mount time and set up a superblock for each.  And for per-file
extended attributes/forks/whatever-you-call-that-abomination it also
obviously doesn't help, since you lose them for directories.

IOW, what uses do you have in mind?  Complete scenario, please...
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> > Right.  After locking vfsmount_lock, mount_dironfile() should recheck
> > if there was a race and bail out.
> 
> Owww...  Not pretty, that...

If the cost of ->enter() is low, then it shouln't really be a problem.
We can't use ->i_mutex for locking, and introducing a new lock for
this doesn't sound right either.

> > I don't think the filesystem ought to try _creating_ a vfsmount.  I
> > imagine, that the fs has already a kernel-internal mounted for this
> > kind of stuff, and it just supplies a dentry from that.  The vfsmount
> > isn't actually important, but it should be readily available, and it's
> > easier to clone from a vfsmount/dentry pair.
> 
> I don't get it.  What's the point of that exercise, then?  When do you
> create that kernel-internal mount?

When the real superblock is created.  It could even be the _same_
super block as the real one.  There'd be just the problem of anchoring
the dir-on-file dentries somewhere...

Or with fuse the dir-on-file mount can just come from any mounted
filesystem, again possibly the same one as the parent.  I do actually
test with this.  The userspace filesystem supplies a file descriptor,
from which the struct path is extracted and returned from ->enter().

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH 10/14] In-kernel file copy between union mounted filesystems

2007-05-23 Thread Bharata B Rao
On Tue, May 22, 2007 at 08:35:17AM -0400, Shaya Potter wrote:
> Bharata B Rao wrote:
> >
> >In case of regular files, when we copyup a file, we are actually preventing
> >any writes to the lower layers (which we have designated as read only).
> >
> >Applying the same logic to devices, what do we achieve by copying them up ?
> >How does it matter if we write to the device through a node in the upper
> >layer or in the lower layer. Both the writes eventually do the same thing.
> 
> What happens if the lower layer is on a read only medium.  But the top 
> layer is RW.  Why can't one change permissions?  In your model, one can't.
> 
> What happens if one wants to share a lower layer read-only (I'm doing 
> this with my research into uses of union file systems), one doesn't want 
> permission change in one use of the lower layer to affect any of the 
> other uses.

Ok, makes sense. Thanks for the clarification.

So looks like in addition to copyup on open (which is what we do currently)
there is a case for doing copyups for other situations also.

Regards,
Bharata.
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> > > + * This is tricky, because for namespace modification we must take the
> > > + * namespace semaphore.  But mntput() is called from various places,
> > > + * sometimes with namespace_sem held.  Fortunately in those places the
> > > + * mount cannot yet have MNT_DIRONFILE, or at least that's what I
> > > + * hope...
> > > + *
> > > + * The umounting is done in two stages, first the mount is removed
> > > + * from the hashes.  This is done atomically wrt other mount lookups,
> > > + * so it's not possible to acquire a new ref to this dead mount that
> > > + * way.
> > > + *
> > > + * Then after having locked namespace_sem and relocked vfsmount_lock,
> > > + * the mount is properly detached.
> > > + */
> > > +static void umount_dironfile(struct vfsmount *mnt)
> > > + __releases(vfsmount_lock)
> > > +{
> > > + struct nameidata nd;
> > 
> > You've got to be kidding.  nameidata is *big*.  If anything, we want
> > to make detach_mnt() take struct path * instead, but even that is
> > lousy due to recursion.
> > 
> > I really don't like what's going on here.  The thing is, current code
> > is based on assumption that presence in the mount tree => holding a
> > reference.  We _might_ deal with that (there was an old plan to change
> > refcounting logics for vfsmounts), but that sort of games with locks
> > spells trouble.  What happens, for example, if namespace gets cloned
> > before you grab namespace_sem?
> 
> It _should_ work.  The mount in the new namespace will be created
> (with namespace_sem held, so we can't yet free this mount), and then
> dropped, because there are no refs to it.

BTW, I'm not saying I like this.  It's pretty ugly and fragile.  But
it's damn convenient to get rid of these mounts from mntput().

Is there a better alternative?

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 12:09:19PM +0200, Miklos Szeredi wrote:
> Right.  After locking vfsmount_lock, mount_dironfile() should recheck
> if there was a race and bail out.

Owww...  Not pretty, that...
 
> I don't think the filesystem ought to try _creating_ a vfsmount.  I
> imagine, that the fs has already a kernel-internal mounted for this
> kind of stuff, and it just supplies a dentry from that.  The vfsmount
> isn't actually important, but it should be readily available, and it's
> easier to clone from a vfsmount/dentry pair.

I don't get it.  What's the point of that exercise, then?  When do you
create that kernel-internal mount?

> As I said, the superblock should be persistent, so we'll get a stable
> st_dev for multiple mounts.

OK, but then I guess I don't understand the intended use.
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> On Wed, May 23, 2007 at 11:03:08AM +0200, Miklos Szeredi wrote:
> > I still don't get it where the superblock comes in.  The locking is
> > "interesting" in there, yes.  And I haven't completely convinced
> > myself it's right, let alone something that won't easily be screwed up
> > in the future.  So there's definitely room for thought there.
> > 
> > But how does it matter if two different paths have the same sb or a
> > different sb mounted over them?
>  
> Because then you get a slew of fun issues with dropping the final reference
> to vfsmount vs. lookup on another place.  What hold do you have on that
> superblock and when do you switch from "oh, called ->enter() on the same
> inode again, return vfsmount over the same superblock" to "need to
> initialize that damn superblock, all mounts are gone"?
> 
> > The same dentry is mounted over each one.  The contents of the
> > directory should only depend on the contents of the underlying inode.
> > The path leading up to it is completely irrelevant.
> 
> So what kind of exclusion do you have for ->enter()?  None?
> 

So really these issues, are about how do we get hold of the superblock
to mount.

I think that should be a filesystem internal problem, and I suspect
the easiest solution is to just have a permanent meta superblock for
these dir-on-file mounts.

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> > + * This is called if the object has no ->lookup() defined, yet the
> > + * path contains a slash after the object name.
> > + *
> > + * If the filesystem defines an ->enter() method, this will be called,
> > + * and the filesystem shall fill the supplied struct path or return an
> > + * error.
> > + *
> > + * The returned path will be bind mounted on top of the object with
> > + * the MNT_DIRONFILE flag, and the nameidata will descend into the
> > + * mount.
> > + */
> > +static int enter_file(struct inode *inode, struct nameidata *nd)
> > +{
> > +   int err;
> > +   struct path newpath;
> > +
> > +   printk(KERN_DEBUG "%s/%d enter %s/\n", current->comm, current->pid,
> > +  nd->dentry->d_name.name);
> > +   if (!inode->i_op->enter)
> > +   return -ENOTDIR;
> > +
> > +   newpath.mnt = NULL;
> > +   newpath.dentry = NULL;
> > +   err = inode->i_op->enter(nd, &newpath);
> > +   if (!err) {
> > +   err = mount_dironfile(nd, &newpath);
> > +   pathput(&newpath);
> > +   }
> > +   return err;
> 
> Ouch.  What guarantees that two lookups won't race right here?  You are
> not holding any locks at that point, AFAICS...

Right.  After locking vfsmount_lock, mount_dironfile() should recheck
if there was a race and bail out.

> BTW, why newpath?  What's wrong with simply returning a new vfsmount
> with right ->mnt_root/->mnt_sb (instead of creating it inside
> mount_dironfile())?  ERR_PTR() for error, struct vfsmount * for success...

I don't think the filesystem ought to try _creating_ a vfsmount.  I
imagine, that the fs has already a kernel-internal mounted for this
kind of stuff, and it just supplies a dentry from that.  The vfsmount
isn't actually important, but it should be readily available, and it's
easier to clone from a vfsmount/dentry pair.

> > @@ -301,8 +310,8 @@ static struct vfsmount *clone_mnt(struct
> > mnt->mnt_mountpoint = mnt->mnt_root;
> > mnt->mnt_parent = mnt;
> >  
> > -   /* don't copy the MNT_USER flag */
> > -   mnt->mnt_flags &= ~MNT_USER;
> > +   /* don't copy some flags */
> > +   mnt->mnt_flags &= ~(MNT_USER | MNT_DIRONFILE);
> > if (flag & CL_SETUSER)
> > __set_mnt_user(mnt, owner);
> 
> Hmm?  So you do copy them and strip your MNT_DIRONFILE from copies?

Yes.  On namespace cloning the MNT_DIRONFILE will be re-added later.
Otherwise we shouln't even get here with MNT_DIRONFILE.

> > + * This is tricky, because for namespace modification we must take the
> > + * namespace semaphore.  But mntput() is called from various places,
> > + * sometimes with namespace_sem held.  Fortunately in those places the
> > + * mount cannot yet have MNT_DIRONFILE, or at least that's what I
> > + * hope...
> > + *
> > + * The umounting is done in two stages, first the mount is removed
> > + * from the hashes.  This is done atomically wrt other mount lookups,
> > + * so it's not possible to acquire a new ref to this dead mount that
> > + * way.
> > + *
> > + * Then after having locked namespace_sem and relocked vfsmount_lock,
> > + * the mount is properly detached.
> > + */
> > +static void umount_dironfile(struct vfsmount *mnt)
> > +   __releases(vfsmount_lock)
> > +{
> > +   struct nameidata nd;
> 
> You've got to be kidding.  nameidata is *big*.  If anything, we want
> to make detach_mnt() take struct path * instead, but even that is
> lousy due to recursion.
> 
> I really don't like what's going on here.  The thing is, current code
> is based on assumption that presence in the mount tree => holding a
> reference.  We _might_ deal with that (there was an old plan to change
> refcounting logics for vfsmounts), but that sort of games with locks
> spells trouble.  What happens, for example, if namespace gets cloned
> before you grab namespace_sem?

It _should_ work.  The mount in the new namespace will be created
(with namespace_sem held, so we can't yet free this mount), and then
dropped, because there are no refs to it.

> There's another problem, BTW - a lot of stuff does stat + open + fstat +
> compare kind of sequence.  You'll end up mounting/umounting between stat
> and open, which opens you to race with somebody else.  Get a different
> st_dev, eat a nice unreproducible error from application...

As I said, the superblock should be persistent, so we'll get a stable
st_dev for multiple mounts.

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 11:03:08AM +0200, Miklos Szeredi wrote:
> I still don't get it where the superblock comes in.  The locking is
> "interesting" in there, yes.  And I haven't completely convinced
> myself it's right, let alone something that won't easily be screwed up
> in the future.  So there's definitely room for thought there.
> 
> But how does it matter if two different paths have the same sb or a
> different sb mounted over them?
 
Because then you get a slew of fun issues with dropping the final reference
to vfsmount vs. lookup on another place.  What hold do you have on that
superblock and when do you switch from "oh, called ->enter() on the same
inode again, return vfsmount over the same superblock" to "need to
initialize that damn superblock, all mounts are gone"?

> The same dentry is mounted over each one.  The contents of the
> directory should only depend on the contents of the underlying inode.
> The path leading up to it is completely irrelevant.

So what kind of exclusion do you have for ->enter()?  None?
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Tue, May 22, 2007 at 08:48:49PM +0200, Miklos Szeredi wrote:
>   */
> -static int __follow_mount(struct path *path)
> +static int __follow_mount(struct path *path, bool enter)
>  {
>   int res = 0;
>   while (d_mountpoint(path->dentry)) {
> - struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
> + struct vfsmount *mounted =
> + lookup_mnt(path->mnt, path->dentry, enter);
> +
>   if (!mounted)
>   break;
>   dput(path->dentry);
> @@ -689,27 +697,37 @@ static int __follow_mount(struct path *p
>   return res;
>  }
>  
> -static void follow_mount(struct vfsmount **mnt, struct dentry **dentry)
> +/*
> + * Follows mounts on the given nameidata.
> + *
> + * Only follows "directory on file" mounts if LOOKUP_ENTER is set.
> + */
> +void follow_mount(struct nameidata *nd)

BTW, I'd split that (and matching updates in callers) into separate
patch.

>  {
> - while (d_mountpoint(*dentry)) {
> - struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
> + while (d_mountpoint(nd->dentry)) {
> + bool enter = nd->flags & LOOKUP_ENTER;

int, surely?

> + * This is called if the object has no ->lookup() defined, yet the
> + * path contains a slash after the object name.
> + *
> + * If the filesystem defines an ->enter() method, this will be called,
> + * and the filesystem shall fill the supplied struct path or return an
> + * error.
> + *
> + * The returned path will be bind mounted on top of the object with
> + * the MNT_DIRONFILE flag, and the nameidata will descend into the
> + * mount.
> + */
> +static int enter_file(struct inode *inode, struct nameidata *nd)
> +{
> + int err;
> + struct path newpath;
> +
> + printk(KERN_DEBUG "%s/%d enter %s/\n", current->comm, current->pid,
> +nd->dentry->d_name.name);
> + if (!inode->i_op->enter)
> + return -ENOTDIR;
> +
> + newpath.mnt = NULL;
> + newpath.dentry = NULL;
> + err = inode->i_op->enter(nd, &newpath);
> + if (!err) {
> + err = mount_dironfile(nd, &newpath);
> + pathput(&newpath);
> + }
> + return err;

Ouch.  What guarantees that two lookups won't race right here?  You are
not holding any locks at that point, AFAICS...

BTW, why newpath?  What's wrong with simply returning a new vfsmount
with right ->mnt_root/->mnt_sb (instead of creating it inside
mount_dironfile())?  ERR_PTR() for error, struct vfsmount * for success...

> @@ -301,8 +310,8 @@ static struct vfsmount *clone_mnt(struct
>   mnt->mnt_mountpoint = mnt->mnt_root;
>   mnt->mnt_parent = mnt;
>  
> - /* don't copy the MNT_USER flag */
> - mnt->mnt_flags &= ~MNT_USER;
> + /* don't copy some flags */
> + mnt->mnt_flags &= ~(MNT_USER | MNT_DIRONFILE);
>   if (flag & CL_SETUSER)
>   __set_mnt_user(mnt, owner);

Hmm?  So you do copy them and strip your MNT_DIRONFILE from copies?

> + * This is tricky, because for namespace modification we must take the
> + * namespace semaphore.  But mntput() is called from various places,
> + * sometimes with namespace_sem held.  Fortunately in those places the
> + * mount cannot yet have MNT_DIRONFILE, or at least that's what I
> + * hope...
> + *
> + * The umounting is done in two stages, first the mount is removed
> + * from the hashes.  This is done atomically wrt other mount lookups,
> + * so it's not possible to acquire a new ref to this dead mount that
> + * way.
> + *
> + * Then after having locked namespace_sem and relocked vfsmount_lock,
> + * the mount is properly detached.
> + */
> +static void umount_dironfile(struct vfsmount *mnt)
> + __releases(vfsmount_lock)
> +{
> + struct nameidata nd;

You've got to be kidding.  nameidata is *big*.  If anything, we want
to make detach_mnt() take struct path * instead, but even that is
lousy due to recursion.

I really don't like what's going on here.  The thing is, current code
is based on assumption that presence in the mount tree => holding a
reference.  We _might_ deal with that (there was an old plan to change
refcounting logics for vfsmounts), but that sort of games with locks
spells trouble.  What happens, for example, if namespace gets cloned
before you grab namespace_sem?

There's another problem, BTW - a lot of stuff does stat + open + fstat +
compare kind of sequence.  You'll end up mounting/umounting between stat
and open, which opens you to race with somebody else.  Get a different
st_dev, eat a nice unreproducible error from application...
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> > > As for unlink...  How do you deal with having that thing
> > > mounted, mounting something _under_ it (so that vfsmount would be kept
> > > busy) and then unlinking that sucker?
> >
> > Yeah, that's a good point.  Current patch doesn't deal with that.
> > Simplest solution could be to disallow submounting these.  Don't think
> > it makes much sense anyway.
> >
> 
> Hmm, think about /your/path/qemu-disk1.img/boot ,
> /your/path/qemu-disk2.img/usr , ...

I get it.

It could probably be done with a little added complexity.  For example
when a real mount is attached onto a dir-on-file mount, the
"mountedness" is propagated up to the dentry on the next real mount.

So in that case unlink won't be allowed, even if the immediate
attachment is a dir-on-file mount.

This is tricky to do right though.

Other possibility is to detach all mount trees attached to dentry on
unlink.

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> > So your question is, which mount takes priority on the lookup?  It
> > probably should be the propagated real mount, rather than the
> > dir-on-file one, shouldn't it?
> >
> 
> Maybe this might belong into __link_path_walk() similar to the
> handling of symbolic links. If the real mount has always higher
> priority why do we bother in follow_mount() about it.

Do you mean, that follow_mount() should never descend into the
dir-on-file mount but that should always be done by
__link_path_walk()?

This could make sense.

__lookup_mnt() currently returns the first matching mount in the hash
list.  With your suggestion, we'd need two __lookup_mnt() variants (or
a parameter).  One, that only matches normal mounts, and one that only
matches dir-on-file mounts.  Is that it?

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Jan Blunck

On 5/23/07, Miklos Szeredi <[EMAIL PROTECTED]> wrote:


> As for unlink...  How do you deal with having that thing
> mounted, mounting something _under_ it (so that vfsmount would be kept
> busy) and then unlinking that sucker?

Yeah, that's a good point.  Current patch doesn't deal with that.
Simplest solution could be to disallow submounting these.  Don't think
it makes much sense anyway.



Hmm, think about /your/path/qemu-disk1.img/boot ,
/your/path/qemu-disk2.img/usr , ...

Jan
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Jan Blunck

On 5/23/07, Miklos Szeredi <[EMAIL PROTECTED]> wrote:


So your question is, which mount takes priority on the lookup?  It
probably should be the propagated real mount, rather than the
dir-on-file one, shouldn't it?



Maybe this might belong into __link_path_walk() similar to the
handling of symbolic links. If the real mount has always higher
priority why do we bother in follow_mount() about it.

Jan
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> On Wed, May 23, 2007 at 10:05:21AM +0200, Miklos Szeredi wrote:
> > > Er...  These mounts might not be propagated, but what about a bind
> > > over another instance of such file in master tree?
> > 
> > So your question is, which mount takes priority on the lookup?  It
> > probably should be the propagated real mount, rather than the
> > dir-on-file one, shouldn't it?
> 
> There might be dragons in that area...
>  
> > > > I think they should be the same superblock, same dentry.  What would
> > > > be the advantage of doing otherwise?
> > > 
> > > Then you are going to have interesting time with locking in final 
> > > mntput().
> > 
> > Final mntput of what?
> 
> When the last reference to your mount goes away.

I still don't get it where the superblock comes in.  The locking is
"interesting" in there, yes.  And I haven't completely convinced
myself it's right, let alone something that won't easily be screwed up
in the future.  So there's definitely room for thought there.

But how does it matter if two different paths have the same sb or a
different sb mounted over them?

> > > BTW, what about having several links to the same file?  You have i_mutex
> > > on the inode, so serialization of those is not a problem, but...
> > 
> > Sorry, I lost it...
> 
> Say /foo/bar/a is such a file.
> 
> cd /foo/bar
> ln a b
> 
> now do lookups on a/ and b/
> 
> What happens?

The same dentry is mounted over each one.  The contents of the
directory should only depend on the contents of the underlying inode.
The path leading up to it is completely irrelevant.

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 10:05:21AM +0200, Miklos Szeredi wrote:
> > Er...  These mounts might not be propagated, but what about a bind
> > over another instance of such file in master tree?
> 
> So your question is, which mount takes priority on the lookup?  It
> probably should be the propagated real mount, rather than the
> dir-on-file one, shouldn't it?

There might be dragons in that area...
 
> > > I think they should be the same superblock, same dentry.  What would
> > > be the advantage of doing otherwise?
> > 
> > Then you are going to have interesting time with locking in final mntput().
> 
> Final mntput of what?

When the last reference to your mount goes away.
 
> > BTW, what about having several links to the same file?  You have i_mutex
> > on the inode, so serialization of those is not a problem, but...
> 
> Sorry, I lost it...

Say /foo/bar/a is such a file.

cd /foo/bar
ln a b

now do lookups on a/ and b/

What happens?
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> > > Eh...  Arbitrary limitations are fun, aren't they?
> > 
> > But these mounts _are_ special.  There is really no point in moving or
> > pivoting them.
> 
> pivoting - probably true, moving... why not?

I don't see any use for that.  But indeed, it should not be too hard
to do.

> > > What about MNT_SLAVE stuff being set up prior to that lookup?
> > 
> > These mounts are not propagated.  Or at least I hope so.  Propagation
> > stuff is a bit too complicated for my poor little brain.
> 
> Er...  These mounts might not be propagated, but what about a bind
> over another instance of such file in master tree?

So your question is, which mount takes priority on the lookup?  It
probably should be the propagated real mount, rather than the
dir-on-file one, shouldn't it?

> > I think they should be the same superblock, same dentry.  What would
> > be the advantage of doing otherwise?
> 
> Then you are going to have interesting time with locking in final mntput().

Final mntput of what?

> BTW, what about having several links to the same file?  You have i_mutex
> on the inode, so serialization of those is not a problem, but...

Sorry, I lost it...

> > I think doing this recursively should be allowed.  "Releasing last ref
> > cleans up the mess" should work in that case.
> 
> Releasing the last reference will lead to cascade of umounts in that
> case...  IOW, need to be careful with locking.

I think it's done right: detach_mnt() with namespace_sem and
vfsmount_lock, then release locks, and path_release(&old_nd).

If the recursion is extremely deep we could have stack overflow
problems though, aargh...

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 09:19:17AM +0200, Miklos Szeredi wrote:
> > Eh...  Arbitrary limitations are fun, aren't they?
> 
> But these mounts _are_ special.  There is really no point in moving or
> pivoting them.

pivoting - probably true, moving... why not?
 
> > What about MNT_SLAVE stuff being set up prior to that lookup?
> 
> These mounts are not propagated.  Or at least I hope so.  Propagation
> stuff is a bit too complicated for my poor little brain.

Er...  These mounts might not be propagated, but what about a bind
over another instance of such file in master tree?
 
> I think they should be the same superblock, same dentry.  What would
> be the advantage of doing otherwise?

Then you are going to have interesting time with locking in final mntput().
BTW, what about having several links to the same file?  You have i_mutex
on the inode, so serialization of those is not a problem, but...
 
> I think doing this recursively should be allowed.  "Releasing last ref
> cleans up the mess" should work in that case.

Releasing the last reference will lead to cascade of umounts in that
case...  IOW, need to be careful with locking.
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Miklos Szeredi
> > > Interesting...  How do you deal with mount propagation and things like
> > > mount --move?
> > 
> > Moving (or doing other mount operations on) an ancestor shouldn't be a
> > problem.  Moving this mount itself is not allowed, and neither is
> > doing bind or pivot_root.  Maybe bind could be allowed...
> 
> Eh...  Arbitrary limitations are fun, aren't they?

But these mounts _are_ special.  There is really no point in moving or
pivoting them.

> > When doing recursive bind on ancestor, these mounts are skipped.
> 
> What about clone copying your namespace?

In that case they are cloned, but only those survive which have refs
in the new namespace.

> What about MNT_SLAVE stuff being set up prior to that lookup?

These mounts are not propagated.  Or at least I hope so.  Propagation
stuff is a bit too complicated for my poor little brain.

> More interesting question: should independent lookups of that sucker
> on different paths end up with the same superblock (and vfsmount for
> each) or should we get fully independent mount on each?  The latter
> would be interesting wrt cache coherency...

I think they should be the same superblock, same dentry.  What would
be the advantage of doing otherwise?

> > > As for unlink...  How do you deal with having that thing
> > > mounted, mounting something _under_ it (so that vfsmount would be kept
> > > busy) and then unlinking that sucker?
> > 
> > Yeah, that's a good point.  Current patch doesn't deal with that.
> > Simplest solution could be to disallow submounting these.  Don't think
> > it makes much sense anyway.
> 
> Arbitrary limitations... (and that's where revalidate horrors come in, BTW).
> BTW^2: what if fs mounted that way will happen to have such node itself?

I think doing this recursively should be allowed.  "Releasing last ref
cleans up the mess" should work in that case.

> I'm not saying that it's unfeasible or won't lead to interesting things,
> but it really needs semantics done right...

Agreed :)

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] file as directory

2007-05-23 Thread Al Viro
On Wed, May 23, 2007 at 08:36:04AM +0200, Miklos Szeredi wrote:
> > Interesting...  How do you deal with mount propagation and things like
> > mount --move?
> 
> Moving (or doing other mount operations on) an ancestor shouldn't be a
> problem.  Moving this mount itself is not allowed, and neither is
> doing bind or pivot_root.  Maybe bind could be allowed...

Eh...  Arbitrary limitations are fun, aren't they?
 
> When doing recursive bind on ancestor, these mounts are skipped.

What about clone copying your namespace?  What about MNT_SLAVE stuff being
set up prior to that lookup?  More interesting question: should independent
lookups of that sucker on different paths end up with the same superblock
(and vfsmount for each) or should we get fully independent mount on each?
The latter would be interesting wrt cache coherency...

> > As for unlink...  How do you deal with having that thing
> > mounted, mounting something _under_ it (so that vfsmount would be kept
> > busy) and then unlinking that sucker?
> 
> Yeah, that's a good point.  Current patch doesn't deal with that.
> Simplest solution could be to disallow submounting these.  Don't think
> it makes much sense anyway.

Arbitrary limitations... (and that's where revalidate horrors come in, BTW).
BTW^2: what if fs mounted that way will happen to have such node itself?

I'm not saying that it's unfeasible or won't lead to interesting things,
but it really needs semantics done right...
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html