Re: [RFC 1/2] Case-insensitive XFS - kernel patch

2007-10-23 Thread Barry Naujok
On Wed, 24 Oct 2007 06:19:12 +1000, Christoph Hellwig <[EMAIL PROTECTED]> wrote:

> This patch is quite badly mangled by your mailer.  Could you just
> attach it?  (Or even better use a mailer that handles inlined text
> without mangling it..)

Found the setting at last. Here it is again...  


===
fs/xfs/Makefile
===

--- a/fs/xfs/Makefile   2007-10-23 17:19:40.0 +1000
+++ b/fs/xfs/Makefile   2007-10-23 16:17:22.173903950 +1000
@@ -74,6 +74,7 @@ xfs-y += xfs_alloc.o \
   xfs_trans_extfree.o \
   xfs_trans_inode.o \
   xfs_trans_item.o \
+  xfs_unicode.o \
   xfs_utils.o \
   xfs_vfsops.o \
   xfs_vnodeops.o \

===
fs/xfs/linux-2.6/xfs_iops.c
===

--- a/fs/xfs/linux-2.6/xfs_iops.c   2007-10-23 17:19:41.0 +1000
+++ b/fs/xfs/linux-2.6/xfs_iops.c   2007-10-23 16:43:19.828562924 +1000
@@ -47,12 +47,17 @@
  #include "xfs_buf_item.h"
  #include "xfs_utils.h"
  #include "xfs_vnodeops.h"
+#include "xfs_da_btree.h"
+#include "xfs_unicode.h"

  #include 
  #include 
  #include 
  #include 

+struct dentry_operations xfs_dentry_operations;
+struct dentry_operations xfs_nls_dentry_operations;
+
  /*
   * Bring the atime in the XFS inode uptodate.
   * Used before logging the inode to disk or when the Linux inode goes away.
@@ -369,10 +374,17 @@ xfs_vn_lookup(
  {
bhv_vnode_t *cvp;
int error;
+   struct xfs_mount *mp = XFS_I(dir)->i_mount;
+   struct dentry   *result;

if (dentry->d_name.len >= MAXNAMELEN)
return ERR_PTR(-ENAMETOOLONG);

+   if (xfs_sb_version_hasunicode(&mp->m_sb) ||
+   xfs_sb_version_hasoldci(&mp->m_sb))
+   dentry->d_op = mp->m_nls ? &xfs_nls_dentry_operations :
+   &xfs_dentry_operations;
+
error = xfs_lookup(XFS_I(dir), dentry, &cvp);
if (unlikely(error)) {
if (unlikely(error != ENOENT))
@@ -381,7 +393,11 @@ xfs_vn_lookup(
return NULL;
}

-   return d_splice_alias(vn_to_inode(cvp), dentry);
+   result = d_splice_alias(vn_to_inode(cvp), dentry);
+   if (result)
+   result->d_op = dentry->d_op;
+
+   return result;
  }

  STATIC int
@@ -823,3 +839,74 @@ const struct inode_operations xfs_symlin
.listxattr  = xfs_vn_listxattr,
.removexattr= xfs_vn_removexattr,
  };
+
+
+STATIC int
+xfs_dentry_hash(
+   struct dentry   *dir,
+   struct qstr *this)
+{
+   this->hash = xfs_dir_hashname(XFS_I(dir->d_inode),
+   this->name, this->len);
+   return 0;
+}
+
+STATIC int
+xfs_dentry_compare(
+   struct dentry   *dir,
+   struct qstr *a,
+   struct qstr *b)
+{
+   int result = xfs_dir_compname(XFS_I(dir->d_inode), a->name, a->len,
+   b->name, b->len);
+   if (result == 0) {
+   if (a->len == b->len)
+   memcpy((unsigned char *)a->name, b->name, a->len);
+   else {
+   /* TODO: more complicated when name lengths differ */
+   }
+   }
+   return result;
+}
+
+STATIC int
+xfs_nls_dentry_hash(
+   struct dentry   *dir,
+   struct qstr *this)
+{
+   xfs_mount_t *mp = XFS_I(dir->d_inode)->i_mount;
+
+   this->hash = xfs_nls_hash(mp->m_nls, mp->m_cft, this->name, this->len);
+   return 0;
+}
+
+STATIC int
+xfs_nls_dentry_compare(
+   struct dentry   *dir,
+   struct qstr *a,
+   struct qstr *b)
+{
+   xfs_mount_t *mp = XFS_I(dir->d_inode)->i_mount;
+   int result = xfs_nls_casecmp(mp->m_nls, mp->m_cft,
+   a->name, a->len, b->name, b->len);
+   if (result == 0) {
+   if (a->len == b->len)
+   memcpy((unsigned char *)a->name, b->name, a->len);
+   else {
+   /* TODO: more complicated when name lengths differ */
+   }
+   }
+   return result;
+}
+
+struct dentry_operations xfs_dentry_operations =
+{
+   .d_hash = xfs_dentry_hash,
+   .d_compare = xfs_dentry_compare,
+};
+
+struct dentry_operations xfs_nls_dentry_operations =
+{
+   .d_hash = xfs_nls_dentry_hash,
+   .d_compare = xfs_nls_dentry_compare,
+};

===
fs/xfs/linux-2.6/xfs_linux.h

[PATCH] 9p: fix memory leak in v9fs_get_sb

2007-10-23 Thread Latchesar Ionkov
This patch fixes a memory leak in v9fs_get_sb.

Signed-off-by: Latchesar Ionkov <[EMAIL PROTECTED]>

---
commit 77250c234636881976ebd567f9edc7c36711bd4a
tree 35d683472542706a3b78eb51bc29b92f690c314a
parent 01e7ae8c13bb06a2ce622ebace33bb7e28ef596c
author Latchesar Ionkov <[EMAIL PROTECTED]> 1193169149 -0600
committer Latchesar Ionkov <[EMAIL PROTECTED]> 1193169149 -0600

 fs/9p/vfs_super.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index bb0cef9..678c02f 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -119,6 +119,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, 
int flags,
 
P9_DPRINTK(P9_DEBUG_VFS, " \n");
 
+   st = NULL;
v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
if (!v9ses)
return -ENOMEM;
@@ -164,10 +165,12 @@ static int v9fs_get_sb(struct file_system_type *fs_type, 
int flags,
root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
v9fs_stat2inode(st, root->d_inode, sb);
v9fs_fid_add(root, fid);
+   kfree(st);
 
return simple_set_mnt(mnt, sb);
 
 error:
+   kfree(st);
if (fid)
p9_client_clunk(fid);
 
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] VFS: Reorder vfs_getxattr to avoid unnecessary calls to the LSM

2007-10-23 Thread James Morris
On Mon, 22 Oct 2007, David P. Quigley wrote:

> Originally vfs_getxattr would pull the security xattr variable using
> the inode getxattr handle and then proceed to clobber it with a subsequent 
> call
> to the LSM. This patch reorders the two operations such that when the xattr
> requested is in the security namespace it first attempts to grab the value 
> from
> the LSM directly. If it fails to obtain the value because there is no module
> present or the module does not support the operation it will fall back to 
> using
> the inode getxattr operation. In the event that both are inaccessible it
> returns EOPNOTSUPP.
> 
> Signed-off-by: David P. Quigley <[EMAIL PROTECTED]>

Acked-by: James Morris <[EMAIL PROTECTED]>


-- 
James Morris
<[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] VFS/Security: Rework inode_getsecurity and callers to return resulting buffer

2007-10-23 Thread James Morris
On Mon, 22 Oct 2007, David P. Quigley wrote:

> +static inline int security_inode_getsecurity(const struct inode *inode,
> + const char *name,
> + void **buffer)

It's better to keep function declarations on one line if possible (the 
80-col rule can be broken for this).

But in any case, it looks ok to me.


Acked-by: James Morris <[EMAIL PROTECTED]>


-- 
James Morris
<[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [V9fs-developer] [PATCH] 9p: v9fs_vfs_rename incorrect clunk order

2007-10-23 Thread Eric Van Hensbergen
On 10/22/07, Latchesar Ionkov <[EMAIL PROTECTED]> wrote:
> In v9fs_vfs_rename function labels don't match the fids that are clunked.
> The correct clunk order is clunking newdirfid first and then olddirfid next.
>
> Signed-off-by: Latchesar Ionkov <[EMAIL PROTECTED]>
Acked-by: Eric Van Hensbergen <[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 1/2] Case-insensitive XFS - kernel patch

2007-10-23 Thread Christoph Hellwig
This patch is quite badly mangled by your mailer.  Could you just
attach it?  (Or even better use a mailer that handles inlined text
without mangling it..)

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 0/2] getsecurity/vfs_getxattr cleanup

2007-10-23 Thread David P. Quigley
Any comments on these patches? I know Casey voiced some concerns about
them the first time I posted them but I believe I have adequately
addressed them.

Dave

On Mon, 2007-10-22 at 15:06 -0400, David P. Quigley wrote:
> This patch series addresses two concerns. Currently when a developer
> wishes to obtain a security blob from the LSM he/she has to guess at the
> length of the blob being returned. We modify security_inode_getsecurity
> to return an appropriately sized buffer populated with the security
> information and the length of that buffer. This is similar to the
> approach taken by Al Viro for the security_getprocattr hook. 
> 
> The second concern that this patch set addresses is that vfs_getxattr
> reads the security xattr using inode_getxattr and then proceeds to
> clobber it with a subsequent call to the LSM. This is fixed by
> reordering vfs_getxattr.
> 
> The series applies on top of 2.6.23 aka git hash
> bbf25010f1a6b761914430f5fca081ec8c7accd1
> 
> -
> To unsubscribe from this list: send the line "unsubscribe 
> linux-security-module" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file

2007-10-23 Thread Fengguang Wu
On Tue, Oct 23, 2007 at 10:10:53AM -0400, Chris Mason wrote:
> On Tue, 23 Oct 2007 19:56:20 +0800
> Fengguang Wu <[EMAIL PROTECTED]> wrote:
> 
> > On Tue, Oct 23, 2007 at 12:07:07PM +0200, Peter Zijlstra wrote:
> > > [ adding reiserfs devs to the CC ]
> > 
> > Thank you.
> > 
> > This fix is kind of crude - even when it fixed Maxim's problem, and
> > survived my stress testing of a lot of patching and kernel compiling.
> > I'd be glad to see better solutions.
> 
> This should be safe, reiserfs has the buffer heads themselves clean and
> the page should get cleaned eventually.  The cancel_dirty_page call was
> just an optimization to be VM friendly.
 
> -chris

'chris' as in fs/reiserfs/{inode.c,namei.c}, and now in btrfs/*?

Nice to meet you ;-)

Fengguang

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file

2007-10-23 Thread Chris Mason
On Tue, 23 Oct 2007 19:56:20 +0800
Fengguang Wu <[EMAIL PROTECTED]> wrote:

> On Tue, Oct 23, 2007 at 12:07:07PM +0200, Peter Zijlstra wrote:
> > [ adding reiserfs devs to the CC ]
> 
> Thank you.
> 
> This fix is kind of crude - even when it fixed Maxim's problem, and
> survived my stress testing of a lot of patching and kernel compiling.
> I'd be glad to see better solutions.

This should be safe, reiserfs has the buffer heads themselves clean and
the page should get cleaned eventually.  The cancel_dirty_page call was
just an optimization to be VM friendly.

-chris
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Does "32.1% non-contiguous" mean severely fragmented?

2007-10-23 Thread Theodore Tso
On Tue, Oct 23, 2007 at 07:38:20PM +0900, Tetsuo Handa wrote:
> > Are you sure the file isn't getting written by some background tasks
> > that you weren't aware of?  This seems very strange; what
> > virtualization software are you using?  VMware, Xen, KVM?
> I'm using VMware Workstation 6.0.0 build 45731 for x86_64.
> It seems that there were some background tasks that delay writing.
> I tried the following sequence; "sync" had no effect.

Or it may be that it takes a while to do a controlled shutdown.

One potential reason for the vmem file being very badly fragmented is
that it might not be getting written in sequential order.  If the
writer is writing the file in random order, then unless you have a
filesystem which can do delayed allocations, the blocks will get
allocated in the order that they are first written, and if the writer
is seeking to random locations to do the write, that's one way that
you can end up with a very badly fragmented file.

Regards,

- Ted
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file

2007-10-23 Thread Fengguang Wu
On Tue, Oct 23, 2007 at 12:07:07PM +0200, Peter Zijlstra wrote:
> [ adding reiserfs devs to the CC ]

Thank you.

This fix is kind of crude - even though it fixed Maxim's problem, and
survived my stress testing of a lot of patching and kernel compiling.
I'd be glad to see better solutions.

Fengguang
---

reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file

This is not a new problem in 2.6.23-git17.
2.6.22/2.6.23 is buggy in the same way.

Reiserfs could accumulate dirty sub-page-size files until umount time.
They cannot be synced to disk by pdflush routines or explicit `sync'
commands.  Only `umount' can do the trick.

The direct cause is: the dirty page's PG_dirty is wrongly _cleared_.
Call trace:
 [] cancel_dirty_page+0xd0/0xf0
 [] :reiserfs:reiserfs_cut_from_item+0x660/0x710
 [] :reiserfs:reiserfs_do_truncate+0x271/0x530
 [] :reiserfs:reiserfs_truncate_file+0xfd/0x3b0
 [] :reiserfs:reiserfs_file_release+0x1e0/0x340
 [] __fput+0xcc/0x1b0
 [] fput+0x16/0x20
 [] filp_close+0x56/0x90
 [] sys_close+0xad/0x110
 [] system_call+0x7e/0x83

Fix the bug by removing the cancel_dirty_page() call. Tests show that
it causes no bad behaviors on various write sizes.


=== for the patient ===
Here are more detailed demonstrations of the problem.

1) the page has both PG_dirty(D)/PAGECACHE_TAG_DIRTY(d) after being written to;
   and then only PAGECACHE_TAG_DIRTY(d) remains after the file is closed.

-- screen 0 --
[T0] root /home/wfg# cat > /test/tiny
[T1] hi
[T2] root /home/wfg#

-- screen 1 --
[T1] root /home/wfg# echo /test/tiny > /proc/filecache
[T1] root /home/wfg# cat /proc/filecache
 # file /test/tiny
 # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback 
O:owner B:buffer d:dirty w:writeback
 # idx   len state   refcnt
 0   1   ___UD__Bd_  2
[T2] root /home/wfg# cat /proc/filecache
 # file /test/tiny
 # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback 
O:owner B:buffer d:dirty w:writeback
 # idx   len state   refcnt
 0   1   ___U___Bd_  2

2) note the non-zero 'cancelled_write_bytes' after /tmp/hi is copied.

-- screen 0 --
[T0] root /home/wfg# echo hi > /tmp/hi
[T1] root /home/wfg# cp /tmp/hi /dev/stdin /test
[T2] hi
[T3] root /home/wfg#

-- screen 1 --
[T1] root /proc/4397# cd /proc/`pidof cp`
[T1] root /proc/4713# cat io
 rchar: 8396
 wchar: 3
 syscr: 20
 syscw: 1
 read_bytes: 0
 write_bytes: 20480
 cancelled_write_bytes: 4096
[T2] root /proc/4713# cat io
 rchar: 8399
 wchar: 6
 syscr: 21
 syscw: 2
 read_bytes: 0
 write_bytes: 24576
 cancelled_write_bytes: 4096

//Question: the 'write_bytes' is a bit more than expected ;-)

Cc: Maxim Levitsky <[EMAIL PROTECTED]>  
 
Cc: Peter Zijlstra <[EMAIL PROTECTED]>
Cc: Jeff Mahoney <[EMAIL PROTECTED]>
Signed-off-by: Fengguang Wu <[EMAIL PROTECTED]>
---
 fs/reiserfs/stree.c |3 ---
 1 file changed, 3 deletions(-)

--- linux-2.6.24-git17.orig/fs/reiserfs/stree.c
+++ linux-2.6.24-git17/fs/reiserfs/stree.c
@@ -1458,9 +1458,6 @@ static void unmap_buffers(struct page *p
}
bh = next;
} while (bh != head);
-   if (PAGE_SIZE == bh->b_size) {
-   cancel_dirty_page(page, PAGE_CACHE_SIZE);
-   }
}
}
 }

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Does "32.1% non-contiguous" mean severely fragmented?

2007-10-23 Thread Tetsuo Handa
Hello.

> What filesystem are you using?  ext3?  ext4?  xfs?  And are you using
> any non-standard patches, such as some of the delayed allocation
> patches that have been floating around?  If you're using ext3, that
> shouldn't be happening.
I'm using ext3.
I'm running it on kernel 2.6.18-8.1.14.el5 (CentOS 5) for x86_64.
I don't know whether some of the delayed allocation patches are used
for 2.6.18-8.1.14.el5 kernel.

> Are you sure the file isn't getting written by some background tasks
> that you weren't aware of?  This seems very strange; what
> virtualization software are you using?  VMware, Xen, KVM?
I'm using VMware Workstation 6.0.0 build 45731 for x86_64.
It seems that there were some background tasks that delay writing.
I tried the following sequence; "sync" had no effect.

[EMAIL PROTECTED] Ubuntu7.10]# service vmware stop
[EMAIL PROTECTED] Ubuntu7.10]# sleep 30
[EMAIL PROTECTED] Ubuntu7.10]# filefrag Ubuntu7.10.vmem
Ubuntu7.10.vmem: 9280 extents found, perfection would be 5 extents
[EMAIL PROTECTED] Ubuntu7.10]# sync
[EMAIL PROTECTED] Ubuntu7.10]# filefrag Ubuntu7.10.vmem
Ubuntu7.10.vmem: 9280 extents found, perfection would be 5 extents
[EMAIL PROTECTED] Ubuntu7.10]# service vmware start
[EMAIL PROTECTED] Ubuntu7.10]# vmware
[EMAIL PROTECTED] Ubuntu7.10]# service vmware stop
[EMAIL PROTECTED] Ubuntu7.10]# sleep 30
[EMAIL PROTECTED] Ubuntu7.10]# filefrag Ubuntu7.10.vmem
Ubuntu7.10.vmem: 9748 extents found, perfection would be 5 extents
[EMAIL PROTECTED] Ubuntu7.10]# sync
[EMAIL PROTECTED] Ubuntu7.10]# filefrag Ubuntu7.10.vmem
Ubuntu7.10.vmem: 9748 extents found, perfection would be 5 extents
[EMAIL PROTECTED] Ubuntu7.10]# service vmware start
[EMAIL PROTECTED] Ubuntu7.10]# vmware
[EMAIL PROTECTED] Ubuntu7.10]# service vmware stop
[EMAIL PROTECTED] Ubuntu7.10]# filefrag Ubuntu7.10.vmem
Ubuntu7.10.vmem: 9749 extents found, perfection would be 5 extents
[EMAIL PROTECTED] Ubuntu7.10]# sync
[EMAIL PROTECTED] Ubuntu7.10]# filefrag Ubuntu7.10.vmem
Ubuntu7.10.vmem: 9755 extents found, perfection would be 5 extents

Thank you.

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 0/2] Case-insensitive filename lookup for XFS

2007-10-23 Thread Anton Altaparmakov
I forgot to say:  If you do what I did for NTFS you can also throw  
away your custom dentry operations that your patch adds as the dcache  
then only holds correctly cased names so you are fine to do case  
sensitive dcache lookups at all times.  Access via wrongly cased name  
will always go to ->lookup inode operation and that is fine because  
such lookups almost never happen because majority of users will either  
use a GUI in which case all names are always correctly cased as the  
names displayed in the GUI are obtained from a ->readdir and thus show  
the correct case or they will use the command line in which case they  
will be savvy enough to use tab-completion in which case the names are  
correct case, too.  Tab-completion does not work on wrongly cased  
names so you are very unlikely to ever get a wrongly cased name at all.


And yes of course you can on purpose construct a test / benchmark  
where having to do the ->lookup each time will be really slow because  
you keep creating files and then accessing them by wrongly cased name  
on purpose (or whatever) but I would hope that you do not care about  
such artificial benchmarks that do not reflect any real-world loads...


Best regards,

Anton

On 23 Oct 2007, at 11:01, Anton Altaparmakov wrote:


Hi,

On 23 Oct 2007, at 08:53, Barry Naujok wrote:

Following is the initial test version of case-insensitive support
for XFS in Linux. It implements case-insensitivity utilising a
Unicode case folding table stored on disk generated from
http://www.unicode.org/Public/UNIDATA/CaseFolding.txt

As the filesystem stores names as Unicode (UTF-8), the "nls"
mount option has been added to support systems not utilising
UTF-8 natively. If the nls mount option is not used, it will
use the default NLS defined in the kernel's config.

To allow case-insensitivity to be a mount option rather than
a mkfs option, the hashes stored on disk are always case-folded.
This is indicated by the new "unicode" bit in the superblock.
This bit is also associated with the presence of the case-folding
table on disk.

With the case-folding table on disk, it allows us to upgrade
the table in the future while retaining backwards and forwards
compatibility. It also allows special case tables such as
Turkic case which is supported in this patch set.

The case-insensitive support also installs a couple of
dentry_operations for the XFS inodes: hash and compare.

Currently, there are a couple of outstanding issues with the
dentry cache interaction:

- The first lookup if case-mismatched will continue to
  have the mismatched case in the cache. Not really sure
  if this is an issue or not. If it is an issue, how
  should I resolve it?

- As above, but with a non-existing lookup, then creating
  the file with a different case, the first failed lookup
  will define the case used. I have partially resolved
  this with a memcpy if the two lengths are the same.
  How do I fix this if the lengths are different?
  (TODO's show the location of this problem.)


Both of the above can be fairly easily fixed if you want.  NTFS does  
it in the stock kernel.


You would need to change the XFS ->lookup inode operation so that  
when it reads the directory to check whether a name exists, if it is  
found but the case is not matched, you need to make a copy of the  
correctly cased name (in NTFS this is done in
fs/ntfs/dir.c::ntfs_lookup_inode_by_name() if you want to take a look, the  
name is stored in the "ntfs_name" structure that is allocated during  
the lookup if a case mismatched match is found and this is returned  
to the caller).


Then in ->lookup() if you got a correctly cased name structure (if  
the name was cased correctly the correctly cased name structure  
pointer would be NULL) then you need to replace the dentry passed  
into ->lookup with a new one with the correct case.  This is a  
little complicated because such a dentry may already exist in which  
case you have to use the existing one (instantiating it if it was  
negative) and if it does not already exist you need to allocate a  
new one, instantiate it and then move it over the old one.  Again a  
little complicated because of disconnected dentries for NFS.  But it  
is not too bad and it works well in NTFS (see
fs/ntfs/namei.c::ntfs_lookup() the code that does all of this starts at the  
"handle_name" goto label).


Doing things this way means that you never have wrong case dentries  
in dcache.  And this in turn means that things like handling
->unlink and ->rename inode operations is much easier as the dentry  
you receive there is returned from a ->lookup() call thus you know  
it is correctly cased already so you can do a case-sensitive match  
when looking up the directory entry to remove/rename!  (I am afraid  
you cannot look at the NTFS code for that as that is not publicly  
available yet. )-:)


Best regards,

Anton


Other TODOs:

- support for case-insensitive extended attributes
  as a sepa

Re: [RFC 0/2] Case-insensitive filename lookup for XFS

2007-10-23 Thread Anton Altaparmakov

Hi,

On 23 Oct 2007, at 08:53, Barry Naujok wrote:

Following is the initial test version of case-insensitive support
for XFS in Linux. It implements case-insensitivity utilising a
Unicode case folding table stored on disk generated from
http://www.unicode.org/Public/UNIDATA/CaseFolding.txt

As the filesystem stores names as Unicode (UTF-8), the "nls"
mount option has been added to support systems not utilising
UTF-8 natively. If the nls mount option is not used, it will
use the default NLS defined in the kernel's config.

To allow case-insensitivity to be a mount option rather than
a mkfs option, the hashes stored on disk are always case-folded.
This is indicated by the new "unicode" bit in the superblock.
This bit is also associated with the presence of the case-folding
table on disk.

With the case-folding table on disk, it allows us to upgrade
the table in the future while retaining backwards and forwards
compatibility. It also allows special case tables such as
Turkic case which is supported in this patch set.

The case-insensitive support also installs a couple of
dentry_operations for the XFS inodes: hash and compare.

Currently, there are a couple of outstanding issues with the
dentry cache interaction:

 - The first lookup if case-mismatched will continue to
   have the mismatched case in the cache. Not really sure
   if this is an issue or not. If it is an issue, how
   should I resolve it?

 - As above, but with a non-existing lookup, then creating
   the file with a different case, the first failed lookup
   will define the case used. I have partially resolved
   this with a memcpy if the two lengths are the same.
   How do I fix this if the lengths are different?
   (TODO's show the location of this problem.)


Both of the above can be fairly easily fixed if you want.  NTFS does  
it in the stock kernel.


You would need to change the XFS ->lookup inode operation so that when  
it reads the directory to check whether a name exists, if it is found  
but the case is not matched, you need to make a copy of the correctly  
cased name (in NTFS this is done in
fs/ntfs/dir.c::ntfs_lookup_inode_by_name() if you want to take a look, the  
name is stored in the "ntfs_name" structure that is allocated during  
the lookup if a case mismatched match is found and this is returned to  
the caller).


Then in ->lookup() if you got a correctly cased name structure (if the  
name was cased correctly the correctly cased name structure pointer  
would be NULL) then you need to replace the dentry passed into
->lookup with a new one with the correct case.  This is a little  
complicated because such a dentry may already exist in which case you  
have to use the existing one (instantiating it if it was negative) and  
if it does not already exist you need to allocate a new one,  
instantiate it and then move it over the old one.  Again a little  
complicated because of disconnected dentries for NFS.  But it is not  
too bad and it works well in NTFS (see fs/ntfs/namei.c::ntfs_lookup()  
the code that does all of this starts at the "handle_name" goto label).


Doing things this way means that you never have wrong case dentries in  
dcache.  And this in turn means that things like handling ->unlink and  
->rename inode operations is much easier as the dentry you receive  
there is returned from a ->lookup() call thus you know it is correctly  
cased already so you can do a case-sensitive match when looking up the  
directory entry to remove/rename!  (I am afraid you cannot look at the  
NTFS code for that as that is not publicly available yet. )-:)


Best regards,

Anton


Other TODOs:

 - support for case-insensitive extended attributes
   as a separate mount option.

 - Other xfsprogs updates: xfs_repair, xfs_db


--
Anton Altaparmakov  (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer, http://www.linux-ntfs.org/

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/9] Unionfs: security convert lsm into a static interface fix

2007-10-23 Thread Christoph Hellwig
On Mon, Oct 22, 2007 at 08:48:04PM -0400, Erez Zadok wrote:
> Why?  Are you concerned that the security policy may change after a module
> is loaded?

No, it's a matter of proper layering.  We generally don't want modules
like stackable filesystems to call directly into methods but rather use
proper highlevel VFS helpers to isolate them from details and possible
changes.  The move to out of line security_ helpers just put this on the
radar.

> I can probably get rid of having unionfs call security_inode_permission, by
> calling permission() myself and carefully post-process its return code
> (unionfs needs to "ignore" EROFS initially, to allow copyup to take place).

Sounds fine.

> But security_file_ioctl doesn't have any existing helper I can call.  I can
> introduce a trivial vfs_security_file_ioctl wrapper to security_file_ioctl,
> but what about the already existing *19* exported security_* functions in
> security/security.c?  Do you want to see simple wrappers for all of them?
> It seems redundant to add a one-line wrapper around an already one-line
> function around security_ops->XXX.  Plus, some of the existing exported
> security_* functions are file-system related, others are networking, etc.  So
> we'll need wrappers whose names are prefixed appropriately: vfs_*, net_*,
> etc.

The fix for security_file_ioctl is probably to either not do it at all
or move the call to security_file_ioctl into vfs_ioctl and get it by
using that helper.  I suspect most other security_ exports should be
avoided similarly.

I also suspect the whole issue of where and how-many times to call LSM
methods for stackable filesystems is a huge can of worms and it might make
sense to talk to the LSM folks about it.
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC 1/2] Case-insensitive XFS - kernel patch

2007-10-23 Thread Barry Naujok


 Makefile  |1
 linux-2.6/xfs_iops.c  |   89 
 linux-2.6/xfs_linux.h |2
 linux-2.6/xfs_super.c |8
 xfs_clnt.h|5
 xfs_da_btree.c|   20 +
 xfs_da_btree.h|   21 +
 xfs_dir2.c|  177 +---
 xfs_dir2.h|9
 xfs_dir2_block.c  |   30 +-
 xfs_dir2_data.c   |3
 xfs_dir2_leaf.c   |   19 +
 xfs_dir2_node.c   |5
 xfs_dir2_sf.c |   35 ++-
 xfs_mount.c   |   25 ++
 xfs_mount.h   |8
 xfs_sb.h  |   33 ++-
 xfs_unicode.c |  547  
++

 xfs_unicode.h |   75 ++
 xfs_vfsops.c  |   53 
 20 files changed, 1100 insertions(+), 65 deletions(-)

===
fs/xfs/Makefile
===

--- a/fs/xfs/Makefile   2007-10-23 17:19:40.0 +1000
+++ b/fs/xfs/Makefile   2007-10-23 16:17:22.173903950 +1000
@@ -74,6 +74,7 @@ xfs-y += xfs_alloc.o \
   xfs_trans_extfree.o \
   xfs_trans_inode.o \
   xfs_trans_item.o \
+  xfs_unicode.o \
   xfs_utils.o \
   xfs_vfsops.o \
   xfs_vnodeops.o \

===
fs/xfs/linux-2.6/xfs_iops.c
===

--- a/fs/xfs/linux-2.6/xfs_iops.c   2007-10-23 17:19:41.0 +1000
+++ b/fs/xfs/linux-2.6/xfs_iops.c   2007-10-23 16:43:19.828562924 +1000
@@ -47,12 +47,17 @@
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
 #include "xfs_vnodeops.h"
+#include "xfs_da_btree.h"
+#include "xfs_unicode.h"

 #include 
 #include 
 #include 
 #include 

+struct dentry_operations xfs_dentry_operations;
+struct dentry_operations xfs_nls_dentry_operations;
+
 /*
  * Bring the atime in the XFS inode uptodate.
  * Used before logging the inode to disk or when the Linux inode goes  
away.

@@ -369,10 +374,17 @@ xfs_vn_lookup(
 {
bhv_vnode_t *cvp;
int error;
+   struct xfs_mount *mp = XFS_I(dir)->i_mount;
+   struct dentry   *result;

if (dentry->d_name.len >= MAXNAMELEN)
return ERR_PTR(-ENAMETOOLONG);

+   if (xfs_sb_version_hasunicode(&mp->m_sb) ||
+   xfs_sb_version_hasoldci(&mp->m_sb))
+   dentry->d_op = mp->m_nls ? &xfs_nls_dentry_operations :
+   &xfs_dentry_operations;
+
error = xfs_lookup(XFS_I(dir), dentry, &cvp);
if (unlikely(error)) {
if (unlikely(error != ENOENT))
@@ -381,7 +393,11 @@ xfs_vn_lookup(
return NULL;
}

-   return d_splice_alias(vn_to_inode(cvp), dentry);
+   result = d_splice_alias(vn_to_inode(cvp), dentry);
+   if (result)
+   result->d_op = dentry->d_op;
+
+   return result;
 }

 STATIC int
@@ -823,3 +839,74 @@ const struct inode_operations xfs_symlin
.listxattr  = xfs_vn_listxattr,
.removexattr= xfs_vn_removexattr,
 };
+
+
+STATIC int
+xfs_dentry_hash(
+   struct dentry   *dir,
+   struct qstr *this)
+{
+   this->hash = xfs_dir_hashname(XFS_I(dir->d_inode),
+   this->name, this->len);
+   return 0;
+}
+
+STATIC int
+xfs_dentry_compare(
+   struct dentry   *dir,
+   struct qstr *a,
+   struct qstr *b)
+{
+   int result = xfs_dir_compname(XFS_I(dir->d_inode), a->name, a->len,
+   b->name, b->len);
+   if (result == 0) {
+   if (a->len == b->len)
+   memcpy((unsigned char *)a->name, b->name, a->len);
+   else {
+   /* TODO: more complicated when name lengths differ */
+   }
+   }
+   return result;
+}
+
+STATIC int
+xfs_nls_dentry_hash(
+   struct dentry   *dir,
+   struct qstr *this)
+{
+   xfs_mount_t *mp = XFS_I(dir->d_inode)->i_mount;
+
+   this->hash = xfs_nls_hash(mp->m_nls, mp->m_cft, this->name, this->len);
+   return 0;
+}
+
+STATIC int
+xfs_nls_dentry_compare(
+   struct dentry   *dir,
+   struct qstr *a,
+   struct qstr *b)
+{
+   xfs_mount_t *mp = XFS_I(dir->d_inode)->i_mount;
+   int result = xfs_nls_casecmp(mp->m_nls, mp->m_cft,
+   a->name, a->len, b->name, b->len);
+   if (result == 0) {
+   if (a->len == b->len)
+   memcpy((unsigned char *)a->name, b->name, a->len);
+   else {
+   /* TODO: more complicated when name lengths d

[RFC 2/2] Case-insensitive XFS - mkfs.xfs

2007-10-23 Thread Barry Naujok


 include/xfs_sb.h |   27 +-
 libxfs/xfs_mount.c   |2
 mkfs/Makefile|2
 mkfs/casefoldtable.c |  608 +++
 mkfs/proto.c |  158 +
 mkfs/xfs_mkfs.c  |   93 ---
 mkfs/xfs_mkfs.h  |   24 +-
 7 files changed, 866 insertions(+), 48 deletions(-)

===
xfsprogs/include/xfs_sb.h
===

--- a/xfsprogs/include/xfs_sb.h 2007-10-23 17:14:16.0 +1000
+++ b/xfsprogs/include/xfs_sb.h 2007-10-23 16:56:07.765557256 +1000
@@ -46,10 +46,12 @@ struct xfs_mount;
 #define XFS_SB_VERSION_SECTORBIT   0x0800
 #defineXFS_SB_VERSION_EXTFLGBIT0x1000
 #defineXFS_SB_VERSION_DIRV2BIT 0x2000
+#define XFS_SB_VERSION_OLDCIBIT0x4000
 #defineXFS_SB_VERSION_MOREBITSBIT  0x8000
 #defineXFS_SB_VERSION_OKSASHFBITS  \
(XFS_SB_VERSION_EXTFLGBIT | \
-XFS_SB_VERSION_DIRV2BIT)
+XFS_SB_VERSION_DIRV2BIT | \
+XFS_SB_VERSION_OLDCIBIT)
 #defineXFS_SB_VERSION_OKREALFBITS  \
(XFS_SB_VERSION_ATTRBIT | \
 XFS_SB_VERSION_NLINKBIT | \
@@ -82,13 +84,12 @@ struct xfs_mount;
 #define XFS_SB_VERSION2_DONOTUSEBIT2   0x0004
 #define XFS_SB_VERSION2_ATTR2BIT   0x0008  /* Inline attr rework */
 #define XFS_SB_VERSION2_PARENTBIT  0x0010  /* Parent pointers */
-#define XFS_SB_VERSION2_SASHFBITS  0xff00  /* Mask: features that
-  require changing
-  PROM and SASH */
+#define XFS_SB_VERSION2_UNICODEBIT 0x0020  /* Unicode names */

 #defineXFS_SB_VERSION2_OKREALFBITS \
-   (XFS_SB_VERSION2_ATTR2BIT | \
-XFS_SB_VERSION2_LAZYSBCOUNTBIT)
+   (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
+XFS_SB_VERSION2_ATTR2BIT | \
+XFS_SB_VERSION2_UNICODEBIT)
 #defineXFS_SB_VERSION2_OKSASHFBITS \
(0)
 #define XFS_SB_VERSION2_OKREALBITS \
@@ -151,6 +152,8 @@ typedef struct xfs_sb
__uint16_t  sb_logsectsize; /* sector size for the log, bytes */
__uint32_t  sb_logsunit;/* stripe unit size for the log */
__uint32_t  sb_features2;   /* additional feature bits */
+   __uint32_t  sb_bad_features2; /* unusable space */
+   xfs_ino_t   sb_cftino;  /* unicode case folding table inode */
 } xfs_sb_t;

 /*
@@ -169,7 +172,7 @@ typedef enum {
XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
-   XFS_SBS_FEATURES2,
+   XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_CFTINO,
XFS_SBS_FIELDCOUNT
 } xfs_sb_field_t;

@@ -194,13 +197,15 @@ typedef enum {
 #define XFS_SB_IFREE   XFS_SB_MVAL(IFREE)
 #define XFS_SB_FDBLOCKSXFS_SB_MVAL(FDBLOCKS)
 #define XFS_SB_FEATURES2   XFS_SB_MVAL(FEATURES2)
+#define XFS_SB_CFTINO  XFS_SB_MVAL(CFTINO)
 #defineXFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
 #defineXFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
 #defineXFS_SB_MOD_BITS \
(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
-XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2)
+XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
+XFS_SB_CFTINO)


 /*
@@ -455,6 +460,12 @@ static inline void xfs_sb_version_addatt
((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
 }

+static inline int xfs_sb_version_hasunicode(xfs_sb_t *sbp)
+{
+   return (xfs_sb_version_hasmorebits(sbp) &&  \
+   ((sbp)->sb_features2 & XFS_SB_VERSION2_UNICODEBIT));
+}
+
 /*
  * end of superblock version macros
  */

===
xfsprogs/libxfs/xfs_mount.c
===

--- a/xfsprogs/libxfs/xfs_mount.c   2007-10-23 17:14:16.0 +1000
+++ b/xfsprogs/libxfs/xfs_mount.c   2007-10-23 16:52:26.438099100 +1000
@@ -140,6 +140,8 @@ static struct {
 { offsetof(xfs_sb_t, sb_logsectsize),0 },
 { offsetof(xfs_sb_t, sb_logsunit),  0 },
 { offsetof(xfs_sb_t, sb_features2), 0 },
+{ offsetof(xfs_sb_t, sb_bad_features2), 0 },
+{ offsetof(xfs_sb_t, sb_cftino),0 },
 { sizeof(xfs_sb_t), 0 }
 };


===
xfsprogs/mkfs/Makefile
===

[RFC 0/2] Case-insensitive filename lookup for XFS

2007-10-23 Thread Barry Naujok


Following is the initial test version of case-insensitive support
for XFS in Linux. It implements case-insensitivity utilising a
Unicode case folding table stored on disk generated from
http://www.unicode.org/Public/UNIDATA/CaseFolding.txt

As the filesystem stores names as Unicode (UTF-8), the "nls"
mount option has been added to support systems not utilising
UTF-8 natively. If the nls mount option is not used, it will
use the default NLS defined in the kernel's config.

To allow case-insensitivity to be a mount option rather than
a mkfs option, the hashes stored on disk are always case-folded.
This is indicated by the new "unicode" bit in the superblock.
This bit is also associated with the presence of the case-folding
table on disk.

With the case-folding table on disk, it allows us to upgrade
the table in the future while retaining backwards and forwards
compatibility. It also allows special case tables such as
Turkic case which is supported in this patch set.

The case-insensitive support also installs a couple of
dentry_operations for the XFS inodes: hash and compare.

Currently, there are a couple of outstanding issues with the
dentry cache interaction:

  - The first lookup if case-mismatched will continue to
have the mismatched case in the cache. Not really sure
if this is an issue or not. If it is an issue, how
should I resolve it?

  - As above, but with a non-existing lookup, then creating
the file with a different case, the first failed lookup
will define the case used. I have partially resolved
this with a memcpy if the two lengths are the same.
How do I fix this if the lengths are different?
(TODO's show the location of this problem.)

Other TODOs:

  - support for case-insensitive extended attributes
as a separate mount option.

  - Other xfsprogs updates: xfs_repair, xfs_db

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html