[PATCH] [7/7] Remove incorrect BKL comments in ext4
Signed-off-by: Andi Kleen [EMAIL PROTECTED] --- fs/ext4/dir.c |2 +- fs/ext4/inode.c |1 - 2 files changed, 1 insertion(+), 2 deletions(-) Index: linux/fs/ext4/dir.c === --- linux.orig/fs/ext4/dir.c +++ linux/fs/ext4/dir.c @@ -46,7 +46,7 @@ const struct file_operations ext4_dir_op #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif - .fsync = ext4_sync_file, /* BKL held */ + .fsync = ext4_sync_file, .release= ext4_release_dir, }; Index: linux/fs/ext4/inode.c === --- linux.orig/fs/ext4/inode.c +++ linux/fs/ext4/inode.c @@ -778,7 +778,6 @@ err_out: * * `handle' can be NULL if create == 0. * - * The BKL may not be held on entry here. Be sure to take it early. * return 0, # of blocks mapped or allocated. * return = 0, if plain lookup failed. * return 0, error case. - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [5/7] Remove incorrect comment referring to lock_kernel() from jbd/jbd2
None of the callers of this function actually takes the BKL as far as I can see. So remove the comment referring to the BKL. Signed-off-by: Andi Kleen [EMAIL PROTECTED] --- fs/jbd/recovery.c |2 +- fs/jbd2/recovery.c |2 +- 2 files changed, 2 insertions(+), 2 deletions(-) Index: linux/fs/jbd/recovery.c === --- linux.orig/fs/jbd/recovery.c +++ linux/fs/jbd/recovery.c @@ -354,7 +354,7 @@ static int do_one_pass(journal_t *journa struct buffer_head *obh; struct buffer_head *nbh; - cond_resched(); /* We're under lock_kernel() */ + cond_resched(); /* If we already know where to stop the log traversal, * check right now that we haven't gone past the end of Index: linux/fs/jbd2/recovery.c === --- linux.orig/fs/jbd2/recovery.c +++ linux/fs/jbd2/recovery.c @@ -364,7 +364,7 @@ static int do_one_pass(journal_t *journa struct buffer_head *obh; struct buffer_head *nbh; - cond_resched(); /* We're under lock_kernel() */ + cond_resched(); /* If we already know where to stop the log traversal, * check right now that we haven't gone past the end of - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [4/7] ext3: Remove incorrect BKL comment
There is no BKL held on entry in -fsync nor in the low level ext3_sync_file. Signed-off-by: Andi Kleen [EMAIL PROTECTED] --- fs/ext3/dir.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux/fs/ext3/dir.c === --- linux.orig/fs/ext3/dir.c +++ linux/fs/ext3/dir.c @@ -46,7 +46,7 @@ const struct file_operations ext3_dir_op #ifdef CONFIG_COMPAT .compat_ioctl = ext3_compat_ioctl, #endif - .fsync = ext3_sync_file, /* BKL held */ + .fsync = ext3_sync_file, .release= ext3_release_dir, }; - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [2/7] Remove incorrect BKL comment in ext2
No BKL used anywhere, so don't mention it. Signed-off-by: Andi Kleen [EMAIL PROTECTED] --- fs/ext2/inode.c |1 - 1 file changed, 1 deletion(-) Index: linux/fs/ext2/inode.c === --- linux.orig/fs/ext2/inode.c +++ linux/fs/ext2/inode.c @@ -569,7 +569,6 @@ static void ext2_splice_branch(struct in * * `handle' can be NULL if create == 0. * - * The BKL may not be held on entry here. Be sure to take it early. * return 0, # of blocks mapped or allocated. * return = 0, if plain lookup failed. * return 0, error case. - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [1/7] Convert ext2 over to use unlocked_ioctl
I checked ext2_ioctl and could not find anything in there that would need the BKL. So convert it over to use unlocked_ioctl Signed-off-by: Andi Kleen [EMAIL PROTECTED] --- fs/ext2/dir.c |2 +- fs/ext2/ext2.h |3 +-- fs/ext2/file.c |4 ++-- fs/ext2/ioctl.c | 12 +++- 4 files changed, 7 insertions(+), 14 deletions(-) Index: linux/fs/ext2/dir.c === --- linux.orig/fs/ext2/dir.c +++ linux/fs/ext2/dir.c @@ -703,7 +703,7 @@ const struct file_operations ext2_dir_op .llseek = generic_file_llseek, .read = generic_read_dir, .readdir= ext2_readdir, - .ioctl = ext2_ioctl, + .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif Index: linux/fs/ext2/ext2.h === --- linux.orig/fs/ext2/ext2.h +++ linux/fs/ext2/ext2.h @@ -139,8 +139,7 @@ int __ext2_write_begin(struct file *file struct page **pagep, void **fsdata); /* ioctl.c */ -extern int ext2_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); +extern long ext2_ioctl(struct file *, unsigned int, unsigned long); extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long); /* namei.c */ Index: linux/fs/ext2/file.c === --- linux.orig/fs/ext2/file.c +++ linux/fs/ext2/file.c @@ -48,7 +48,7 @@ const struct file_operations ext2_file_o .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, - .ioctl = ext2_ioctl, + .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif @@ -65,7 +65,7 @@ const struct file_operations ext2_xip_fi .llseek = generic_file_llseek, .read = xip_file_read, .write = xip_file_write, - .ioctl = ext2_ioctl, + .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif Index: linux/fs/ext2/ioctl.c === --- linux.orig/fs/ext2/ioctl.c +++ linux/fs/ext2/ioctl.c @@ -17,9 +17,9 @@ #include asm/uaccess.h -int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +long ext2_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg) { + struct inode *inode = filp-f_dentry-d_inode; struct ext2_inode_info *ei = EXT2_I(inode); unsigned int flags; unsigned short rsv_window_size; @@ -141,9 +141,6 @@ int ext2_ioctl (struct inode * inode, st #ifdef CONFIG_COMPAT long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file-f_path.dentry-d_inode; - int ret; - /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT2_IOC32_GETFLAGS: @@ -161,9 +158,6 @@ long ext2_compat_ioctl(struct file *file default: return -ENOIOCTLCMD; } - lock_kernel(); - ret = ext2_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; + return ext2_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [0/7] Drop BKL in ext[234] ioctls
Remove the BKL from the ext* ioctls. This is a slightly updated version of the ext[2-4] patches that hit linux-fsdevel earlier. -Andi - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [3/7] Convert ext3 to use unlocked_ioctl v2
I checked ext3_ioctl and it looked largely safe to be used without the BKL. So convert it over to unlocked_ioctl. Signed-off-by: Andi Kleen [EMAIL PROTECTED] v1-v2: drop lock kernel for online growth. resize.c seems to do enough locking --- fs/ext3/dir.c |2 +- fs/ext3/file.c |2 +- fs/ext3/ioctl.c | 21 +++-- include/linux/ext3_fs.h |3 +-- 4 files changed, 14 insertions(+), 14 deletions(-) Index: linux/fs/ext3/dir.c === --- linux.orig/fs/ext3/dir.c +++ linux/fs/ext3/dir.c @@ -42,7 +42,7 @@ const struct file_operations ext3_dir_op .llseek = generic_file_llseek, .read = generic_read_dir, .readdir= ext3_readdir, /* we take BKL. needed?*/ - .ioctl = ext3_ioctl, /* BKL held */ + .unlocked_ioctl = ext3_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext3_compat_ioctl, #endif Index: linux/fs/ext3/file.c === --- linux.orig/fs/ext3/file.c +++ linux/fs/ext3/file.c @@ -112,7 +112,7 @@ const struct file_operations ext3_file_o .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext3_file_write, - .ioctl = ext3_ioctl, + .unlocked_ioctl = ext3_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext3_compat_ioctl, #endif Index: linux/fs/ext3/ioctl.c === --- linux.orig/fs/ext3/ioctl.c +++ linux/fs/ext3/ioctl.c @@ -17,9 +17,9 @@ #include linux/smp_lock.h #include asm/uaccess.h -int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp-f_dentry-d_inode; struct ext3_inode_info *ei = EXT3_I(inode); unsigned int flags; unsigned short rsv_window_size; @@ -263,9 +270,6 @@ flags_err: #ifdef CONFIG_COMPAT long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file-f_path.dentry-d_inode; - int ret; - /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT3_IOC32_GETFLAGS: @@ -305,9 +309,6 @@ long ext3_compat_ioctl(struct file *file default: return 
-ENOIOCTLCMD; } - lock_kernel(); - ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; + return ext3_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif Index: linux/include/linux/ext3_fs.h === --- linux.orig/include/linux/ext3_fs.h +++ linux/include/linux/ext3_fs.h @@ -838,8 +838,7 @@ extern void ext3_get_inode_flags(struct extern void ext3_set_aops(struct inode *inode); /* ioctl.c */ -extern int ext3_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); +extern long ext3_ioctl(struct file *, unsigned int, unsigned long); extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long); /* namei.c */ - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [6/7] Convert ext4 to use unlocked_ioctl v2
I checked ext4_ioctl and it looked largely safe to be used without the BKL. So convert it over to unlocked_ioctl. Signed-off-by: Andi Kleen [EMAIL PROTECTED] v1-v2: drop BKL for online grow. resize.c seems to do enough locking --- fs/ext4/dir.c |2 +- fs/ext4/file.c |2 +- fs/ext4/ioctl.c | 20 +++- include/linux/ext4_fs.h |3 +-- 4 files changed, 14 insertions(+), 13 deletions(-) Index: linux/fs/ext4/dir.c === --- linux.orig/fs/ext4/dir.c +++ linux/fs/ext4/dir.c @@ -42,7 +42,7 @@ const struct file_operations ext4_dir_op .llseek = generic_file_llseek, .read = generic_read_dir, .readdir= ext4_readdir, /* we take BKL. needed?*/ - .ioctl = ext4_ioctl, /* BKL held */ + .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif Index: linux/fs/ext4/file.c === --- linux.orig/fs/ext4/file.c +++ linux/fs/ext4/file.c @@ -112,7 +112,7 @@ const struct file_operations ext4_file_o .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext4_file_write, - .ioctl = ext4_ioctl, + .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif Index: linux/fs/ext4/ioctl.c === --- linux.orig/fs/ext4/ioctl.c +++ linux/fs/ext4/ioctl.c @@ -17,9 +17,9 @@ #include linux/smp_lock.h #include asm/uaccess.h -int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp-f_dentry-d_inode; struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; unsigned short rsv_window_size; @@ -262,9 +270,6 @@ flags_err: #ifdef CONFIG_COMPAT long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file-f_path.dentry-d_inode; - int ret; - /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT4_IOC32_GETFLAGS: @@ -304,9 +309,6 @@ long ext4_compat_ioctl(struct file *file default: return -ENOIOCTLCMD; } - 
lock_kernel(); - ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; + return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif Index: linux/include/linux/ext4_fs.h === --- linux.orig/include/linux/ext4_fs.h +++ linux/include/linux/ext4_fs.h @@ -939,8 +939,7 @@ extern int ext4_block_truncate_page(hand struct address_space *mapping, loff_t from); /* ioctl.c */ -extern int ext4_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); +extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); /* namei.c */ - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH, RFC] Add new development flag to the ext4 filesystem
On Fri 25-01-08 03:50:04, Andreas Dilger wrote: On Jan 25, 2008 11:05 +0100, Jan Kara wrote: For example ext2 on fsync() just sync's a single inode (and has to use private_list to track metadata buffers associated with the inode) while ext3 flushes the whole journal. As for fsync(), we definitely need to preserve correct behaviour for the file itself, but there isn't a requirement that ext2 behave exactly like ext3 (it of course cannot). In the proposed ext4-mount-unjournaled-ext2 case, the superblock would be marked dirty as it is today and an e2fsck would need to be run at boot time. That is fine so long as the fsync() will cause the one file's data to be on disk before it returns. Well, you have to also make sure that all indirect blocks are on disk before fsync() returns. Otherwise there's not much point in the fact that data itself reached the disk. And for that you need something like private_list. In ext2, directory handling code is quite different. ext2 works in page cache of the directory while ext3 uses page cache of the underlying device via buffer heads - at least this second thing would be more or less mechanical thing to change and would make sense (we wouldn't have to reimplement readahead in ext3 directory handling code as we do now). I've looked at it once but then more urgent things came and ... you know it. I don't think it is a requirement that ext3 mounting a filesystem without a journal has to use page cache for directories. I wouldn't object to that being fixed. It definitely isn't a requirement for this to work, just an implementation difference. Yes, of course. I just wanted to point out that ext2 isn't a strict subset of ext3 so there is some non-trivial work to be done before you can safely mount ext2 as ext3-without-journal. 
Honza -- Jan Kara [EMAIL PROTECTED] SUSE Labs, CR - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] jbd: Remove useless loop when writing commit record
Hi Andrew, here's the patch I wrote you about. Honza -- Jan Kara [EMAIL PROTECTED] SUSE Labs, CR --- Commit block was intended to have several copies of the header. But due to a bug it never had them and actually, nobody checks that. So just remove the useless loop. Signed-off-by: Jan Kara [EMAIL PROTECTED] diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 610264b..b54948f 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -104,7 +104,8 @@ static int journal_write_commit_record(journal_t *journal, { struct journal_head *descriptor; struct buffer_head *bh; - int i, ret; + journal_header_t *header; + int ret; int barrier_done = 0; if (is_journal_aborted(journal)) @@ -116,13 +117,10 @@ static int journal_write_commit_record(journal_t *journal, bh = jh2bh(descriptor); - /* AKPM: buglet - add `i' to tmp! */ - for (i = 0; i bh-b_size; i += 512) { - journal_header_t *tmp = (journal_header_t*)bh-b_data; - tmp-h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); - tmp-h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); - tmp-h_sequence = cpu_to_be32(commit_transaction-t_tid); - } + header = (journal_header_t*)(bh-b_data); + header-h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); + header-h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); + header-h_sequence = cpu_to_be32(commit_transaction-t_tid); JBUFFER_TRACE(descriptor, write commit block); set_buffer_dirty(bh); - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Fix commit block write in JBD
On Sat 26-01-08 22:02:07, Andrew Morton wrote: On Wed, 23 Jan 2008 20:09:43 +0100 Jan Kara [EMAIL PROTECTED] wrote: Commit block is expected to have several copies of the header. Fix the bug Andrew has spotted ages ago. ages indeed. diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 610264b..a69b240 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -116,9 +116,8 @@ static int journal_write_commit_record(journal_t *journal, bh = jh2bh(descriptor); - /* AKPM: buglet - add `i' to tmp! */ for (i = 0; i bh-b_size; i += 512) { - journal_header_t *tmp = (journal_header_t*)bh-b_data; + journal_header_t *tmp = (journal_header_t*)(bh-b_data+i); tmp-h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); tmp-h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); tmp-h_sequence = cpu_to_be32(commit_transaction-t_tid); But I don't think we can _use_ this feature now. Because there are 1000 disks out there which didn't implement it. So why not just remove the loop and do a single write? Yes, but OTOH once the filesystem gets mounted with a new kernel, the journal gets quickly rewritten and we'll have correct commit blocks there. But since neither kernel nor e2fsprogs actually check for further sectors (they check for the header just in the beginning of a block), I agree that removing the loop completely is probably the best option. Nobody cared so far so I guess they won't care in future as well. I'll send you a replacement patch. Honza -- Jan Kara [EMAIL PROTECTED] SUSE Labs, CR - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] ext3 freeze feature
Hi, What you *could* do is to start putting processes to sleep if they attempt to write to the frozen filesystem, and then detect the deadlock case where the process holding the file descriptor used to freeze the filesystem gets frozen because it attempted to write to the filesystem --- at which point it gets some kind of signal (which defaults to killing the process), and the filesystem is unfrozen and as part of the unfreeze you wake up all of the processes that were put to sleep for touching the frozen filesystem. I don't think close() usually writes to journal and the deadlock occurs. Is there the special case which close() writes to journal in case of getting signal? Cheers, Takashi - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] ext3 freeze feature
Hi, Thank you for your comments. That's inherently unsafe - you can have multiple unfreezes running in parallel which seriously screws with the bdev semaphore count that is used to lock the device due to doing multiple up()s for every down. Your timeout thingy guarantees that at some point you will get multiple up()s occurring due to the timer firing racing with a thaw ioctl. If this interface is to be more widely exported, then it needs a complete revamp of how the bdev is locked while it is frozen so that there is no chance of a double up() ever occurring on the bd_mount_sem due to racing thaws. My patch has the race condition as you said. I will fix it. Cheers, Takashi - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Integrating patches in SLES10 e2fsprogs
Theodore Tso [EMAIL PROTECTED] writes: Wow. You have a lot of patches in the SLES 10 e2fsprogs. I'm not sure why some of them are there, though. For example: Patch0: elf.diff I'm not sure what this one is for. Patch1: e2fsprogs-1.35-libdir.diff This one does two different things. One is to include AC_HEADER_TIME in configure.in, and the other is to use $lib instead of lib when defining root_libdir. This seems to force root_libdir to /, which makes no sense to me. We want to have the shared libs in /lib{,64}, but the devel so links have to remain in /usr/lib{,64}. But looking closer at this, it seems that these patches are not needed, since the result can be obtained by defining ELF_INSTALL_DIR. I will drop these. Patch4: e2fsprogs-blkid.diff This patch causes fsck to check the BLKID_FILE environment variable and passes it to the blkid library. But the blkid library *already* checks the BLKID_FILE environment variable. So I'm not sure why this is necessary at all. Ok, the patch is obsolete indeed. Patch6: e2fsprogs-mdraid.patch This apparently adds a new environment variable, BLKID_SKIP_CHECK_MDRAID, which forces blkid to not detect mdraid devices. I'm not sure why. Workaround for people having stale RAID signature on their disk: https://bugzilla.novell.com/show_bug.cgi?id=100530 Patch10:close.patch I don't understand what this patch is trying to do. This patch is obsolete, as the issue is fixed by git commit 0d961040fe9ad927254b5a0e1a4de7bedadd8579 The original patch posted in Novell bugzilla #132708 contained this additional hunk, which is likely obsolete: @@ -217,6 +217,7 @@ EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS); + super_shadow = fs-super; fs_state = fs-super-s_state; fs-super-s_wtime = time(NULL); Patch12:e2fsprogs-mkinstalldirs.patch Why? Is needed since we recreate the auto* files. But I agree that this patch should better set MKINSTALLDIRS = @MKDIR_P@ not to literal mkdir -p. 
The @MKINSTALLDIRS@ seems to be obsolete in newer gettext (which seems to pull this in). Patch22:e2fsprogs-1.40.4-uuidd_pid_path.patch The problem with this patch is that /var/run is cleared via rm -rf, so it is highly problematic to put the scratch directory for uuidd in /var/run. Are you really sure? My interpretation of FHS is, that files under /var/run/ have to be cleared or truncated, but the subdirectories do not get deleted. Patch34:libcom_err-compile_et_permissions.patch Why? This is just a workaround and is not intended to stay there forever, it is also not intended for upstream inclusion. I have been asked to add this to avoid build problems of some other package (I think it was kerberos). I will have to check if this is still needed. Thanks, Matthias - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: e2fsprogs: pu branch
On Fri, Jan 25, 2008 at 05:32:59PM +0100, Valerie Clement wrote: Hi Ted, Here are the problems I found when testing e2fsprogs built in the pu branch. I've checked that they haven't been fixed in the latest version. (git describe = v1.40.3-98-gb6fea68) 1- Corrupt extent header report from e2fsck === Steps to reproduce it: Copy a regular file on the ext4 filesystem and make a symbolic link to it: # ln -s foo lnk Unmount fs. # e2fsck -f /dev/sdc0 e2fsck 1.40.3 (05-Dec-2007) Pass 1: Checking inodes, blocks, and sizes Error while reading over extent tree in inode 49105: Corrupt extent header Clear inodey This is a kernel-level bug, actually. The symlink is a fast symlink where the target of the symlink is in inode itself. The kernel level code should *not* be setting the EXTENTS_FL flag. Still, we'll have to put in some code to work around it for people with current kernel levels. 2- EXT2 directory corrupted report from debugfs === When trying to debug the previous problem using debugfs: debugfs: ncheck 49105 ncheck: EXT2 directory corrupted while calling ext2_dir_iterate ncheck: EXT2 directory corrupted while calling ext2_dir_iterate ncheck: EXT2 directory corrupted while calling ext2_dir_iterate Not all parts of the ext2 library have been fixed to understand extents (in particular, ext2_dir_iterate). On the todo list... 3- Strange FILE SYSTEM WAS MODIFIED report from e2fsck. === The test just creates an empty file so it is surprising that e2fsck modifies the filesystem. I found the reason of this modification when reading the git logs, e2fsck backups the primary superblock to the backups when the feature sets are different (EXT3_FEATURE_INCOMPAT_EXTENTS in that case). It's not really a problem, it's just confusing. To see with other users, perhaps. Yeah, we should add an explanatory message for that case. Thanks for pointing that out. 
- Ted - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] Parallelize IO for e2fsck
Hi! It's been discussed before, but I suspect the main reason why it was never done is no one submitted a patch. Also, the problem is actually a pretty complex one. There are a couple of different stages where you might want to send an alert to processes: * Data is starting to get ejected from page/buffer cache * System is starting to swap * System is starting to really struggle to find memory * System is starting an out-of-memory killer AIX's SIGDANGER really did the last two, where the OOM killer would tend to avoid processes that had a SIGDANGER handler in favor of processes that were SIGDANGER unaware. Then there is the additional complexity in Linux that you have multiple zones of memory, which at least on the historically more popular x86 was highly, highly important. You could say that whenever there is sufficient memory pressure in any zone that you start ejecting data from caches or start to swap that you start sending the signals --- but on x86 systems with lowmem, that could happen quite frequently, and since a user process has no idea whether its resources are in lowmem or highmem, there's not much you can do about this. As user pages are always in highmem, this should be easy to decide: only send SIGDANGER when highmem is full. (Yes, there are inodes/dentries/file descriptors in lowmem, but I doubt apps will respond to SIGDANGER by closing files). -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Integrating patches in SLES10 e2fsprogs
Thierry Vignaud [EMAIL PROTECTED] writes: In any case, I've just released e2fsprogs 1.40.6. which prompts me to note that it's neither on ftp://ftp.kernel.org/pub/linux/utils/util-linux-ng/v2.13 nor on http://e2fsprogs.sourceforge.net/ What is supposed to be the current primary source? Worse, http://git.kernel.org/?p=fs/ext2/e2fsprogs.git;a=summary has no tag for either 1.40.5 or 1.40.6 in the primary branch (only maint shows 1.40.5) You could be more user-friendly to us, distro package maintainers :-( - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Integrating patches in SLES10 e2fsprogs
Theodore Tso wrote: On Mon, Jan 28, 2008 at 04:26:53PM +0100, Matthias Koenig wrote: Patch6: e2fsprogs-mdraid.patch This apparently adds a new environment variable, BLKID_SKIP_CHECK_MDRAID, which forces blkid to not detect mdraid devices. I'm not sure why. Workaround for people having stale RAID signature on their disk: https://bugzilla.novell.com/show_bug.cgi?id=100530 Hmm... there's got to be a better way around this. Won't help existing block devices, but it'd be nice to have a common library which could be called @ mkfs time to wipe out all known signatures... mkfs.xfs tries to do this, but it'd be silly to duplicate in every mkfs. On my one of these days list is to get another cheap/used laptop so I can try out the latest Fedora Core Rawhide without having to fire up a huge (noisy) x86_64 box Just partition... ;) -Eric - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Integrating patches in SLES10 e2fsprogs
Theodore Tso [EMAIL PROTECTED] writes: In any case, I've just released e2fsprogs 1.40.6. which prompts me to note that it's neither on ftp://ftp.kernel.org/pub/linux/utils/util-linux-ng/v2.13 nor on http://e2fsprogs.sourceforge.net/ What is supposed to be the current primary source? It contains some of obviously correct patches from the Novell and Red Hat packages. More importantly, it includes the mke2fs -E test_fs feature which will be needed once we push test_fs ext4 patches to mainline, which I plan to push to Linus in the next day or two. - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Oops, incorrect tag: v1.40.5 in e2fsprogs git repository
Hi, I screwed up, and incorrectly tagged v1.40.5 in the e2fsprogs git repository. I accidentally tagged the next branch instead of the maint branch, so the wrong commit was tagged as v1.40.5. Argh... This has been fixed on git.kernel.org, but it's still busted on repo.or.cz. If you've pulled from the e2fsprogs git repository incorrectly, you'll need to run the command: git tag -d v1.40.5 ... and then re-pull from the git repository to get the correct tag. Petr, if you could run the same command on the e2fsprogs repository on repo.or.cz, it would be much appreciated!! - Ted - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Oops, incorrect tag: v1.40.5 in e2fsprogs git repository
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 Hi, I screwed up, and incorrectly tagged v1.40.5 in the e2fsprogs git repository. I accidentally tagged the next branch instead of the maint branch, so the the wrong commit was tagged as v1.40.5. Argh... This has been fixed on git.kernel.org, but it's still busted on repo.or.cz. If you've pulled from the e2fsprogs git repository incorrectly, you'll need to run the command: git tag -d v1.40.5 ... and then re-pull from the git repository to get the correct tag. Petr, if you could run the same command on the e2fsprogs repository on repo.or.cz, it would be much appreciated!! - Ted -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.6 (GNU/Linux) iD8DBQFHnhyr7To545NnTEARAr3gAKD1ikMgB74Qw+65fdZRn0kaEXry8ACcDIva bEVF2ZqTSaaf5hOE3hmfZyw= =wRDe -END PGP SIGNATURE- - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] Parallelize IO for e2fsck
On Mon 2008-01-28 14:56:33, Theodore Tso wrote: On Mon, Jan 28, 2008 at 07:30:05PM +, Pavel Machek wrote: As user pages are always in highmem, this should be easy to decide: only send SIGDANGER when highmem is full. (Yes, there are inodes/dentries/file descriptors in lowmem, but I doubt apps will respond to SIGDANGER by closing files). Good point; for a system with at least (say) 2GB of memory, that definitely makes sense. For a system with less than 768 megs of memory (how quaint, but it wasn't that long ago this was a lot of memory :-), there wouldn't *be* any memory in highmem at all Ok, so it is 'send SIGDANGER when all zones are low', because user allocations can go from all zones (unless you have something really exotic, I'm not sure if that is true on huge NUMA machines similar). Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] ext3: Fix lock inversion in direct IO
Hi Andrew, the patch below fixes a lock inversion which someone reported recently. More details below in the changelog. Can you merge the patch please? I'll also write a similar patch for ext4 once we agree this is the way to go... Honza -- Jan Kara [EMAIL PROTECTED] SUSE Labs, CR --- We cannot start transaction in ext3_direct_IO() and just let it last during the whole write because dio_get_page() acquires mmap_sem which ranks above transaction start (e.g. because we have dependency chain mmap_sem->PageLock->journal_start, or because we update atime while holding mmap_sem) and thus deadlocks could happen. We solve the problem by starting a transaction separately for each ext3_get_block() call. We *could* have a problem that we allocate a block and before its data are written out the machine crashes and thus we expose stale data. But that does not happen because for hole-filling generic code falls back to buffered writes and for file extension, we add inode to orphan list and thus in case of crash, journal replay will truncate inode back to the original size. Signed-off-by: Jan Kara [EMAIL PROTECTED] diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 9b162cd..5ab7c57 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -941,55 +941,45 @@ out: return err; } -#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) +/* Maximum number of blocks we map for direct IO at once. */ +#define DIO_MAX_BLOCKS 4096 +/* + * Number of credits we need for writing DIO_MAX_BLOCKS: + * We need sb + group descriptor + bitmap + inode -> 4 + * For B blocks with A block pointers per block we need: + * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). + * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. 
+ */ +#define DIO_CREDITS 25 static int ext3_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = ext3_journal_current_handle(); - int ret = 0; + int ret = 0, started = 0; unsigned max_blocks = bh_result-b_size inode-i_blkbits; - if (!create) - goto get_block; /* A read */ - - if (max_blocks == 1) - goto get_block; /* A single block get */ - - if (handle-h_transaction-t_state == T_LOCKED) { - /* -* Huge direct-io writes can hold off commits for long -* periods of time. Let this commit run. -*/ - ext3_journal_stop(handle); - handle = ext3_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) + if (create !handle) {/* Direct IO write... */ + if (max_blocks DIO_MAX_BLOCKS) + max_blocks = DIO_MAX_BLOCKS; + handle = ext3_journal_start(inode, DIO_CREDITS + + 2 * EXT3_QUOTA_TRANS_BLOCKS(sb)); + if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto get_block; - } - - if (handle-h_buffer_credits = EXT3_RESERVE_TRANS_BLOCKS) { - /* -* Getting low on buffer credits... -*/ - ret = ext3_journal_extend(handle, DIO_CREDITS); - if (ret 0) { - /* -* Couldn't extend the transaction. Start a new one. -*/ - ret = ext3_journal_restart(handle, DIO_CREDITS); + goto out; } + started = 1; } -get_block: - if (ret == 0) { - ret = ext3_get_blocks_handle(handle, inode, iblock, + ret = ext3_get_blocks_handle(handle, inode, iblock, max_blocks, bh_result, create, 0); - if (ret 0) { - bh_result-b_size = (ret inode-i_blkbits); - ret = 0; - } + if (ret 0) { + bh_result-b_size = (ret inode-i_blkbits); + ret = 0; } + if (started) + ext3_journal_stop(handle); +out: return ret; } @@ -1680,7 +1670,8 @@ static int ext3_releasepage(struct page *page, gfp_t wait) * if the machine crashes during the write. * * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. + * crashes then stale disk data _may_ be exposed inside the file. 
But current + * VFS code falls back into buffered path in that case so we are safe. */ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, @@ -1689,7 +1680,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, struct file *file = iocb-ki_filp; struct inode *inode =
Re: Oops, incorrect tag: v1.40.5 in e2fsprogs git repository
On Tue, Jan 29, 2008 at 01:43:31AM +0100, Petr Baudis wrote: Done, though it should be doable from your side as well by something like git push reporepo :refs/tags/v1.40.5 Wow, I thought that only worked on branches, but you're right, that works. Thanks!!! - Ted - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Oops, incorrect tag: v1.40.5 in e2fsprogs git repository
Hi, On Mon, Jan 28, 2008 at 01:19:23PM -0500, Theodore Tso wrote: I screwed up, and incorrectly tagged v1.40.5 in the e2fsprogs git repository. I accidentally tagged the next branch instead of the maint branch, so the wrong commit was tagged as v1.40.5. Argh... This has been fixed on git.kernel.org, but it's still busted on repo.or.cz. If you've pulled from the e2fsprogs git repository incorrectly, you'll need to run the command: git tag -d v1.40.5 ... and then re-pull from the git repository to get the correct tag. Petr, if you could run the same command on the e2fsprogs repository on repo.or.cz, it would be much appreciated!! Done, though it should be doable from your side as well by something like git push reporepo :refs/tags/v1.40.5 I believe. -- Petr Pasky Baudis We don't know who it was that discovered water, but we're pretty sure that it wasn't a fish. -- Marshall McLuhan - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[GIT PULL] ext4 update
Hi Linus, Please pull from: git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git for_linus This is the major set of updates meant for 2.6.24 from the ext4 team; these patches have been baking in -mm for a while. The two major features included here are the multi-block allocator that has been in use by Clusterfs for their Lustre filesystem, as well as the journal checksumming features. There were also a huge number of clean ups and various bug fixes. Regards, - Ted Adrian Bunk (1): ext4/super.c: fix #ifdef's (CONFIG_EXT4_* -> CONFIG_EXT4DEV_*) Alex Tomas (2): ext4: Add new functions for searching extent tree ext4: Add multi block allocator for ext4 Aneesh Kumar K.V (23): ext4: Introduce ext4_lblk_t ext4: Introduce ext4_update_*_feature ext4: Fix sparse warnings. ext4: Rename i_file_acl to i_file_acl_lo ext4: Rename i_dir_acl to i_size_high ext4: Add support for 48 bit inode i_blocks. ext4: Support large files ext2: Fix the max file size for ext2 file system. ext3: Fix the max file size for ext3 file system. ext4: Return after ext4_error in case of failures ext4: Change the default behaviour on error Add buffer head related helper functions ext4: add block bitmap validation ext4: Check for the correct error return from ext4: Make ext4_get_blocks_wrap take the truncate_mutex early. ext4: Convert truncate_mutex to read write semaphore. ext4: Take read lock during overwrite case. ext4: Add EXT4_IOC_MIGRATE ioctl ext4: Fix ext4_show_options to show the correct mount options. ext4: Add ext4_find_next_bit() ext4: Enable the multiblock allocator by default ext4: Check for return value from sb_set_blocksize ext4: Use the ext4_ext_actual_len() helper function Avantika Mathur (2): ext4: add ext4_group_t, and change all group variables to this type. ext4: fixes block group number being set to a negative value Chris Snook (1): jbd2: Remove printk from J_ASSERT to preserve registers during BUG Coly Li (1): ext4: sync up block group descriptor with e2fsprogs. 
Dmitry Monakhov (1): ext4: fix uniniatilized extent splitting error Eric Sandeen (6): ext4 extents: remove unneeded casts ext4: different maxbytes functions for bitmap extentfiles ext4: export iov_shorten from kernel for ext4's use ext4: store maxbytes for bitmapped files and return EFBIG as appropriate ext4: fix oops on corrupted ext4 mount ext4: fix up EXT4FS_DEBUG builds Girish Shilamkar (1): ext4: Add the journal checksum feature Jan Kara (2): ext4: Avoid rec_len overflow with 64KB block size jbd2: Fix assertion failure in fs/jbd2/checkpoint.c Jean Noel Cordenner (2): vfs: Add 64 bit i_version support ext4: Add inode version support in ext4 Johann Lombardi (1): jbd2: jbd2 stats through procfs Mariusz Kozlowski (1): ext4: remove unused code from ext4_find_entry() Miklos Szeredi (1): ext4: Add stripe= option to /proc/mounts Mingming Cao (4): jbd2: add lockdep support jbd2: Mark jbd2 slabs as SLAB_TEMPORARY jbd2: Use round-jiffies() function for the 5 second ext4/jbd2 wakeup jbd2: sparse pointer use of zero as null Takashi Sato (1): ext4: Support large blocksize up to PAGESIZE Documentation/filesystems/ext4.txt | 10 b/Documentation/filesystems/ext4.txt | 10 b/Documentation/filesystems/proc.txt | 39 b/fs/Kconfig |1 b/fs/afs/dir.c |9 b/fs/afs/inode.c |3 b/fs/buffer.c | 44 b/fs/ext2/super.c | 32 b/fs/ext3/super.c | 32 b/fs/ext4/Makefile |2 b/fs/ext4/balloc.c | 67 b/fs/ext4/dir.c|2 b/fs/ext4/extents.c| 24 b/fs/ext4/file.c |4 b/fs/ext4/group.h |8 b/fs/ext4/ialloc.c |2 b/fs/ext4/inode.c | 15 b/fs/ext4/ioctl.c |3 b/fs/ext4/mballoc.c| 4552 + b/fs/ext4/migrate.c| 570 +++ b/fs/ext4/namei.c |4 b/fs/ext4/resize.c | 16 b/fs/ext4/super.c | 15 b/fs/ext4/xattr.c |4 b/fs/inode.c | 17 b/fs/jbd2/checkpoint.c | 10 b/fs/jbd2/commit.c
2.6.24-ext4-1 patchset released
I've just released 2.6.24-ext4-1. It's basically just a clean up of the stable patch series, in response to LKML review comments, in preparation for Linus to pull them into mainline. git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git 2.6.24-ext4-1 http://git.kernel.org/?p=linux/kernel/git/tytso/ext4.git;a=shortlog;h=2.6.24-ext4-1 and ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/ext4-patches/2.6.24-ext4-1 - Ted - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] Parallelize IO for e2fsck
On Mon, 28 Jan 2008, Theodore Tso wrote: On Mon, Jan 28, 2008 at 07:30:05PM +, Pavel Machek wrote: As user pages are always in highmem, this should be easy to decide: only send SIGDANGER when highmem is full. (Yes, there are inodes/dentries/file descriptors in lowmem, but I doubt apps will respond to SIGDANGER by closing files). Good point; for a system with at least (say) 2GB of memory, that definitely makes sense. For a system with less than 768 megs of memory (how quaint, but it wasn't that long ago this was a lot of memory :-), there wouldn't *be* any memory in highmem at all not to mention machines with 1G of ram (900M lowmem, 128M highmem) David Lang - To unsubscribe from this list: send the line unsubscribe linux-ext4 in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html