Re: [Cluster-devel] [PATCH] GFS2: Test bufdata with buffer locked and gfs2_log_lock held
Hi, Now in the -nmw tree - thanks for fixing this. Also I'm sorting out a -fixes tree too, so that I can send the collected bug fixes to Linus ahead of the next merge window, Steve. On Wed, 2012-11-07 at 00:38 -0600, Benjamin Marzinski wrote: In gfs2_trans_add_bh(), gfs2 was testing if a there was a bd attached to the buffer without having the gfs2_log_lock held. It was then assuming it would stay attached for the rest of the function. However, without either the log lock being held of the buffer locked, __gfs2_ail_flush() could detach bd at any time. This patch moves the locking before the test. If there isn't a bd already attached, gfs2 can safely allocate one and attach it before locking. There is no way that the newly allocated bd could be on the ail list, and thus no way for __gfs2_ail_flush() to detach it. Signed-off-by: Benjamin Marzinski bmarz...@redhat.com --- fs/gfs2/lops.c | 14 ++ fs/gfs2/trans.c |8 2 files changed, 10 insertions(+), 12 deletions(-) Index: gfs2-3.0-nmw/fs/gfs2/lops.c === --- gfs2-3.0-nmw.orig/fs/gfs2/lops.c +++ gfs2-3.0-nmw/fs/gfs2/lops.c @@ -393,12 +393,10 @@ static void buf_lo_add(struct gfs2_sbd * struct gfs2_meta_header *mh; struct gfs2_trans *tr; - lock_buffer(bd-bd_bh); - gfs2_log_lock(sdp); tr = current-journal_info; tr-tr_touched = 1; if (!list_empty(bd-bd_list)) - goto out; + return; set_bit(GLF_LFLUSH, bd-bd_gl-gl_flags); set_bit(GLF_DIRTY, bd-bd_gl-gl_flags); mh = (struct gfs2_meta_header *)bd-bd_bh-b_data; @@ -414,9 +412,6 @@ static void buf_lo_add(struct gfs2_sbd * sdp-sd_log_num_buf++; list_add(bd-bd_list, sdp-sd_log_le_buf); tr-tr_num_buf_new++; -out: - gfs2_log_unlock(sdp); - unlock_buffer(bd-bd_bh); } static void gfs2_check_magic(struct buffer_head *bh) @@ -775,12 +770,10 @@ static void databuf_lo_add(struct gfs2_s struct address_space *mapping = bd-bd_bh-b_page-mapping; struct gfs2_inode *ip = GFS2_I(mapping-host); - lock_buffer(bd-bd_bh); - gfs2_log_lock(sdp); if (tr) tr-tr_touched = 1; if (!list_empty(bd-bd_list)) - goto out; + return; set_bit(GLF_LFLUSH, bd-bd_gl-gl_flags); set_bit(GLF_DIRTY, bd-bd_gl-gl_flags); if (gfs2_is_jdata(ip)) { @@ -791,9 +784,6 @@ static void databuf_lo_add(struct gfs2_s } else { list_add_tail(bd-bd_list, sdp-sd_log_le_ordered); } -out: - gfs2_log_unlock(sdp); - unlock_buffer(bd-bd_bh); } /** Index: gfs2-3.0-nmw/fs/gfs2/trans.c === --- gfs2-3.0-nmw.orig/fs/gfs2/trans.c +++ gfs2-3.0-nmw/fs/gfs2/trans.c @@ -155,14 +155,22 @@ void gfs2_trans_add_bh(struct gfs2_glock struct gfs2_sbd *sdp = gl-gl_sbd; struct gfs2_bufdata *bd; + lock_buffer(bh); + gfs2_log_lock(sdp); bd = bh-b_private; if (bd) gfs2_assert(sdp, bd-bd_gl == gl); else { + gfs2_log_unlock(sdp); + unlock_buffer(bh); gfs2_attach_bufdata(gl, bh, meta); bd = bh-b_private; + lock_buffer(bh); + gfs2_log_lock(sdp); } lops_add(sdp, bd); + gfs2_log_unlock(sdp); + unlock_buffer(bh); } void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
[Cluster-devel] [PATCH 3/7] GFS2: Clean up some unused assignments
From: Andrew Price anpr...@redhat.com Cleans up two cases where variables were assigned values but then never used again. Signed-off-by: Andrew Price anpr...@redhat.com Signed-off-by: Steven Whitehouse swhit...@redhat.com diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 0def050..377a68d 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -677,10 +677,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, size_t writesize = iov_length(iov, nr_segs); struct dentry *dentry = file-f_dentry; struct gfs2_inode *ip = GFS2_I(dentry-d_inode); - struct gfs2_sbd *sdp; int ret; - sdp = GFS2_SB(file-f_mapping-host); ret = gfs2_rs_alloc(ip); if (ret) return ret; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 8ff95a2..01e444b 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -621,7 +621,6 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) static void revoke_lo_before_commit(struct gfs2_sbd *sdp) { - struct gfs2_log_descriptor *ld; struct gfs2_meta_header *mh; unsigned int offset; struct list_head *head = sdp-sd_log_le_revoke; @@ -634,7 +633,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) length = gfs2_struct2blk(sdp, sdp-sd_log_num_revoke, sizeof(u64)); page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp-sd_log_num_revoke); - ld = page_address(page); offset = sizeof(struct gfs2_log_descriptor); list_for_each_entry(bd, head, bd_list) { -- 1.7.4
[Cluster-devel] [PATCH 1/7] GFS2: Fix an unchecked error from gfs2_rs_alloc
From: Andrew Price anpr...@redhat.com Check the return value of gfs2_rs_alloc(ip) and avoid a possible null pointer dereference. Signed-off-by: Andrew Price anpr...@redhat.com Signed-off-by: Steven Whitehouse swhit...@redhat.com diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 40c4b0d..c5af8e1 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -497,8 +497,11 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) struct gfs2_quota_data **qd; int error; - if (ip-i_res == NULL) - gfs2_rs_alloc(ip); + if (ip-i_res == NULL) { + error = gfs2_rs_alloc(ip); + if (error) + return error; + } qd = ip-i_res-rs_qa_qd; -- 1.7.4
[Cluster-devel] [PATCH 7/7] GFS2: Test bufdata with buffer locked and gfs2_log_lock held
From: Benjamin Marzinski bmarz...@redhat.com In gfs2_trans_add_bh(), gfs2 was testing if a there was a bd attached to the buffer without having the gfs2_log_lock held. It was then assuming it would stay attached for the rest of the function. However, without either the log lock being held of the buffer locked, __gfs2_ail_flush() could detach bd at any time. This patch moves the locking before the test. If there isn't a bd already attached, gfs2 can safely allocate one and attach it before locking. There is no way that the newly allocated bd could be on the ail list, and thus no way for __gfs2_ail_flush() to detach it. Signed-off-by: Benjamin Marzinski bmarz...@redhat.com Signed-off-by: Steven Whitehouse swhit...@redhat.com diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 01e444b..9ceccb1 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -393,12 +393,10 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) struct gfs2_meta_header *mh; struct gfs2_trans *tr; - lock_buffer(bd-bd_bh); - gfs2_log_lock(sdp); tr = current-journal_info; tr-tr_touched = 1; if (!list_empty(bd-bd_list)) - goto out; + return; set_bit(GLF_LFLUSH, bd-bd_gl-gl_flags); set_bit(GLF_DIRTY, bd-bd_gl-gl_flags); mh = (struct gfs2_meta_header *)bd-bd_bh-b_data; @@ -414,9 +412,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) sdp-sd_log_num_buf++; list_add(bd-bd_list, sdp-sd_log_le_buf); tr-tr_num_buf_new++; -out: - gfs2_log_unlock(sdp); - unlock_buffer(bd-bd_bh); } static void gfs2_check_magic(struct buffer_head *bh) @@ -775,12 +770,10 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) struct address_space *mapping = bd-bd_bh-b_page-mapping; struct gfs2_inode *ip = GFS2_I(mapping-host); - lock_buffer(bd-bd_bh); - gfs2_log_lock(sdp); if (tr) tr-tr_touched = 1; if (!list_empty(bd-bd_list)) - goto out; + return; set_bit(GLF_LFLUSH, bd-bd_gl-gl_flags); set_bit(GLF_DIRTY, bd-bd_gl-gl_flags); if (gfs2_is_jdata(ip)) { @@ -791,9 +784,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) } else { list_add_tail(bd-bd_list, sdp-sd_log_le_ordered); } -out: - gfs2_log_unlock(sdp); - unlock_buffer(bd-bd_bh); } /** diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index adbd278..4136270 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -155,14 +155,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) struct gfs2_sbd *sdp = gl-gl_sbd; struct gfs2_bufdata *bd; + lock_buffer(bh); + gfs2_log_lock(sdp); bd = bh-b_private; if (bd) gfs2_assert(sdp, bd-bd_gl == gl); else { + gfs2_log_unlock(sdp); + unlock_buffer(bh); gfs2_attach_bufdata(gl, bh, meta); bd = bh-b_private; + lock_buffer(bh); + gfs2_log_lock(sdp); } lops_add(sdp, bd); + gfs2_log_unlock(sdp); + unlock_buffer(bh); } void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) -- 1.7.4
[Cluster-devel] [PATCH 2/7] GFS2: Fix possible null pointer deref in gfs2_rs_alloc
From: Andrew Price anpr...@redhat.com Despite the return value from kmem_cache_zalloc() being checked, the error wasn't being returned until after a possible null pointer dereference. This patch returns the error immediately, allowing the removal of the error variable. Signed-off-by: Andrew Price anpr...@redhat.com Signed-off-by: Steven Whitehouse swhit...@redhat.com diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3cc402c..43d1a20 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -553,7 +553,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) */ int gfs2_rs_alloc(struct gfs2_inode *ip) { - int error = 0; struct gfs2_blkreserv *res; if (ip-i_res) @@ -561,7 +560,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); if (!res) - error = -ENOMEM; + return -ENOMEM; RB_CLEAR_NODE(res-rs_node); @@ -571,7 +570,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) else ip-i_res = res; up_write(ip-i_rw_mutex); - return error; + return 0; } static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) -- 1.7.4
[Cluster-devel] [PATCH 4/7] GFS2: Require user to provide argument for FITRIM
From: Lukas Czerner lczer...@redhat.com When the fstrim_range argument is not provided by user in FITRIM ioctl we should just return EFAULT and not promoting bad behaviour by filling the structure in kernel. Let the user deal with it. Signed-off-by: Lukas Czerner lczer...@redhat.com Signed-off-by: Steven Whitehouse swhit...@redhat.com diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 43d1a20..b6bbf71 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1270,11 +1270,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!blk_queue_discard(q)) return -EOPNOTSUPP; - if (argp == NULL) { - r.start = 0; - r.len = ULLONG_MAX; - r.minlen = 0; - } else if (copy_from_user(r, argp, sizeof(r))) + if (copy_from_user(r, argp, sizeof(r))) return -EFAULT; ret = gfs2_rindex_update(sdp); @@ -1323,7 +1319,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) out: r.len = trimmed 9; - if (argp copy_to_user(argp, r, sizeof(r))) + if (copy_to_user(argp, r, sizeof(r))) return -EFAULT; return ret; -- 1.7.4
[Cluster-devel] [PATCH 6/7] GFS2: Don't call file_accessed() with a shared glock
From: Benjamin Marzinski bmarz...@redhat.com file_accessed() was being called by gfs2_mmap() with a shared glock. If it needed to update the atime, it was crashing because it dirtied the inode in gfs2_dirty_inode() without holding an exclusive lock. gfs2_dirty_inode() checked if the caller was already holding a glock, but it didn't make sure that the glock was in the exclusive state. Now, instead of calling file_accessed() while holding the shared lock in gfs2_mmap(), file_accessed() is called after grabbing and releasing the glock to update the inode. If file_accessed() needs to update the atime, it will grab an exclusive lock in gfs2_dirty_inode(). gfs2_dirty_inode() now also checks to make sure that if the calling process has already locked the glock, it has an exclusive lock. Signed-off-by: Benjamin Marzinski bmarz...@redhat.com Signed-off-by: Steven Whitehouse swhit...@redhat.com diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 377a68d..e056b4c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -516,15 +516,13 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) struct gfs2_holder i_gh; int error; - gfs2_holder_init(ip-i_gl, LM_ST_SHARED, LM_FLAG_ANY, i_gh); - error = gfs2_glock_nq(i_gh); - if (error == 0) { - file_accessed(file); - gfs2_glock_dq(i_gh); - } - gfs2_holder_uninit(i_gh); + error = gfs2_glock_nq_init(ip-i_gl, LM_ST_SHARED, LM_FLAG_ANY, + i_gh); if (error) return error; + /* grab lock to update inode */ + gfs2_glock_dq_uninit(i_gh); + file_accessed(file); } vma-vm_ops = gfs2_vm_ops; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index bc73726..d648867 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -810,7 +810,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) return; } need_unlock = 1; - } + } else if (WARN_ON_ONCE(ip-i_gl-gl_state != LM_ST_EXCLUSIVE)) + return; if (current-journal_info == NULL) { ret = gfs2_trans_begin(sdp, RES_DINODE, 0); -- 1.7.4
[Cluster-devel] [PATCH 5/7] GFS2: Fix FITRIM argument handling
From: Lukas Czerner lczer...@redhat.com Currently implementation in gfs2 uses FITRIM arguments as it were in file system blocks units which is wrong. The FITRIM arguments (fstrim_range.start, fstrim_range.len and fstrim_range.minlen) are actually in bytes. Moreover, check for start argument beyond the end of file system, len argument being smaller than file system block and minlen argument being bigger than biggest resource group were missing. This commit converts the code to convert FITRIM argument to file system blocks and also adds appropriate checks mentioned above. All the problems were recognised by xfstests 251 and 260. Signed-off-by: Lukas Czerner lczer...@redhat.com Signed-off-by: Steven Whitehouse swhit...@redhat.com diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b6bbf71..38fe18f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1262,7 +1262,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp) int ret = 0; u64 amt; u64 trimmed = 0; + u64 start, end, minlen; unsigned int x; + unsigned bs_shift = sdp-sd_sb.sb_bsize_shift; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1277,8 +1279,18 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (ret) return ret; - rgd = gfs2_blk2rgrpd(sdp, r.start, 0); - rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0); + start = r.start bs_shift; + end = start + (r.len bs_shift); + minlen = max_t(u64, r.minlen, + q-limits.discard_granularity) bs_shift; + + rgd = gfs2_blk2rgrpd(sdp, start, 0); + rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0); + + if (end = start || + minlen sdp-sd_max_rg_data || + start rgd_end-rd_data0 + rgd_end-rd_data) + return -EINVAL; while (1) { @@ -1290,7 +1302,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp) /* Trim each bitmap in the rgrp */ for (x = 0; x rgd-rd_length; x++) { struct gfs2_bitmap *bi = rgd-rd_bits + x; - ret = gfs2_rgrp_send_discards(sdp, rgd-rd_data0, NULL, bi, r.minlen, amt); + ret = gfs2_rgrp_send_discards(sdp, + rgd-rd_data0, NULL, bi, minlen, + amt); if (ret) { gfs2_glock_dq_uninit(gh); goto out; -- 1.7.4
[Cluster-devel] GFS2: Pre-pull patch posting (fixes)
Hi, Here are a number of GFS2 bug fixes. There are three from Andy Price which fix various issues spotted by automated code analysis. There are two from Lukas Czerner fixing my mistaken assumptions as to how FITRIM should work. Finally Ben Marzinski has fixed a bug relating to mmap and atime and also a bug relating to a locking issue in the transaction code. Steve.
[Cluster-devel] GFS2: Pull request (fixes)
Hi, Please consider pulling the following GFS2 fixes, Steve. --- Here are a number of GFS2 bug fixes. There are three from Andy Price which fix various issues spotted by automated code analysis. There are two from Lukas Czerner fixing my mistaken assumptions as to how FITRIM should work. Finally Ben Marzinski has fixed a bug relating to mmap and atime and also a bug relating to a locking issue in the transaction code. The following changes since commit ddffeb8c4d0331609ef2581d84de4d763607bd37: Linux 3.7-rc1 (2012-10-14 14:41:04 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes.git master Andrew Price (3): GFS2: Fix an unchecked error from gfs2_rs_alloc GFS2: Fix possible null pointer deref in gfs2_rs_alloc GFS2: Clean up some unused assignments Benjamin Marzinski (2): GFS2: Don't call file_accessed() with a shared glock GFS2: Test bufdata with buffer locked and gfs2_log_lock held Lukas Czerner (2): GFS2: Require user to provide argument for FITRIM GFS2: Fix FITRIM argument handling fs/gfs2/file.c | 14 +- fs/gfs2/lops.c | 16 ++-- fs/gfs2/quota.c |7 +-- fs/gfs2/rgrp.c | 33 + fs/gfs2/super.c |3 ++- fs/gfs2/trans.c |8 6 files changed, 43 insertions(+), 38 deletions(-) signature.asc Description: This is a digitally signed message part
[Cluster-devel] GFS2 -nmw git tree
Hi, Since Linus has pulled the fixes I sent earlier, I've rebased the -nmw tree on top of Linus latest tree, Steve.
[Cluster-devel] gfs2: skip dlm_unlock calls in unmount
When unmounting, gfs2 does a full dlm_unlock operation on every cached lock. This can create a very large amount of work and can take a long time to complete. However, the vast majority of these dlm unlock operations are unnecessary because after all the unlocks are done, gfs2 leaves the dlm lockspace, which automatically clears the locks of the leaving node, without unlocking each one individually. So, gfs2 can skip explicit dlm unlocks, and use dlm_release_lockspace to remove the locks implicitly. The one exception is when the lock's lvb is being used. In this case, dlm_unlock is called because it may update the lvb of the resource. Signed-off-by: David Teigland teigl...@redhat.com --- fs/gfs2/glock.c|1 + fs/gfs2/incore.h |1 + fs/gfs2/lock_dlm.c |6 ++ 3 files changed, 8 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e6c2fd5..f3a5edb 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1528,6 +1528,7 @@ static void dump_glock_func(struct gfs2_glock *gl) void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { + set_bit(SDF_SKIP_DLM_UNLOCK, sdp-sd_flags); glock_hash_walk(clear_glock, sdp); flush_workqueue(glock_workqueue); wait_event(sdp-sd_glock_wait, atomic_read(sdp-sd_glock_disposal) == 0); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 3d469d3..67a39cf 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -539,6 +539,7 @@ enum { SDF_DEMOTE = 5, SDF_NOJOURNALID = 6, SDF_RORECOVERY = 7, /* read only recovery */ + SDF_SKIP_DLM_UNLOCK = 8, }; #define GFS2_FSNAME_LEN256 diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 0fb6539..efd0fb6 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -289,6 +289,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_update_request_times(gl); + + if (test_bit(SDF_SKIP_DLM_UNLOCK, sdp-sd_flags) !gl-gl_lvb[0]) { + gfs2_glock_free(gl); + return; + } + error = dlm_unlock(ls-ls_dlm, gl-gl_lksb.sb_lkid, DLM_LKF_VALBLK, NULL, gl); if (error) { -- 1.7.10.1.362.g242cab3