Currently gfs2 ordered buffer writes use WRITE_SYNC_PLUG as the IO type being dispatched. They aren't sync writes; we issue all the pending IO, then wait for all of it to complete. In other words, this is async IO with a bulk wait at the end.
We should use normal WRITE tagging for this, and before we start waiting, make sure that all the IO is issued by unplugging the device. The use of normal WRITEs for these buffers should significantly reduce the overhead of processing in the cfq elevator and enable the disk subsystem to get much closer to disk bandwidth for large sequential writes. Signed-off-by: Dave Chinner <dchin...@redhat.com> --- fs/gfs2/aops.c | 3 +++ fs/gfs2/log.c | 11 +++++++---- fs/gfs2/lops.c | 18 ++++++++++-------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 7b8da94..b75784c 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -20,6 +20,7 @@ #include <linux/swap.h> #include <linux/gfs2_ondisk.h> #include <linux/backing-dev.h> +#include <linux/blkdev.h> #include "gfs2.h" #include "incore.h" @@ -34,6 +35,7 @@ #include "super.h" #include "util.h" #include "glops.h" +#include "trace_gfs2.h" static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, @@ -52,6 +54,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, if (gfs2_is_jdata(ip)) set_buffer_uptodate(bh); gfs2_trans_add_bh(ip->i_gl, bh, 0); + trace_gfs2_submit_bh(bh, WRITE, __func__); } } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index bd26dff..a9797be 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -18,6 +18,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/bio.h> +#include <linux/blkdev.h> #include "gfs2.h" #include "incore.h" @@ -121,8 +122,8 @@ __acquires(&sdp->sd_log_lock) lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; - trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__); - submit_bh(WRITE_SYNC_PLUG, bh); + trace_gfs2_submit_bh(bh, WRITE, __func__); + submit_bh(WRITE, bh); } else { unlock_buffer(bh); brelse(bh); @@ -675,8 +676,8 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) lock_buffer(bh); if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { 
bh->b_end_io = end_buffer_write_sync; - trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__); - submit_bh(WRITE_SYNC_PLUG, bh); + trace_gfs2_submit_bh(bh, WRITE, __func__); + submit_bh(WRITE, bh); } else { unlock_buffer(bh); brelse(bh); @@ -692,6 +693,8 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) struct gfs2_bufdata *bd; struct buffer_head *bh; + blk_run_backing_dev(blk_get_backing_dev_info(sdp->sd_vfs->s_bdev), NULL); + gfs2_log_lock(sdp); while (!list_empty(&sdp->sd_log_le_ordered)) { bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 7278cf0..0fe2f3c 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -15,6 +15,7 @@ #include <linux/gfs2_ondisk.h> #include <linux/bio.h> #include <linux/fs.h> +#include <linux/blkdev.h> #include "gfs2.h" #include "incore.h" @@ -198,8 +199,8 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) } gfs2_log_unlock(sdp); - trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__); - submit_bh(WRITE_SYNC_PLUG, bh); + trace_gfs2_submit_bh(bh, WRITE, __func__); + submit_bh(WRITE, bh); gfs2_log_lock(sdp); n = 0; @@ -209,8 +210,8 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) gfs2_log_unlock(sdp); lock_buffer(bd2->bd_bh); bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); - trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__); - submit_bh(WRITE_SYNC_PLUG, bh); + trace_gfs2_submit_bh(bh, WRITE, __func__); + submit_bh(WRITE, bh); gfs2_log_lock(sdp); if (++n >= num) break; @@ -220,6 +221,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) total -= num; } gfs2_log_unlock(sdp); + blk_run_backing_dev(blk_get_backing_dev_info(sdp->sd_vfs->s_bdev), NULL); } static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) @@ -573,8 +575,8 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, ptr = bh_log_ptr(bh); get_bh(bh); - trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__); - submit_bh(WRITE_SYNC_PLUG, bh); + 
trace_gfs2_submit_bh(bh, WRITE, __func__); + submit_bh(WRITE, bh); gfs2_log_lock(sdp); while(!list_empty(list)) { bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); @@ -600,8 +602,8 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, } else { bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); } - trace_gfs2_submit_bh(bh1, WRITE_SYNC_PLUG, __func__); - submit_bh(WRITE_SYNC_PLUG, bh1); + trace_gfs2_submit_bh(bh1, WRITE, __func__); + submit_bh(WRITE, bh1); gfs2_log_lock(sdp); ptr += 2; } -- 1.6.5