Re: [GFS2] Fix journal flush problem [56/70]

2006-12-04 Thread Steven Whitehouse
Hi,

On Fri, 2006-12-01 at 12:58 -0600, Russell Cattelan wrote:
> On Thu, 2006-11-30 at 12:21 +0000, Steven Whitehouse wrote:
> > >From b004157ab5b374a498a5874cda68c389219d23e7 Mon Sep 17 00:00:00 2001
> > From: Steven Whitehouse <[EMAIL PROTECTED]>
> > Date: Thu, 23 Nov 2006 10:51:34 -0500
> > Subject: [PATCH] [GFS2] Fix journal flush problem
> > 
> > This fixes a bug which resulted in poor performance due to flushing
> > the journal too often. The code path in question was via the inode_go_sync()
> > function in glops.c. The solution is not to flush the journal immediately
> > when inodes are ejected from memory, but batch up the work for glockd to
> > deal with later on. This means that glocks may now live on beyond the end of
> > the lifetime of their inodes (but not very much longer in the normal case).
> 
> This seems like multiple changes in one patch.
> The inode flush handling is changing quite significantly.
> The log flushing is also being changed. 
> 
They are rather closely linked, so it seems to make sense to change both
at once. See further comments below.

> > 
> > Also fixed in this patch is a bug (which was hidden by the bug mentioned 
> > above) in
> > calculation of the number of free journal blocks.
> > 
> > The gfs2_logd process has been altered to be more responsive to the journal
> > filling up. We now wake it up when the number of uncommitted journal blocks
> > has reached the threshold level rather than trying to flush directly at the
> > end of each transaction. This again means doing fewer, but larger, log
> > flushes in general.
> > 
> > Signed-off-by: Steven Whitehouse <[EMAIL PROTECTED]>
> > ---
> >  fs/gfs2/daemon.c|7 +++-
> >  fs/gfs2/glock.c |   17 +
> >  fs/gfs2/glock.h |1 -
> >  fs/gfs2/glops.c |   93 
> > +--
> >  fs/gfs2/log.c   |   17 +
> >  fs/gfs2/meta_io.c   |3 ++
> >  fs/gfs2/ops_super.c |7 ++--
> >  7 files changed, 46 insertions(+), 99 deletions(-)
> > 
> > diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
> > index cab1f68..683cb5b 100644
> > --- a/fs/gfs2/daemon.c
> > +++ b/fs/gfs2/daemon.c
> > @@ -112,6 +112,7 @@ int gfs2_logd(void *data)
> > struct gfs2_sbd *sdp = data;
> > struct gfs2_holder ji_gh;
> > unsigned long t;
> > +   int need_flush;
> >  
> > while (!kthread_should_stop()) {
> > /* Advance the log tail */
> > @@ -120,8 +121,10 @@ int gfs2_logd(void *data)
> > gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
> >  
> > gfs2_ail1_empty(sdp, DIO_ALL);
> > -
> > -   if (time_after_eq(jiffies, t)) {
> > +   gfs2_log_lock(sdp);
> > +   need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, 
> > gt_incore_log_blocks);
> > +   gfs2_log_unlock(sdp);
> Do we really need to lock the log just to get the log_num_buf?
> Seems like a serialization we don't need?
> 
Yes we do need a lock, and bearing in mind that it's only a spinlock I
don't see that it's going to be that much of a problem. Compared with the
length of time it takes to flush the journal, it must surely be a
completely minimal overhead.

> So why does this loop have a sleep timeout and a flush interval?
> Shouldn't the schedule timeout be the same as the flush interval?
> 
No it shouldn't. There are two things happening in this loop. The first
is running gfs2_ail1_empty() which happens on a much more frequent basis
than the log flushing. The log flushing runs every few seconds, or when
it's triggered by the journal getting close to being full:

> > +   if (need_flush || time_after_eq(jiffies, t)) {
> > gfs2_log_flush(sdp, NULL);
> > sdp->sd_log_flush_time = jiffies;
> > }

Steve.

> > diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> > index b8ba4d5..3c2ff81 100644
> > --- a/fs/gfs2/glock.c
> > +++ b/fs/gfs2/glock.c
> > @@ -785,21 +785,6 @@ out:
> > gfs2_holder_put(new_gh);
> >  }
> >  
> > -void gfs2_glock_inode_squish(struct inode *inode)
> > -{
> > -   struct gfs2_holder gh;
> > -   struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
> > -   gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
> > -   set_bit(HIF_DEMOTE, &gh.gh_iflags);
> > -   spin_lock(&gl->gl_spin);
> > -   gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
> > -   list_add_tail(&gh.gh_list, &gl->gl_waiters2);
> > -   run_queue(gl);
> > -   spin_unlock(&gl->gl_spin);
> > -   wait_for_completion(&gh.gh_wait);
> > -   gfs2_holder_uninit(&gh);
> > -}
> > -
> >  /**
> >   * state_change - record that the glock is now in a different state
> >   * @gl: the glock
> > @@ -1920,7 +1905,7 @@ out:
> >  
> >  static void scan_glock(struct gfs2_glock *gl)
> >  {
> > -   if (gl->gl_ops == &gfs2_inode_glops)
> > +   if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
> > return;
> >  
> > if (gfs2_glmutex_trylock(gl)) {
> > diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
> > index a331bf8..fb39108 100644
> > --- 

Re: [GFS2] Fix journal flush problem [56/70]

2006-12-04 Thread Steven Whitehouse
Hi,

On Fri, 2006-12-01 at 12:58 -0600, Russell Cattelan wrote:
 On Thu, 2006-11-30 at 12:21 +0000, Steven Whitehouse wrote:
  From b004157ab5b374a498a5874cda68c389219d23e7 Mon Sep 17 00:00:00 2001
  From: Steven Whitehouse [EMAIL PROTECTED]
  Date: Thu, 23 Nov 2006 10:51:34 -0500
  Subject: [PATCH] [GFS2] Fix journal flush problem
  
  This fixes a bug which resulted in poor performance due to flushing
  the journal too often. The code path in question was via the inode_go_sync()
  function in glops.c. The solution is not to flush the journal immediately
  when inodes are ejected from memory, but batch up the work for glockd to
  deal with later on. This means that glocks may now live on beyond the end of
  the lifetime of their inodes (but not very much longer in the normal case).
 
 This seems like multiple changes in one patch.
 The inode flush handling is changing quite significantly.
 The log flushing is also being changed. 
 
They are rather closely linked, so it seems to make sense to change both
at once. See further comments below.

  
  Also fixed in this patch is a bug (which was hidden by the bug mentioned 
  above) in
  calculation of the number of free journal blocks.
  
  The gfs2_logd process has been altered to be more responsive to the journal
  filling up. We now wake it up when the number of uncommitted journal blocks
  has reached the threshold level rather than trying to flush directly at the
  end of each transaction. This again means doing fewer, but larger, log
  flushes in general.
  
  Signed-off-by: Steven Whitehouse [EMAIL PROTECTED]
  ---
   fs/gfs2/daemon.c|7 +++-
   fs/gfs2/glock.c |   17 +
   fs/gfs2/glock.h |1 -
   fs/gfs2/glops.c |   93 
  +--
   fs/gfs2/log.c   |   17 +
   fs/gfs2/meta_io.c   |3 ++
   fs/gfs2/ops_super.c |7 ++--
   7 files changed, 46 insertions(+), 99 deletions(-)
  
  diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
  index cab1f68..683cb5b 100644
  --- a/fs/gfs2/daemon.c
  +++ b/fs/gfs2/daemon.c
  @@ -112,6 +112,7 @@ int gfs2_logd(void *data)
  struct gfs2_sbd *sdp = data;
  struct gfs2_holder ji_gh;
  unsigned long t;
  +   int need_flush;
   
  while (!kthread_should_stop()) {
  /* Advance the log tail */
  @@ -120,8 +121,10 @@ int gfs2_logd(void *data)
  gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
   
  gfs2_ail1_empty(sdp, DIO_ALL);
  -
  -   if (time_after_eq(jiffies, t)) {
  +   gfs2_log_lock(sdp);
  +   need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, 
  gt_incore_log_blocks);
  +   gfs2_log_unlock(sdp);
 Do we really need to lock the log just to get the log_num_buf?
 Seems like a serialization we don't need?
 
Yes we do need a lock, and bearing in mind that it's only a spinlock I
don't see that it's going to be that much of a problem. Compared with the
length of time it takes to flush the journal, it must surely be a
completely minimal overhead.

 So why does this loop have a sleep timeout and a flush interval?
 Shouldn't the schedule timeout be the same as the flush interval?
 
No it shouldn't. There are two things happening in this loop. The first
is running gfs2_ail1_empty() which happens on a much more frequent basis
than the log flushing. The log flushing runs every few seconds, or when
it's triggered by the journal getting close to being full:

  +   if (need_flush || time_after_eq(jiffies, t)) {
  gfs2_log_flush(sdp, NULL);
  sdp->sd_log_flush_time = jiffies;
  }

Steve.

  diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
  index b8ba4d5..3c2ff81 100644
  --- a/fs/gfs2/glock.c
  +++ b/fs/gfs2/glock.c
  @@ -785,21 +785,6 @@ out:
  gfs2_holder_put(new_gh);
   }
   
  -void gfs2_glock_inode_squish(struct inode *inode)
  -{
  -   struct gfs2_holder gh;
  -   struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
  -   gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
  -   set_bit(HIF_DEMOTE, &gh.gh_iflags);
  -   spin_lock(&gl->gl_spin);
  -   gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
  -   list_add_tail(&gh.gh_list, &gl->gl_waiters2);
  -   run_queue(gl);
  -   spin_unlock(&gl->gl_spin);
  -   wait_for_completion(&gh.gh_wait);
  -   gfs2_holder_uninit(&gh);
  -}
  -
   /**
* state_change - record that the glock is now in a different state
* @gl: the glock
  @@ -1920,7 +1905,7 @@ out:
   
   static void scan_glock(struct gfs2_glock *gl)
   {
  -   if (gl->gl_ops == &gfs2_inode_glops)
  +   if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
  return;
   
  if (gfs2_glmutex_trylock(gl)) {
  diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
  index a331bf8..fb39108 100644
  --- a/fs/gfs2/glock.h
  +++ b/fs/gfs2/glock.h
  @@ -106,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int
   void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
   const 

Re: [GFS2] Fix journal flush problem [56/70]

2006-12-01 Thread Russell Cattelan
On Thu, 2006-11-30 at 12:21 +0000, Steven Whitehouse wrote:
> >From b004157ab5b374a498a5874cda68c389219d23e7 Mon Sep 17 00:00:00 2001
> From: Steven Whitehouse <[EMAIL PROTECTED]>
> Date: Thu, 23 Nov 2006 10:51:34 -0500
> Subject: [PATCH] [GFS2] Fix journal flush problem
> 
> This fixes a bug which resulted in poor performance due to flushing
> the journal too often. The code path in question was via the inode_go_sync()
> function in glops.c. The solution is not to flush the journal immediately
> when inodes are ejected from memory, but batch up the work for glockd to
> deal with later on. This means that glocks may now live on beyond the end of
> the lifetime of their inodes (but not very much longer in the normal case).

This seems like multiple changes in one patch.
The inode flush handling is changing quite significantly.
The log flushing is also being changed. 

> 
> Also fixed in this patch is a bug (which was hidden by the bug mentioned 
> above) in
> calculation of the number of free journal blocks.
> 
> The gfs2_logd process has been altered to be more responsive to the journal
> filling up. We now wake it up when the number of uncommitted journal blocks
> has reached the threshold level rather than trying to flush directly at the
> end of each transaction. This again means doing fewer, but larger, log
> flushes in general.
> 
> Signed-off-by: Steven Whitehouse <[EMAIL PROTECTED]>
> ---
>  fs/gfs2/daemon.c|7 +++-
>  fs/gfs2/glock.c |   17 +
>  fs/gfs2/glock.h |1 -
>  fs/gfs2/glops.c |   93 
> +--
>  fs/gfs2/log.c   |   17 +
>  fs/gfs2/meta_io.c   |3 ++
>  fs/gfs2/ops_super.c |7 ++--
>  7 files changed, 46 insertions(+), 99 deletions(-)
> 
> diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
> index cab1f68..683cb5b 100644
> --- a/fs/gfs2/daemon.c
> +++ b/fs/gfs2/daemon.c
> @@ -112,6 +112,7 @@ int gfs2_logd(void *data)
>   struct gfs2_sbd *sdp = data;
>   struct gfs2_holder ji_gh;
>   unsigned long t;
> + int need_flush;
>  
>   while (!kthread_should_stop()) {
>   /* Advance the log tail */
> @@ -120,8 +121,10 @@ int gfs2_logd(void *data)
>   gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
>  
>   gfs2_ail1_empty(sdp, DIO_ALL);
> -
> - if (time_after_eq(jiffies, t)) {
> + gfs2_log_lock(sdp);
> + need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, 
> gt_incore_log_blocks);
> + gfs2_log_unlock(sdp);
Do we really need to lock the log just to get the log_num_buf?
Seems like a serialization we don't need?

So why does this loop have a sleep timeout and a flush interval?
Shouldn't the schedule timeout be the same as the flush interval?

> + if (need_flush || time_after_eq(jiffies, t)) {
>   gfs2_log_flush(sdp, NULL);
>   sdp->sd_log_flush_time = jiffies;
>   }
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index b8ba4d5..3c2ff81 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -785,21 +785,6 @@ out:
>   gfs2_holder_put(new_gh);
>  }
>  
> -void gfs2_glock_inode_squish(struct inode *inode)
> -{
> - struct gfs2_holder gh;
> - struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
> - gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
> - set_bit(HIF_DEMOTE, &gh.gh_iflags);
> - spin_lock(&gl->gl_spin);
> - gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
> - list_add_tail(&gh.gh_list, &gl->gl_waiters2);
> - run_queue(gl);
> - spin_unlock(&gl->gl_spin);
> - wait_for_completion(&gh.gh_wait);
> - gfs2_holder_uninit(&gh);
> -}
> -
>  /**
>   * state_change - record that the glock is now in a different state
>   * @gl: the glock
> @@ -1920,7 +1905,7 @@ out:
>  
>  static void scan_glock(struct gfs2_glock *gl)
>  {
> - if (gl->gl_ops == &gfs2_inode_glops)
> + if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
>   return;
>  
>   if (gfs2_glmutex_trylock(gl)) {
> diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
> index a331bf8..fb39108 100644
> --- a/fs/gfs2/glock.h
> +++ b/fs/gfs2/glock.h
> @@ -106,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int
>  void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
>const struct gfs2_glock_operations *glops,
>unsigned int state, int flags);
> -void gfs2_glock_inode_squish(struct inode *inode);
>  
>  /**
>   * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
> diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
> index 60561ca..b068d10 100644
> --- a/fs/gfs2/glops.c
> +++ b/fs/gfs2/glops.c
> @@ -107,70 +107,6 @@ static void gfs2_pte_inval(struct gfs2_g
>  }
>  
>  /**
> - * gfs2_page_inval - Invalidate all pages associated with a glock
> - * @gl: the glock
> - *
> - */
> -
> -static void gfs2_page_inval(struct gfs2_glock *gl)
> -{
> - struct gfs2_inode *ip;
> - struct inode 

Re: [GFS2] Fix journal flush problem [56/70]

2006-12-01 Thread Russell Cattelan
On Thu, 2006-11-30 at 12:21 +0000, Steven Whitehouse wrote:
 From b004157ab5b374a498a5874cda68c389219d23e7 Mon Sep 17 00:00:00 2001
 From: Steven Whitehouse [EMAIL PROTECTED]
 Date: Thu, 23 Nov 2006 10:51:34 -0500
 Subject: [PATCH] [GFS2] Fix journal flush problem
 
 This fixes a bug which resulted in poor performance due to flushing
 the journal too often. The code path in question was via the inode_go_sync()
 function in glops.c. The solution is not to flush the journal immediately
 when inodes are ejected from memory, but batch up the work for glockd to
 deal with later on. This means that glocks may now live on beyond the end of
 the lifetime of their inodes (but not very much longer in the normal case).

This seems like multiple changes in one patch.
The inode flush handling is changing quite significantly.
The log flushing is also being changed. 

 
 Also fixed in this patch is a bug (which was hidden by the bug mentioned 
 above) in
 calculation of the number of free journal blocks.
 
 The gfs2_logd process has been altered to be more responsive to the journal
 filling up. We now wake it up when the number of uncommitted journal blocks
 has reached the threshold level rather than trying to flush directly at the
 end of each transaction. This again means doing fewer, but larger, log
 flushes in general.
 
 Signed-off-by: Steven Whitehouse [EMAIL PROTECTED]
 ---
  fs/gfs2/daemon.c|7 +++-
  fs/gfs2/glock.c |   17 +
  fs/gfs2/glock.h |1 -
  fs/gfs2/glops.c |   93 
 +--
  fs/gfs2/log.c   |   17 +
  fs/gfs2/meta_io.c   |3 ++
  fs/gfs2/ops_super.c |7 ++--
  7 files changed, 46 insertions(+), 99 deletions(-)
 
 diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
 index cab1f68..683cb5b 100644
 --- a/fs/gfs2/daemon.c
 +++ b/fs/gfs2/daemon.c
 @@ -112,6 +112,7 @@ int gfs2_logd(void *data)
   struct gfs2_sbd *sdp = data;
   struct gfs2_holder ji_gh;
   unsigned long t;
 + int need_flush;
  
   while (!kthread_should_stop()) {
   /* Advance the log tail */
 @@ -120,8 +121,10 @@ int gfs2_logd(void *data)
   gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
  
   gfs2_ail1_empty(sdp, DIO_ALL);
 -
 - if (time_after_eq(jiffies, t)) {
 + gfs2_log_lock(sdp);
 + need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, 
 gt_incore_log_blocks);
 + gfs2_log_unlock(sdp);
Do we really need to lock the log just to get the log_num_buf?
Seems like a serialization we don't need?

So why does this loop have a sleep timeout and a flush interval?
Shouldn't the schedule timeout be the same as the flush interval?

 + if (need_flush || time_after_eq(jiffies, t)) {
   gfs2_log_flush(sdp, NULL);
   sdp->sd_log_flush_time = jiffies;
   }
 diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
 index b8ba4d5..3c2ff81 100644
 --- a/fs/gfs2/glock.c
 +++ b/fs/gfs2/glock.c
 @@ -785,21 +785,6 @@ out:
   gfs2_holder_put(new_gh);
  }
  
 -void gfs2_glock_inode_squish(struct inode *inode)
 -{
 - struct gfs2_holder gh;
 - struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
 - gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
 - set_bit(HIF_DEMOTE, &gh.gh_iflags);
 - spin_lock(&gl->gl_spin);
 - gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
 - list_add_tail(&gh.gh_list, &gl->gl_waiters2);
 - run_queue(gl);
 - spin_unlock(&gl->gl_spin);
 - wait_for_completion(&gh.gh_wait);
 - gfs2_holder_uninit(&gh);
 -}
 -
  /**
   * state_change - record that the glock is now in a different state
   * @gl: the glock
 @@ -1920,7 +1905,7 @@ out:
  
  static void scan_glock(struct gfs2_glock *gl)
  {
 - if (gl->gl_ops == &gfs2_inode_glops)
 + if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
   return;
  
   if (gfs2_glmutex_trylock(gl)) {
 diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
 index a331bf8..fb39108 100644
 --- a/fs/gfs2/glock.h
 +++ b/fs/gfs2/glock.h
 @@ -106,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int
  void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
unsigned int state, int flags);
 -void gfs2_glock_inode_squish(struct inode *inode);
  
  /**
   * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
 diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
 index 60561ca..b068d10 100644
 --- a/fs/gfs2/glops.c
 +++ b/fs/gfs2/glops.c
 @@ -107,70 +107,6 @@ static void gfs2_pte_inval(struct gfs2_g
  }
  
  /**
 - * gfs2_page_inval - Invalidate all pages associated with a glock
 - * @gl: the glock
 - *
 - */
 -
 -static void gfs2_page_inval(struct gfs2_glock *gl)
 -{
 - struct gfs2_inode *ip;
 - struct inode *inode;
 -
 - ip = gl->gl_object;
 - inode = &ip->i_inode;
 - if (!ip || !S_ISREG(inode->i_mode))
 

[GFS2] Fix journal flush problem [56/70]

2006-11-30 Thread Steven Whitehouse
From b004157ab5b374a498a5874cda68c389219d23e7 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <[EMAIL PROTECTED]>
Date: Thu, 23 Nov 2006 10:51:34 -0500
Subject: [PATCH] [GFS2] Fix journal flush problem

This fixes a bug which resulted in poor performance due to flushing
the journal too often. The code path in question was via the inode_go_sync()
function in glops.c. The solution is not to flush the journal immediately
when inodes are ejected from memory, but batch up the work for glockd to
deal with later on. This means that glocks may now live on beyond the end of
the lifetime of their inodes (but not very much longer in the normal case).

Also fixed in this patch is a bug (which was hidden by the bug mentioned above) 
in
calculation of the number of free journal blocks.

The gfs2_logd process has been altered to be more responsive to the journal
filling up. We now wake it up when the number of uncommitted journal blocks
has reached the threshold level rather than trying to flush directly at the
end of each transaction. This again means doing fewer, but larger, log
flushes in general.

Signed-off-by: Steven Whitehouse <[EMAIL PROTECTED]>
---
 fs/gfs2/daemon.c|7 +++-
 fs/gfs2/glock.c |   17 +
 fs/gfs2/glock.h |1 -
 fs/gfs2/glops.c |   93 +--
 fs/gfs2/log.c   |   17 +
 fs/gfs2/meta_io.c   |3 ++
 fs/gfs2/ops_super.c |7 ++--
 7 files changed, 46 insertions(+), 99 deletions(-)

diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index cab1f68..683cb5b 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -112,6 +112,7 @@ int gfs2_logd(void *data)
struct gfs2_sbd *sdp = data;
struct gfs2_holder ji_gh;
unsigned long t;
+   int need_flush;
 
while (!kthread_should_stop()) {
/* Advance the log tail */
@@ -120,8 +121,10 @@ int gfs2_logd(void *data)
gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
 
gfs2_ail1_empty(sdp, DIO_ALL);
-
-   if (time_after_eq(jiffies, t)) {
+   gfs2_log_lock(sdp);
+   need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, 
gt_incore_log_blocks);
+   gfs2_log_unlock(sdp);
+   if (need_flush || time_after_eq(jiffies, t)) {
gfs2_log_flush(sdp, NULL);
sdp->sd_log_flush_time = jiffies;
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b8ba4d5..3c2ff81 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -785,21 +785,6 @@ out:
gfs2_holder_put(new_gh);
 }
 
-void gfs2_glock_inode_squish(struct inode *inode)
-{
-   struct gfs2_holder gh;
-   struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
-   gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
-   set_bit(HIF_DEMOTE, &gh.gh_iflags);
-   spin_lock(&gl->gl_spin);
-   gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
-   list_add_tail(&gh.gh_list, &gl->gl_waiters2);
-   run_queue(gl);
-   spin_unlock(&gl->gl_spin);
-   wait_for_completion(&gh.gh_wait);
-   gfs2_holder_uninit(&gh);
-}
-
 /**
  * state_change - record that the glock is now in a different state
  * @gl: the glock
@@ -1920,7 +1905,7 @@ out:
 
 static void scan_glock(struct gfs2_glock *gl)
 {
-   if (gl->gl_ops == &gfs2_inode_glops)
+   if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
return;
 
if (gfs2_glmutex_trylock(gl)) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index a331bf8..fb39108 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -106,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int
 void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
 const struct gfs2_glock_operations *glops,
 unsigned int state, int flags);
-void gfs2_glock_inode_squish(struct inode *inode);
 
 /**
  * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 60561ca..b068d10 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -107,70 +107,6 @@ static void gfs2_pte_inval(struct gfs2_g
 }
 
 /**
- * gfs2_page_inval - Invalidate all pages associated with a glock
- * @gl: the glock
- *
- */
-
-static void gfs2_page_inval(struct gfs2_glock *gl)
-{
-   struct gfs2_inode *ip;
-   struct inode *inode;
-
-   ip = gl->gl_object;
-   inode = &ip->i_inode;
-   if (!ip || !S_ISREG(inode->i_mode))
-   return;
-
-   truncate_inode_pages(inode->i_mapping, 0);
-   gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages);
-   clear_bit(GIF_PAGED, &ip->i_flags);
-}
-
-/**
- * gfs2_page_wait - Wait for writeback of data
- * @gl: the glock
- *
- * Syncs data (not metadata) for a regular file.
- * No-op for all other types.
- */
-
-static void gfs2_page_wait(struct gfs2_glock *gl)
-{
-   struct gfs2_inode *ip = gl->gl_object;
-   struct inode *inode = &ip->i_inode;
-   

[GFS2] Fix journal flush problem [56/70]

2006-11-30 Thread Steven Whitehouse
From b004157ab5b374a498a5874cda68c389219d23e7 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse [EMAIL PROTECTED]
Date: Thu, 23 Nov 2006 10:51:34 -0500
Subject: [PATCH] [GFS2] Fix journal flush problem

This fixes a bug which resulted in poor performance due to flushing
the journal too often. The code path in question was via the inode_go_sync()
function in glops.c. The solution is not to flush the journal immediately
when inodes are ejected from memory, but batch up the work for glockd to
deal with later on. This means that glocks may now live on beyond the end of
the lifetime of their inodes (but not very much longer in the normal case).

Also fixed in this patch is a bug (which was hidden by the bug mentioned above) 
in
calculation of the number of free journal blocks.

The gfs2_logd process has been altered to be more responsive to the journal
filling up. We now wake it up when the number of uncommitted journal blocks
has reached the threshold level rather than trying to flush directly at the
end of each transaction. This again means doing fewer, but larger, log
flushes in general.

Signed-off-by: Steven Whitehouse [EMAIL PROTECTED]
---
 fs/gfs2/daemon.c|7 +++-
 fs/gfs2/glock.c |   17 +
 fs/gfs2/glock.h |1 -
 fs/gfs2/glops.c |   93 +--
 fs/gfs2/log.c   |   17 +
 fs/gfs2/meta_io.c   |3 ++
 fs/gfs2/ops_super.c |7 ++--
 7 files changed, 46 insertions(+), 99 deletions(-)

diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index cab1f68..683cb5b 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -112,6 +112,7 @@ int gfs2_logd(void *data)
struct gfs2_sbd *sdp = data;
struct gfs2_holder ji_gh;
unsigned long t;
+   int need_flush;
 
while (!kthread_should_stop()) {
/* Advance the log tail */
@@ -120,8 +121,10 @@ int gfs2_logd(void *data)
gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
 
gfs2_ail1_empty(sdp, DIO_ALL);
-
-   if (time_after_eq(jiffies, t)) {
+   gfs2_log_lock(sdp);
+   need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, 
gt_incore_log_blocks);
+   gfs2_log_unlock(sdp);
+   if (need_flush || time_after_eq(jiffies, t)) {
gfs2_log_flush(sdp, NULL);
sdp->sd_log_flush_time = jiffies;
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b8ba4d5..3c2ff81 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -785,21 +785,6 @@ out:
gfs2_holder_put(new_gh);
 }
 
-void gfs2_glock_inode_squish(struct inode *inode)
-{
-   struct gfs2_holder gh;
-   struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
-   gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
-   set_bit(HIF_DEMOTE, &gh.gh_iflags);
-   spin_lock(&gl->gl_spin);
-   gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
-   list_add_tail(&gh.gh_list, &gl->gl_waiters2);
-   run_queue(gl);
-   spin_unlock(&gl->gl_spin);
-   wait_for_completion(&gh.gh_wait);
-   gfs2_holder_uninit(&gh);
-}
-
 /**
  * state_change - record that the glock is now in a different state
  * @gl: the glock
@@ -1920,7 +1905,7 @@ out:
 
 static void scan_glock(struct gfs2_glock *gl)
 {
-   if (gl->gl_ops == &gfs2_inode_glops)
+   if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
return;
 
if (gfs2_glmutex_trylock(gl)) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index a331bf8..fb39108 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -106,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int
 void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
 const struct gfs2_glock_operations *glops,
 unsigned int state, int flags);
-void gfs2_glock_inode_squish(struct inode *inode);
 
 /**
  * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 60561ca..b068d10 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -107,70 +107,6 @@ static void gfs2_pte_inval(struct gfs2_g
 }
 
 /**
- * gfs2_page_inval - Invalidate all pages associated with a glock
- * @gl: the glock
- *
- */
-
-static void gfs2_page_inval(struct gfs2_glock *gl)
-{
-   struct gfs2_inode *ip;
-   struct inode *inode;
-
-   ip = gl->gl_object;
-   inode = &ip->i_inode;
-   if (!ip || !S_ISREG(inode->i_mode))
-   return;
-
-   truncate_inode_pages(inode->i_mapping, 0);
-   gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages);
-   clear_bit(GIF_PAGED, &ip->i_flags);
-}
-
-/**
- * gfs2_page_wait - Wait for writeback of data
- * @gl: the glock
- *
- * Syncs data (not metadata) for a regular file.
- * No-op for all other types.
- */
-
-static void gfs2_page_wait(struct gfs2_glock *gl)
-{
-   struct gfs2_inode *ip = gl->gl_object;
-   struct inode *inode =