date:20190213

[Cluster-devel] [GFS2 PATCH 7/9] gfs2: Check for log write errors and withdraw in rgrp_go_inval

2019-02-13 Thread Bob Peterson

Before this patch, function rgrp_go_inval just assumed all the
writes submitted to the journal were finished and successful. But
if they're not, and a revoke fails to make its way to the journal,
a journal replay on another node will cause corruption if we
let the go_inval function continue and tell dlm to release the
glock to another node. This patch adds a couple assert_withdraws
in the rgrp_go_inval function. The assert should cause another
node to replay the journal before continuing, thus protecting the
rgrp glock and maintaining the integrity of the rgrp.

Signed-off-by: Bob Peterson 
---
 fs/gfs2/glops.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index f372a6f169a2..64b8e5e808d8 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -168,8 +168,7 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
error = filemap_fdatawait_range(mapping, gl->gl_vm.start, 
gl->gl_vm.end);
mapping_set_error(mapping, error);
gfs2_ail_empty_gl(gl);
-   gfs2_assert_withdraw(gl->gl_name.ln_sbd,
-gl->gl_name.ln_sbd->sd_log_error == 0);
+   gfs2_assert_withdraw(sdp, gl->gl_name.ln_sbd->sd_log_error == 0);
 
spin_lock(&gl->gl_lockref.lock);
rgd = gl->gl_object;
@@ -202,6 +201,8 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
gfs2_ail_empty_gl(gl);
 
+   gfs2_assert_withdraw(sdp, sdp->sd_log_error == 0);
+
if (rgd)
rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
 }
-- 
2.20.1

[Cluster-devel] [GFS2 PATCH 1/9] gfs2: Introduce concept of a pending withdraw

2019-02-13 Thread Bob Peterson

File system withdraws can be delayed when inconsistencies are
discovered when we cannot withdraw immediately, for example, when
critical spin_locks are held. But delaying the withdraw can cause
gfs2 to ignore the error and keep running for a short period of time.
For example, an rgrp glock may be dequeued and demoted while there
are still buffers that haven't been properly revoked, due to io
errors writing to the journal.

This patch introduces a new concept of a delayed withdraw, which
means an inconsistency has been discovered and we need to withdraw
at the earliest possible opportunity. In these cases, we aren't
quite withdrawn yet, but we still need to not dequeue glocks and
other critical things. If we dequeue the glocks and the withdraw
results in our journal being replayed, the replay could overwrite
data that's been modified by a different node that acquired the
glock in the meantime.

Signed-off-by: Bob Peterson 
---
 fs/gfs2/aops.c   |  4 ++--
 fs/gfs2/file.c   |  2 +-
 fs/gfs2/glock.c  |  7 +++
 fs/gfs2/glops.c  |  2 +-
 fs/gfs2/incore.h |  1 +
 fs/gfs2/log.c| 20 
 fs/gfs2/meta_io.c|  6 +++---
 fs/gfs2/ops_fstype.c |  3 +--
 fs/gfs2/quota.c  |  2 +-
 fs/gfs2/super.c  |  6 +++---
 fs/gfs2/sys.c|  2 +-
 fs/gfs2/util.c   |  1 +
 fs/gfs2/util.h   |  8 
 13 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 05dd78f4b2b3..0d3cde8a61cd 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -521,7 +521,7 @@ static int __gfs2_readpage(void *file, struct page *page)
error = mpage_readpage(page, gfs2_block_map);
}
 
-   if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+   if (unlikely(withdrawn(sdp)))
return -EIO;
 
return error;
@@ -638,7 +638,7 @@ static int gfs2_readpages(struct file *file, struct 
address_space *mapping,
gfs2_glock_dq(&gh);
 out_uninit:
gfs2_holder_uninit(&gh);
-   if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+   if (unlikely(withdrawn(sdp)))
ret = -EIO;
return ret;
 }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index a2dea5bc0427..4046f6ac7f13 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1169,7 +1169,7 @@ static int gfs2_lock(struct file *file, int cmd, struct 
file_lock *fl)
cmd = F_SETLK;
fl->fl_type = F_UNLCK;
}
-   if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+   if (unlikely(withdrawn(sdp))) {
if (fl->fl_type == F_UNLCK)
locks_lock_file_wait(file, fl);
return -EIO;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f66773c71bcd..c6d6e478f5e3 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -542,7 +542,7 @@ __acquires(&gl->gl_lockref.lock)
unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
int ret;
 
-   if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
+   if (unlikely(withdrawn(sdp)) &&
target != LM_ST_UNLOCKED)
return;
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
@@ -579,8 +579,7 @@ __acquires(&gl->gl_lockref.lock)
}
else if (ret) {
fs_err(sdp, "lm_lock ret %d\n", ret);
-   GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN,
-  &sdp->sd_flags));
+   GLOCK_BUG_ON(gl, !withdrawn(sdp));
}
} else { /* lock_nolock */
finish_xmote(gl, target);
@@ -1092,7 +1091,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
int error = 0;
 
-   if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+   if (unlikely(withdrawn(sdp)))
return -EIO;
 
if (test_bit(GLF_LRU, &gl->gl_flags))
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index f15b4c57c4bd..9c86c8004ba7 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -539,7 +539,7 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct 
gfs2_holder *gh)
gfs2_consist(sdp);
 
/*  Initialize some head of the log stuff  */
-   if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+   if (!withdrawn(sdp)) {
sdp->sd_log_sequence = head.lh_sequence + 1;
gfs2_log_pointers_init(sdp, head.lh_blkno);
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index cdf07b408f54..8380d4db8be6 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -621,6 +621,7 @@ enum {
SDF_SKIP_DLM_UNLOCK = 8,
SDF_FORCE_AIL_FLUSH = 9,
SDF_AIL1_IO_ERROR   = 10,
+   SDF_PENDING_WITHDRAW= 11, /* Will withdraw eventually */
 };
 
 enum gfs2_freeze_state {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log

[Cluster-devel] [GFS2 PATCH 8/9] gfs2: Do log_flush in gfs2_ail_empty_gl even if ail list is empty

2019-02-13 Thread Bob Peterson

Before this patch, if gfs2_ail_empty_gl saw there was nothing on
the ail list, it would return and not flush the log. The problem
is that there could still be a revoke for the rgrp sitting on the
sd_log_le_revoke list that's been recently taken off the ail list.
But that revoke still needs to be written, and the rgrp_go_inval
still needs to call log_flush_wait to ensure the revokes are all
properly written to the journal before we relinquish control of
the glock to another node. If we give the glock to another node
before we have this knowledge, the node might crash and its journal
replayed, in which case the missing revoke would allow the journal
replay to replay the rgrp over top of the rgrp we already gave to
another node, thus overwriting its changes and corrupting the
file system.

This patch makes gfs2_ail_empty_gl still call gfs2_log_flush rather
than returning.

Signed-off-by: Bob Peterson 
---
 fs/gfs2/glops.c | 23 +--
 fs/gfs2/log.c   |  2 +-
 fs/gfs2/log.h   |  1 +
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 64b8e5e808d8..adae9ecf8311 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -94,8 +94,26 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
INIT_LIST_HEAD(&tr.tr_databuf);
tr.tr_revokes = atomic_read(&gl->gl_ail_count);
 
-   if (!tr.tr_revokes)
-   return;
+   if (!tr.tr_revokes) {
+   /**
+* We have nothing on the ail, but there could be revokes on
+* the sdp revoke queue, in which case, we still want to flush
+* the log and wait for it to finish.
+*
+* If the sdp revoke list is empty too, we might still have an
+* io outstanding for writing revokes, so we should wait for
+* it before proceeding.
+*
+* If none of these conditions are true, our revokes are all
+* flushed and we can return.
+*/
+   if (!list_empty(&sdp->sd_log_le_revoke))
+   goto flush;
+   else if (atomic_read(&sdp->sd_log_in_flight))
+   log_flush_wait(sdp);
+   else
+   return;
+   }
 
/* A shortened, inline version of gfs2_trans_begin()
  * tr->alloced is not set since the transaction structure is
@@ -110,6 +128,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
__gfs2_ail_flush(gl, 0, tr.tr_revokes);
 
gfs2_trans_end(sdp);
+flush:
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
   GFS2_LFC_AIL_EMPTY_GL);
 }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 0d0dec3231c9..610cd2637dc5 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -515,7 +515,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned 
int new_tail)
 }
 
 
-static void log_flush_wait(struct gfs2_sbd *sdp)
+void log_flush_wait(struct gfs2_sbd *sdp)
 {
DEFINE_WAIT(wait);
 
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 1bc9bd444b28..bd2d08d0f21c 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -75,6 +75,7 @@ extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct 
gfs2_glock *gl,
   u32 type);
 extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
 extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control 
*wbc);
+extern void log_flush_wait(struct gfs2_sbd *sdp);
 
 extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
 extern int gfs2_logd(void *data);
-- 
2.20.1

[Cluster-devel] [GFS2 PATCH 3/9] gfs2: Empty the ail for the glock when rgrps are invalidated

2019-02-13 Thread Bob Peterson

Before this patch, function rgrp_go_inval would not invalidate the
ail list, which meant that there might still be buffers outstanding
on the ail that had revokes still pending. If the revokes had still
not been written when the glock was given to another node, and that
node (with outstanding revokes) died for some reason, the resulting
journal replay would replay the un-revoked rgrps, thus wiping out
changes made by the node who rightfully received the rgrp in EX.
This caused metadata corruption.

Signed-off-by: Bob Peterson 
---
 fs/gfs2/glops.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 9c86c8004ba7..4b0e52bf5825 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -166,6 +166,8 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
error = filemap_fdatawait_range(mapping, gl->gl_vm.start, 
gl->gl_vm.end);
mapping_set_error(mapping, error);
gfs2_ail_empty_gl(gl);
+   gfs2_assert_withdraw(gl->gl_name.ln_sbd,
+gl->gl_name.ln_sbd->sd_log_error == 0);
 
spin_lock(&gl->gl_lockref.lock);
rgd = gl->gl_object;
@@ -196,6 +198,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
WARN_ON_ONCE(!(flags & DIO_METADATA));
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
+   gfs2_ail_empty_gl(gl);
 
if (rgd)
rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
-- 
2.20.1

[Cluster-devel] [GFS2 PATCH 0/9] GFS2: Withdraw corruption patches

2019-02-13 Thread Bob Peterson

I consider this more of a preliminary "collection" of patches rather
than a "patch set" per se. In other words, most of these do not rely
upon the previous patches, although some do. Some of them may be removed
without a lot of difficulty if they are found to be problematic. I
thought about sending them out individually, but decided against it.

These patches address a bunch of problems related to journal replay
overwriting valid gfs2 metadata due to io errors, withdraws and such.
These seem to fix several metadata corruption problems I've been able
to reliably recreate lately with multi-node multi-file system recovery
tests.

I'm not convinced we need all these patches, but they seem to work well
when put together. I just wanted to throw them out here for review so
people could point out if anything doesn't make sense. In other words,
there are likely to be revisions.

Bob Peterson (9):
  gfs2: Introduce concept of a pending withdraw
  gfs2: Ignore recovery attempts if gfs2 has io error or is withdrawn
  gfs2: Empty the ail for the glock when rgrps are invalidated
  gfs2: Force withdraw to replay journals and wait for it to finish
  gfs2: Keep transactions on ail1 list until after issuing revokes
  gfs2: Make secondary withdrawers wait for first withdrawer
  gfs2: Check for log write errors and withdraw in rgrp_go_inval
  gfs2: Do log_flush in gfs2_ail_empty_gl even if ail list is empty
  dlm: recover slot regardless of whether we still have a connection

 fs/dlm/member.c  |  11 +--
 fs/gfs2/aops.c   |   4 +-
 fs/gfs2/file.c   |   2 +-
 fs/gfs2/glock.c  |  40 +--
 fs/gfs2/glock.h  |   1 +
 fs/gfs2/glops.c  |  90 ++--
 fs/gfs2/incore.h |  10 +++
 fs/gfs2/lock_dlm.c   |  68 ++
 fs/gfs2/log.c|  74 +++-
 fs/gfs2/log.h|   1 +
 fs/gfs2/meta_io.c|   6 +-
 fs/gfs2/ops_fstype.c |  51 ++
 fs/gfs2/quota.c  |   2 +-
 fs/gfs2/super.c  |  30 
 fs/gfs2/super.h  |   1 +
 fs/gfs2/sys.c|   2 +-
 fs/gfs2/util.c   | 160 ++-
 fs/gfs2/util.h   |  11 +++
 18 files changed, 443 insertions(+), 121 deletions(-)

-- 
2.20.1

[Cluster-devel] [GFS2 PATCH 2/9] gfs2: Ignore recovery attempts if gfs2 has io error or is withdrawn

2019-02-13 Thread Bob Peterson

This patch addresses various problems with gfs2/dlm recovery.

For example, suppose a node with a bunch of gfs2 mounts suddenly
reboots due to kernel panic, and dlm determines it should perform
recovery. DLM does so from a pseudo-state machine calling various
callbacks into lock_dlm to perform a sequence of steps. It uses
generation numbers and recover bits in dlm "control" lock lvbs.

Now suppose another node tries to recover the failed node's
journal, but in so doing, encounters an IO error or withdraws
due to unforeseen circumstances, such as an hba driver failure.
In these cases, the recovery would eventually bail out, but it
would still update its generation number in the lvb. The other
nodes would all see the newer generation number and think they
don't need to do recovery because the generation number is newer
than the last one they saw, and therefore someone else has already
taken care of it.

If the file system has an io error or is withdrawn, it cannot
safely replay any journals (its own or others) but someone else
still needs to do it. Therefore we don't want it messing with
the journal recovery generation numbers: the local generation
numbers eventually get put into the lvb generation numbers to be
seen by all nodes.

This patch adds checks to many of the callbacks used by dlm
in its recovery state machine so that the functions are ignored
and skipped if an io error has occurred or if the file system
was withdraw.

Signed-off-by: Bob Peterson 
---
 fs/gfs2/lock_dlm.c | 36 
 fs/gfs2/util.c |  2 +-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 31df26ed7854..d2cb2fe1c3f3 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1081,6 +1081,14 @@ static void gdlm_recover_prep(void *arg)
struct gfs2_sbd *sdp = arg;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+   if (test_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) {
+   fs_err(sdp, "recover_prep ignored due to io error.\n");
+   return;
+   }
+   if (withdrawn(sdp)) {
+   fs_err(sdp, "recover_prep ignored due to withdraw.\n");
+   return;
+   }
spin_lock(&ls->ls_recover_spin);
ls->ls_recover_block = ls->ls_recover_start;
set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
@@ -1103,6 +,16 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot 
*slot)
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int jid = slot->slot - 1;
 
+   if (test_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) {
+   fs_err(sdp, "recover_slot jid %d ignored due to io error.\n",
+  jid);
+   return;
+   }
+   if (withdrawn(sdp)) {
+   fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n",
+  jid);
+   return;
+   }
spin_lock(&ls->ls_recover_spin);
if (ls->ls_recover_size < jid + 1) {
fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
@@ -1127,6 +1145,14 @@ static void gdlm_recover_done(void *arg, struct dlm_slot 
*slots, int num_slots,
struct gfs2_sbd *sdp = arg;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+   if (test_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) {
+   fs_err(sdp, "recover_done ignored due to io error.\n");
+   return;
+   }
+   if (withdrawn(sdp)) {
+   fs_err(sdp, "recover_done ignored due to withdraw.\n");
+   return;
+   }
/* ensure the ls jid arrays are large enough */
set_recover_size(sdp, slots, num_slots);
 
@@ -1154,6 +1180,16 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, 
unsigned int jid,
 {
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+   if (test_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) {
+   fs_err(sdp, "recovery_result jid %d ignored due to io error.\n",
+  jid);
+   return;
+   }
+   if (withdrawn(sdp)) {
+   fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n",
+  jid);
+   return;
+   }
if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
return;
 
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 717aef772c60..ca6de80b5e8b 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -260,7 +260,7 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct 
buffer_head *bh,
const char *function, char *file, unsigned int line,
bool withdraw)
 {
-   if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+   if (!withdrawn(sdp))
fs_err(sdp,
   "fatal: I/O error\n"
   "  block = %llu\n"
-- 
2.20.1

[Cluster-devel] [GFS2 PATCH 6/9] gfs2: Make secondary withdrawers wait for first withdrawer

2019-02-13 Thread Bob Peterson

Before this patch, if a process encountered an error and decided to
withdraw, if another process was already in the process of withdrawing,
the secondary withdraw would be silently ignored, which set it free
to proceed with its processing, unlock any locks, etc. That's correct
behavior if the original withdrawer encounters further errors down
the road. However, second withdrawers need to wait for the first
withdrawer to finish its withdraw before proceeding. If we don't wait
we could end up assuming everything is alright, unlock glocks and
telling other nodes they can have the glock, despite the fact that
a withdraw is still ongoing and may require a journal replay before
any locks are released. For example, if an rgrp glock is freed
by a process that didn't wait for the withdraw, a journal replay
could introduce file system corruption by replaying a rgrp block
that has already been granted to another node.

This patch makes secondary withdrawers wait until the primary
withdrawer is finished with its processing before proceeding.

Signed-off-by: Bob Peterson 
---
 fs/gfs2/incore.h | 3 +++
 fs/gfs2/util.c   | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 2ddae1326ce2..51ca34594ac2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -626,6 +626,7 @@ enum {
SDF_REMOTE_WITHDRAW = 12, /* Performing remote recovery */
SDF_WITHDRAW_RECOVERY   = 13, /* Wait for journal recovery when we are
 withdrawing */
+   SDF_WITHDRAW_COMPLETE   = 14, /* Withdraw is compete */
 };
 
 enum gfs2_freeze_state {
@@ -836,6 +837,8 @@ struct gfs2_sbd {
struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
int sd_log_error;
+   atomic_t sd_withdrawer;
+   wait_queue_head_t sd_withdraw_wait;
 
atomic_t sd_reserving_log;
wait_queue_head_t sd_reserving_log_wait;
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 75f67284bba8..6e0a85fc1d46 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -188,9 +188,15 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char 
*fmt, ...)
struct va_format vaf;
 
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
-   test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+   test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+   fs_warn(sdp, "Waiting for process %d to withdraw.\n",
+   atomic_read(&sdp->sd_withdrawer));
+   wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_COMPLETE,
+   TASK_UNINTERRUPTIBLE);
return 0;
+   }
 
+   atomic_set(&sdp->sd_withdrawer, pid_nr(task_pid(current)));
clear_bit(SDF_PENDING_WITHDRAW, &sdp->sd_flags);
if (fmt) {
va_start(args, fmt);
@@ -221,6 +227,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, 
...)
set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
fs_err(sdp, "File system withdrawn\n");
dump_stack();
+   wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_COMPLETE);
}
 
if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
-- 
2.20.1

[Cluster-devel] [GFS2 PATCH 9/9] dlm: recover slot regardless of whether we still have a connection

2019-02-13 Thread Bob Peterson

Before this patch dlm would skip the recover_slot phase of recovery
if it still had a valid comm connection to the failed node. However,
gfs2 still needs to perform journal replay, otherwise we run the
risk of journal replay that happens at reboot time overwriting
metadata we've since modified after we release the locks.

Signed-off-by: Bob Peterson 
---
 fs/dlm/member.c | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 0bc43b35d2c5..155bd52eb018 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -463,17 +463,12 @@ static void dlm_lsop_recover_slot(struct dlm_ls *ls, 
struct dlm_member *memb)
if (!ls->ls_ops || !ls->ls_ops->recover_slot)
return;
 
-   /* if there is no comms connection with this node
-  or the present comms connection is newer
-  than the one when this member was added, then
-  we consider the node to have failed (versus
-  being removed due to dlm_release_lockspace) */
+   /* Recover the slot regardless of whether we have a valid connection.
+* The node may have simply withdrawn, but still needs its journal
+* replayed. */
 
error = dlm_comm_seq(memb->nodeid, &seq);
 
-   if (!error && seq == memb->comm_seq)
-   return;
-
slot.nodeid = memb->nodeid;
slot.slot = memb->slot;
 
-- 
2.20.1

[Cluster-devel] [GFS2 PATCH 4/9] gfs2: Force withdraw to replay journals and wait for it to finish

2019-02-13 Thread Bob Peterson

When a node withdraws from a file system, it often leaves its journal
in an incomplete state. This is especially true when the withdraw is
caused by io errors writing to the journal. Before this patch, a
withdraw would try to write a "shutdown" record to the journal, tell
dlm it's done with the file system, and none of the other nodes
know about the problem. Later, when the problem is fixed and the
withdrawn node is rebooted, it would then discover that its own
journal was incomplete, and replay it. However, replaying it at this
point is almost guaranteed to introduce corruption because the other
nodes are likely to have used affected resource groups that appeared
in the journal since the time of the withdraw. Replaying the journal
later will overwrite any changes made, and not through any fault of
dlm, which was instructed during the withdraw to release those
resources.

This patch makes file system withdraws seen by the entire cluster.
Withdrawing nodes dequeue their journal glock to allow recovery.

The remaining nodes check all the journals to see if they are
clean or in need of replay. They try to replay dirty journals, but
only the journals of withdrawn nodes will be "not busy" and
therefore available for replay.

Until the journal replay is complete, no i/o related glocks may be
given out, to ensure that the replay does not cause the
aforementioned corruption: We cannot allow any journal replay to
overwrite blocks associated with a glock once it is held. The
glocks not affected by a withdraw are permitted to be passed
around as normal during a withdraw. A new glops flag, called
GLOF_OK_AT_WITHDRAW, indicates glocks that may be passed around
freely while a withdraw is taking place.

One such glock is the "live" glock which is now used to signal when
a withdraw occurs. When a withdraw occurs, the node signals its
withdraw by dequeueing the "live" glock and trying to enqueue it
in EX mode, thus forcing the other nodes to all see a demote
request, by way of a "1CB" (one callback) try lock. The "live"
glock is not granted in EX; the callback is only just used to
indicate a withdraw has occurred.

Note that all nodes in the cluster must wait for the recovering
node to finish replaying the withdrawing node's journal before
continuing. To this end, it checks that the journals are clean
multiple times in a retry loop.

Signed-off-by: Bob Peterson 
---
 fs/gfs2/glock.c  |  35 --
 fs/gfs2/glock.h  |   1 +
 fs/gfs2/glops.c  |  61 +-
 fs/gfs2/incore.h |   6 ++
 fs/gfs2/lock_dlm.c   |  32 ++
 fs/gfs2/log.c|  22 +--
 fs/gfs2/meta_io.c|   2 +-
 fs/gfs2/ops_fstype.c |  48 ++
 fs/gfs2/super.c  |  24 ---
 fs/gfs2/super.h  |   1 +
 fs/gfs2/util.c   | 148 ++-
 fs/gfs2/util.h   |   3 +
 12 files changed, 315 insertions(+), 68 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c6d6e478f5e3..20fb6cdf7829 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -242,7 +242,8 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
gfs2_glock_remove_from_lru(gl);
spin_unlock(&gl->gl_lockref.lock);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
-   GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
+   GLOCK_BUG_ON(gl, mapping && mapping->nrpages &&
+!test_bit(SDF_SHUTDOWN, &sdp->sd_flags));
trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
 }
@@ -543,6 +544,8 @@ __acquires(&gl->gl_lockref.lock)
int ret;
 
if (unlikely(withdrawn(sdp)) &&
+   !(glops->go_flags & GLOF_OK_AT_WITHDRAW) &&
+   (gh && !(LM_FLAG_NOEXP & gh->gh_flags)) &&
target != LM_ST_UNLOCKED)
return;
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
@@ -561,9 +564,10 @@ __acquires(&gl->gl_lockref.lock)
(lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
clear_bit(GLF_BLOCKING, &gl->gl_flags);
spin_unlock(&gl->gl_lockref.lock);
-   if (glops->go_sync)
+   if (glops->go_sync && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
glops->go_sync(gl);
-   if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
+   if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags) &&
+   !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : 
DIO_METADATA);
clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
 
@@ -1091,7 +1095,8 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
int error = 0;
 
-   if (unlikely(withdrawn(sdp)))
+   if (unlikely(withdrawn(sdp) && !(LM_FLAG_NOEXP & gh->gh_flags) &&
+!(gl->gl_ops->go_flags & GLOF_OK_AT_WITHDRAW)))
return -EIO;
 
if (test_bit(GLF_LRU, &gl->gl_flags))
@@ -1135,11 +1140,28 @@ int gfs2_glock_poll(struct gf

[Cluster-devel] [GFS2 PATCH 5/9] gfs2: Keep transactions on ail1 list until after issuing revokes

2019-02-13 Thread Bob Peterson

Before this patch, function gfs2_write_revokes would call function
gfs2_ail1_empty, then run the ail1 list, issuing revokes. But
gfs2_ail1_empty can move transactions to the ail2 list, and thus,
their revokes were never issued. This patch adds a new parameter to
gfs2_ail1_empty that allows the transactions to remain on the ail1
list until it can issue revokes for them. Then, if they have no more
buffers, they're moved to the ail2 list after the revokes are added.

Signed-off-by: Bob Peterson 
---
 fs/gfs2/log.c | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 81550038ace3..0d0dec3231c9 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -217,11 +217,12 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, 
struct gfs2_trans *tr)
 /**
  * gfs2_ail1_empty - Try to empty the ail1 lists
  * @sdp: The superblock
+ * @move_empty_to_ail2: 1 if transaction to be moved to ail2 when empty
  *
  * Tries to empty the ail1 lists, starting with the oldest first
  */
 
-static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
+static int gfs2_ail1_empty(struct gfs2_sbd *sdp, bool move_empty_to_ail2)
 {
struct gfs2_trans *tr, *s;
int oldest_tr = 1;
@@ -230,10 +231,12 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
gfs2_ail1_empty_one(sdp, tr);
-   if (list_empty(&tr->tr_ail1_list) && oldest_tr)
-   list_move(&tr->tr_list, &sdp->sd_ail2_list);
-   else
+   if (list_empty(&tr->tr_ail1_list) && oldest_tr) {
+   if (move_empty_to_ail2)
+   list_move(&tr->tr_list, &sdp->sd_ail2_list);
+   } else {
oldest_tr = 0;
+   }
}
ret = list_empty(&sdp->sd_ail1_list);
spin_unlock(&sdp->sd_ail_lock);
@@ -609,12 +612,12 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct 
gfs2_bufdata *bd)
 
 void gfs2_write_revokes(struct gfs2_sbd *sdp)
 {
-   struct gfs2_trans *tr;
+   struct gfs2_trans *tr, *s;
struct gfs2_bufdata *bd, *tmp;
int have_revokes = 0;
int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct 
gfs2_log_descriptor)) / sizeof(u64);
 
-   gfs2_ail1_empty(sdp);
+   gfs2_ail1_empty(sdp, false);
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) {
@@ -640,17 +643,20 @@ void gfs2_write_revokes(struct gfs2_sbd *sdp)
}
gfs2_log_lock(sdp);
spin_lock(&sdp->sd_ail_lock);
-   list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
+   list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, 
bd_ail_st_list) {
if (max_revokes == 0)
-   goto out_of_blocks;
+   break;
if (!list_empty(&bd->bd_list))
continue;
gfs2_add_revoke(sdp, bd);
max_revokes--;
}
+   if (list_empty(&tr->tr_ail1_list))
+   list_move(&tr->tr_list, &sdp->sd_ail2_list);
+   if (max_revokes == 0)
+   break;
}
-out_of_blocks:
spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
 
@@ -842,7 +848,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock 
*gl, u32 flags)
for (;;) {
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
-   if (gfs2_ail1_empty(sdp))
+   if (gfs2_ail1_empty(sdp, true))
break;
}
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for 
unreserved buffer */
@@ -1008,7 +1014,7 @@ int gfs2_logd(void *data)
 
did_flush = false;
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
-   gfs2_ail1_empty(sdp);
+   gfs2_ail1_empty(sdp, true);
if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
gfs2_log_flush(sdp, NULL,
   GFS2_LOG_HEAD_FLUSH_NORMAL |
@@ -1019,7 +1025,7 @@ int gfs2_logd(void *data)
if (gfs2_ail_flush_reqd(sdp)) {
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
-   gfs2_ail1_empty(sdp);
+   gfs2_ail1_empty(sdp, true);
if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))

[Cluster-devel] [GFS2 PATCH] Revert "gfs2: read journal in large chunks to locate the head"

2019-02-13 Thread Abhi Das

This reverts commit 2a5f14f279f59143139bcd1606903f2f80a34241.

This patch causes xfstests generic/311 to fail. Reverting this for
now until we have a proper fix.

Signed-off-by: Abhi Das 
---
 fs/gfs2/glops.c  |   1 -
 fs/gfs2/log.c|   4 +-
 fs/gfs2/lops.c   | 190 ++-
 fs/gfs2/lops.h   |   4 +-
 fs/gfs2/ops_fstype.c |   1 -
 fs/gfs2/recovery.c   | 123 
 fs/gfs2/recovery.h   |   2 +
 fs/gfs2/super.c  |   1 -
 8 files changed, 134 insertions(+), 192 deletions(-)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index f15b4c57c4bd..78510ab91835 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,7 +28,6 @@
 #include "util.h"
 #include "trans.h"
 #include "dir.h"
-#include "lops.h"
 
 struct workqueue_struct *gfs2_freeze_wq;
 
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5bfaf381921a..b8830fda51e8 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -733,7 +733,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct 
gfs2_jdesc *jd,
lh->lh_crc = cpu_to_be32(crc);
 
gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
-   gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags);
+   gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, op_flags);
log_flush_wait(sdp);
 }
 
@@ -810,7 +810,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock 
*gl, u32 flags)
 
gfs2_ordered_write(sdp);
lops_before_commit(sdp, tr);
-   gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
+   gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
 
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
log_flush_wait(sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 94dcab655bc0..2295042bc625 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -17,9 +17,7 @@
 #include 
 #include 
 #include 
-#include 
 
-#include "bmap.h"
 #include "dir.h"
 #include "gfs2.h"
 #include "incore.h"
@@ -195,6 +193,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, 
struct bio_vec *bvec,
 /**
  * gfs2_end_log_write - end of i/o to the log
  * @bio: The bio
+ * @error: Status of i/o request
  *
  * Each bio_vec contains either data from the pagecache or data
  * relating to the log itself. Here we iterate over the bio_vec
@@ -231,19 +230,20 @@ static void gfs2_end_log_write(struct bio *bio)
 /**
  * gfs2_log_submit_bio - Submit any pending log bio
  * @biop: Address of the bio pointer
- * @opf: REQ_OP | op_flags
+ * @op: REQ_OP
+ * @op_flags: req_flag_bits
  *
  * Submit any pending part-built or full bio to the block device. If
  * there is no pending bio, then this is a no-op.
  */
 
-void gfs2_log_submit_bio(struct bio **biop, int opf)
+void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags)
 {
struct bio *bio = *biop;
if (bio) {
struct gfs2_sbd *sdp = bio->bi_private;
atomic_inc(&sdp->sd_log_in_flight);
-   bio->bi_opf = opf;
+   bio_set_op_attrs(bio, op, op_flags);
submit_bio(bio);
*biop = NULL;
}
@@ -304,7 +304,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, 
u64 blkno,
nblk >>= sdp->sd_fsb2bb_shift;
if (blkno == nblk && !flush)
return bio;
-   gfs2_log_submit_bio(biop, op);
+   gfs2_log_submit_bio(biop, op, 0);
}
 
*biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
@@ -375,184 +375,6 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct 
page *page)
   gfs2_log_bmap(sdp));
 }
 
-/**
- * gfs2_end_log_read - end I/O callback for reads from the log
- * @bio: The bio
- *
- * Simply unlock the pages in the bio. The main thread will wait on them and
- * process them in order as necessary.
- */
-
-static void gfs2_end_log_read(struct bio *bio)
-{
-   struct page *page;
-   struct bio_vec *bvec;
-   int i;
-
-   bio_for_each_segment_all(bvec, bio, i) {
-   page = bvec->bv_page;
-   if (bio->bi_status) {
-   int err = blk_status_to_errno(bio->bi_status);
-
-   SetPageError(page);
-   mapping_set_error(page->mapping, err);
-   }
-   unlock_page(page);
-   }
-
-   bio_put(bio);
-}
-
-/**
- * gfs2_jhead_pg_srch - Look for the journal head in a given page.
- * @jd: The journal descriptor
- * @page: The page to look in
- *
- * Returns: 1 if found, 0 otherwise.
- */
-
-static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
- struct gfs2_log_header_host *head,
- struct page *page)
-{
-   struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-   struct gfs2_log_header_host uninitialized_var(lh);
-   void *kaddr = kmap_atomic(page);
-   unsigned int offset;
-   bool ret = false;
-
-   for (offset = 0; offset < PA

[Cluster-devel] [PATCH] Revert "gfs2: read journal in large chunks to locate the head"

2019-02-13 Thread Bob Peterson

This reverts commit 2a5f14f279f59143139bcd1606903f2f80a34241.

This patch causes xfstests generic/311 to fail. Reverting this for
now until we have a proper fix.

Signed-off-by: Abhi Das 
Signed-off-by: Bob Peterson 
---
 fs/gfs2/glops.c  |   1 -
 fs/gfs2/log.c|   4 +-
 fs/gfs2/lops.c   | 190 ++-
 fs/gfs2/lops.h   |   4 +-
 fs/gfs2/ops_fstype.c |   1 -
 fs/gfs2/recovery.c   | 123 +
 fs/gfs2/recovery.h   |   2 +
 fs/gfs2/super.c  |   1 -
 8 files changed, 134 insertions(+), 192 deletions(-)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index f15b4c57c4bd..78510ab91835 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,7 +28,6 @@
 #include "util.h"
 #include "trans.h"
 #include "dir.h"
-#include "lops.h"
 
 struct workqueue_struct *gfs2_freeze_wq;
 
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5bfaf381921a..b8830fda51e8 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -733,7 +733,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct 
gfs2_jdesc *jd,
lh->lh_crc = cpu_to_be32(crc);
 
gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
-   gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags);
+   gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, op_flags);
log_flush_wait(sdp);
 }
 
@@ -810,7 +810,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock 
*gl, u32 flags)
 
gfs2_ordered_write(sdp);
lops_before_commit(sdp, tr);
-   gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
+   gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
 
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
log_flush_wait(sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 94dcab655bc0..2295042bc625 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -17,9 +17,7 @@
 #include 
 #include 
 #include 
-#include 
 
-#include "bmap.h"
 #include "dir.h"
 #include "gfs2.h"
 #include "incore.h"
@@ -195,6 +193,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, 
struct bio_vec *bvec,
 /**
  * gfs2_end_log_write - end of i/o to the log
  * @bio: The bio
+ * @error: Status of i/o request
  *
  * Each bio_vec contains either data from the pagecache or data
  * relating to the log itself. Here we iterate over the bio_vec
@@ -231,19 +230,20 @@ static void gfs2_end_log_write(struct bio *bio)
 /**
  * gfs2_log_submit_bio - Submit any pending log bio
  * @biop: Address of the bio pointer
- * @opf: REQ_OP | op_flags
+ * @op: REQ_OP
+ * @op_flags: req_flag_bits
  *
  * Submit any pending part-built or full bio to the block device. If
  * there is no pending bio, then this is a no-op.
  */
 
-void gfs2_log_submit_bio(struct bio **biop, int opf)
+void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags)
 {
struct bio *bio = *biop;
if (bio) {
struct gfs2_sbd *sdp = bio->bi_private;
atomic_inc(&sdp->sd_log_in_flight);
-   bio->bi_opf = opf;
+   bio_set_op_attrs(bio, op, op_flags);
submit_bio(bio);
*biop = NULL;
}
@@ -304,7 +304,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, 
u64 blkno,
nblk >>= sdp->sd_fsb2bb_shift;
if (blkno == nblk && !flush)
return bio;
-   gfs2_log_submit_bio(biop, op);
+   gfs2_log_submit_bio(biop, op, 0);
}
 
*biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
@@ -375,184 +375,6 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct 
page *page)
   gfs2_log_bmap(sdp));
 }
 
-/**
- * gfs2_end_log_read - end I/O callback for reads from the log
- * @bio: The bio
- *
- * Simply unlock the pages in the bio. The main thread will wait on them and
- * process them in order as necessary.
- */
-
-static void gfs2_end_log_read(struct bio *bio)
-{
-   struct page *page;
-   struct bio_vec *bvec;
-   int i;
-
-   bio_for_each_segment_all(bvec, bio, i) {
-   page = bvec->bv_page;
-   if (bio->bi_status) {
-   int err = blk_status_to_errno(bio->bi_status);
-
-   SetPageError(page);
-   mapping_set_error(page->mapping, err);
-   }
-   unlock_page(page);
-   }
-
-   bio_put(bio);
-}
-
-/**
- * gfs2_jhead_pg_srch - Look for the journal head in a given page.
- * @jd: The journal descriptor
- * @page: The page to look in
- *
- * Returns: 1 if found, 0 otherwise.
- */
-
-static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
- struct gfs2_log_header_host *head,
- struct page *page)
-{
-   struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-   struct gfs2_log_header_host uninitialized_var(lh);
-   void *kaddr = kmap_atomic(page);
-   unsigned int offset;
-   bool ret = fal

[Cluster-devel] [GFS2 PATCH 7/9] gfs2: Check for log write errors and withdraw in rgrp_go_inval

[Cluster-devel] [GFS2 PATCH 1/9] gfs2: Introduce concept of a pending withdraw

[Cluster-devel] [GFS2 PATCH 8/9] gfs2: Do log_flush in gfs2_ail_empty_gl even if ail list is empty

[Cluster-devel] [GFS2 PATCH 3/9] gfs2: Empty the ail for the glock when rgrps are invalidated

[Cluster-devel] [GFS2 PATCH 0/9] GFS2: Withdraw corruption patches

[Cluster-devel] [GFS2 PATCH 2/9] gfs2: Ignore recovery attempts if gfs2 has io error or is withdrawn

[Cluster-devel] [GFS2 PATCH 6/9] gfs2: Make secondary withdrawers wait for first withdrawer

[Cluster-devel] [GFS2 PATCH 9/9] dlm: recover slot regardless of whether we still have a connection

[Cluster-devel] [GFS2 PATCH 4/9] gfs2: Force withdraw to replay journals and wait for it to finish

[Cluster-devel] [GFS2 PATCH 5/9] gfs2: Keep transactions on ail1 list until after issuing revokes

[Cluster-devel] [GFS2 PATCH] Revert "gfs2: read journal in large chunks to locate the head"

[Cluster-devel] [PATCH] Revert "gfs2: read journal in large chunks to locate the head"

12 matches

Site Navigation

Mail list logo

Footer information