If a node sends a DLM_RCOM_STATUS command and an error occurs on the
receiving side, the DLM_RCOM_STATUS_REPLY response may not be returned.
Retransmit the DLM_RCOM_STATUS command (and likewise DLM_RCOM_NAMES)
when the wait for the reply times out, so that we do not wait
indefinitely for a response.

Signed-off-by: Tadashi Miyauchi <miyau...@toshiba-tops.co.jp>
Signed-off-by: Tsutomu Owa <tsutomu....@toshiba.co.jp>
---
 fs/dlm/rcom.c    | 6 ++++++
 fs/dlm/recover.c | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index f3f5e72..4ff061d 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -155,6 +155,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
                goto out;
        }
 
+retry:
        error = create_rcom(ls, nodeid, DLM_RCOM_STATUS,
                            sizeof(struct rcom_status), &rc, &mh);
        if (error)
@@ -169,6 +170,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
 
        error = dlm_wait_function(ls, &rcom_response);
        disallow_sync_reply(ls);
+       if (error == -ETIMEDOUT)
+               goto retry;
        if (error)
                goto out;
 
@@ -276,6 +279,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 
        ls->ls_recover_nodeid = nodeid;
 
+retry:
        error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh);
        if (error)
                goto out;
@@ -288,6 +292,8 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 
        error = dlm_wait_function(ls, &rcom_response);
        disallow_sync_reply(ls);
+       if (error == -ETIMEDOUT)
+               goto retry;
  out:
        return error;
 }
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index eaea789..ce2aa54 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -52,6 +52,10 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
                                        dlm_config.ci_recover_timer * HZ);
                if (rv)
                        break;
+               if (test_bit(LSFL_RCOM_WAIT, &ls->ls_flags)) {
+                       log_debug(ls, "dlm_wait_function timed out");
+                       return -ETIMEDOUT;
+               }
        }
 
        if (dlm_recovery_stopped(ls)) {
-- 
2.7.4





Reply via email to