On 2014/11/7 15:03, Junxiao Bi wrote:
> On 11/07/2014 02:36 PM, Xue jiufei wrote:
>> Hi Junxiao
>>
>> On 2014/11/7 13:54, Junxiao Bi wrote:
>>> Hi Jiufei,
>>>
>>> On 11/07/2014 11:41 AM, Xue jiufei wrote:
>>>> Function ocfs2_readpages() use nonblocking flag to avoid page lock
>>>> inversion. It will trigger cluster hang because that flag
>>>> OCFS2_LOCK_UPCONVERT_FINISHING is not cleared if nonblocking lock
>>>> cannot be granted at once. The flag would prevent dc thread from
>>>> downconverting. So other nodes cannot acquire this lockres forever.
>>>>
>>>> So we should not set OCFS2_LOCK_UPCONVERT_FINISHING when receiving ast
>>>> if nonblocking lock had already returned.
>>>>
>>>
>>> Use OCFS2_LOCK_NONBLOCK_FINISHED can't fix this issue. Like there are
>>> two processes doing non-blocking lock in the following order.
>> I am wondering why these two processes can lock in such order.
> 
> Yes, you are right. The LOCK_BUSY flag stops this race. Your fix works. But
> just as I mentioned, I think for a non-blocking lock there is no need to do
> cluster-wide locking, because the AST is async; before it arrives, most of
> the time the non-blocking lock has already returned failure.
We have added some debug information in the functions dlm_proxy_ast_handler()
and dlm_send_remote_lock_request(), and ran the race testcase before.
The result shows that the probability of the AST message arriving earlier
than the returned status of the lock call is about 1/4.

In addition, if the non-blocking lock does not take the cluster-wide lock,
ocfs2_readpages() will never succeed, which is not what we want.

Thanks,
Jiufei
> 
> Thanks,
> Junxiao.
>>> 1. Process A issue non-blocking lock fail, set OCFS2_LOCK_NONBLOCK_FINISHED.
>> We had set the OCFS2_LOCK_BUSY flag before process A calls ocfs2_dlm_lock(),
>>> 2. Process B issue non-blocking lock fail, set
>>> OCFS2_LOCK_NONBLOCK_FINISHED again.
>> So process B must wait until the AST for process A returns and clears
>> OCFS2_LOCK_BUSY.
>>
>>> 3. Ast for process A come, clear OCFS2_LOCK_NONBLOCK_FINISHED.
>>> 4. Ast for process B come, set OCFS2_LOCK_UPCONVERT_FINISHING.
>>>
>>> So down convert can still be blocked.
>>>
>>> To fix this issue, I think no need to call ocfs2_dlm_lock() for
>>> non-blocking lock, just check local lockres->l_level is enough, if
>>> request level equal or smaller than it, lock success, or lock fail.
>>>
>>> Thanks,
>>> Junxiao.
>>>
>> Thanks,
>> jiufei.
>>
>>>> Signed-off-by: joyce.xue <[email protected]>
>>>> ---
>>>>  fs/ocfs2/dlmglue.c | 37 +++++++++++++++++++++++++++++++------
>>>>  fs/ocfs2/ocfs2.h   |  6 ++++++
>>>>  2 files changed, 37 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
>>>> index 21262f2..1ddb6f1 100644
>>>> --- a/fs/ocfs2/dlmglue.c
>>>> +++ b/fs/ocfs2/dlmglue.c
>>>> @@ -861,8 +861,13 @@ static inline void 
>>>> ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
>>>>     * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
>>>>     * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
>>>>     * downconverting the lock before the upconvert has fully completed.
>>>> +   * Do not prevent the dc thread from downconverting if NONBLOCK lock
>>>> +   * had already returned.
>>>>     */
>>>> -  lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
>>>> +  if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED))
>>>> +          lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
>>>> +  else
>>>> +          lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED);
>>>>  
>>>>    lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
>>>>  }
>>>> @@ -1324,13 +1329,12 @@ static void lockres_add_mask_waiter(struct 
>>>> ocfs2_lock_res *lockres,
>>>>  
>>>>  /* returns 0 if the mw that was removed was already satisfied, -EBUSY
>>>>   * if the mask still hadn't reached its goal */
>>>> -static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
>>>> +static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
>>>>                                  struct ocfs2_mask_waiter *mw)
>>>>  {
>>>> -  unsigned long flags;
>>>>    int ret = 0;
>>>>  
>>>> -  spin_lock_irqsave(&lockres->l_lock, flags);
>>>> +  assert_spin_locked(&lockres->l_lock);
>>>>    if (!list_empty(&mw->mw_item)) {
>>>>            if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
>>>>                    ret = -EBUSY;
>>>> @@ -1338,6 +1342,18 @@ static int lockres_remove_mask_waiter(struct 
>>>> ocfs2_lock_res *lockres,
>>>>            list_del_init(&mw->mw_item);
>>>>            init_completion(&mw->mw_complete);
>>>>    }
>>>> +
>>>> +  return ret;
>>>> +}
>>>> +
>>>> +static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
>>>> +                                struct ocfs2_mask_waiter *mw)
>>>> +{
>>>> +  unsigned long flags;
>>>> +  int ret = 0;
>>>> +
>>>> +  spin_lock_irqsave(&lockres->l_lock, flags);
>>>> +  ret = __lockres_remove_mask_waiter(lockres, mw);
>>>>    spin_unlock_irqrestore(&lockres->l_lock, flags);
>>>>  
>>>>    return ret;
>>>> @@ -1373,6 +1389,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super 
>>>> *osb,
>>>>    unsigned long flags;
>>>>    unsigned int gen;
>>>>    int noqueue_attempted = 0;
>>>> +  int dlm_locked = 0;
>>>>  
>>>>    ocfs2_init_mask_waiter(&mw);
>>>>  
>>>> @@ -1481,6 +1498,7 @@ again:
>>>>                    ocfs2_recover_from_dlm_error(lockres, 1);
>>>>                    goto out;
>>>>            }
>>>> +          dlm_locked = 1;
>>>>  
>>>>            mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
>>>>                 lockres->l_name);
>>>> @@ -1514,10 +1532,17 @@ out:
>>>>    if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
>>>>        mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
>>>>            wait = 0;
>>>> -          if (lockres_remove_mask_waiter(lockres, &mw))
>>>> +          spin_lock_irqsave(&lockres->l_lock, flags);
>>>> +          if (__lockres_remove_mask_waiter(lockres, &mw)) {
>>>> +                  if (dlm_locked)
>>>> +                          lockres_or_flags(lockres,
>>>> +                                  OCFS2_LOCK_NONBLOCK_FINISHED);
>>>> +                  spin_unlock_irqrestore(&lockres->l_lock, flags);
>>>>                    ret = -EAGAIN;
>>>> -          else
>>>> +          } else {
>>>> +                  spin_unlock_irqrestore(&lockres->l_lock, flags);
>>>>                    goto again;
>>>> +          }
>>>>    }
>>>>    if (wait) {
>>>>            ret = ocfs2_wait_for_mask(&mw);
>>>> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
>>>> index bbec539..7d6b7d0 100644
>>>> --- a/fs/ocfs2/ocfs2.h
>>>> +++ b/fs/ocfs2/ocfs2.h
>>>> @@ -144,6 +144,12 @@ enum ocfs2_unlock_action {
>>>>                                                 * before the upconvert
>>>>                                                 * has completed */
>>>>  
>>>> +#define OCFS2_LOCK_NONBLOCK_FINISHED (0x00001000) /* NONBLOCK cluster
>>>> +                                             * lock has already
>>>> +                                             * returned, do not block
>>>> +                                             * dc thread from
>>>> +                                             * downconverting */
>>>> +
>>>>  struct ocfs2_lock_res_ops;
>>>>  
>>>>  typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
>>>>
>>>
>>> .
>>>
>>
>>
> 
> .
> 



_______________________________________________
Ocfs2-devel mailing list
[email protected]
https://oss.oracle.com/mailman/listinfo/ocfs2-devel

Reply via email to