Re: [Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491)

2010-06-18 Thread Srinivas Eeda
On 6/17/2010 7:11 PM, Sunil Mushran wrote:
 This patch looks ok on the surface. Should be usable. But checking into
 the repo is another matter.
ok, won't checkin but will give this as one-off fix for now.

 My problem is that this flag is very much like inflight_locks but
 is not the same. inflight_locks are taken only on lockres' that are
 mastered by the node. This new flag does the same but for non
 mastered locks too. A better solution will be to merge the two.
 And that means cleaning up inflight_locks.
will look into this.

 The reason for the NAK for check in is that the code is adding
 more messiness to an area that is already very messy.

 Sunil

 On 06/15/2010 09:43 PM, Srinivas Eeda wrote:
 This patch fixes the following hole.
 dlmlock tries to create a new lock on a lockres that is on purge 
 list. It calls
 dlm_get_lockresource and later adds a lock to blocked list. But in 
 this window,
 dlm_thread can purge the lockres and unhash it. This will cause a 
 BUG, as when
 the AST comes back from the master lockres is not found

 This patch marks the lockres with a new state DLM_LOCK_RES_IN_USE 
 which would
 protect lockres from dlm_thread purging it.

 Signed-off-by: Srinivas Eedasrinivas.e...@oracle.com
 ---
   fs/ocfs2/dlm/dlmcommon.h |1 +
   fs/ocfs2/dlm/dlmlock.c   |4 
   fs/ocfs2/dlm/dlmmaster.c |5 -
   fs/ocfs2/dlm/dlmthread.c |1 +
   4 files changed, 10 insertions(+), 1 deletions(-)

 diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
 index 0102be3..6cf3c08 100644
 --- a/fs/ocfs2/dlm/dlmcommon.h
 +++ b/fs/ocfs2/dlm/dlmcommon.h
 @@ -279,6 +279,7 @@ static inline void __dlm_set_joining_node(struct 
 dlm_ctxt *dlm,
   #define DLM_LOCK_RES_DIRTY0x0008
   #define DLM_LOCK_RES_IN_PROGRESS  0x0010
   #define DLM_LOCK_RES_MIGRATING0x0020
 +#define DLM_LOCK_RES_IN_USE   0x0100
   #define DLM_LOCK_RES_DROPPING_REF 0x0040
   #define DLM_LOCK_RES_BLOCK_DIRTY  0x1000
   #define DLM_LOCK_RES_SETREF_INPROG0x2000
 diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
 index 777..501ac40 100644
 --- a/fs/ocfs2/dlm/dlmlock.c
 +++ b/fs/ocfs2/dlm/dlmlock.c
 @@ -134,6 +134,8 @@ static enum dlm_status dlmlock_master(struct 
 dlm_ctxt *dlm,
   if (status != DLM_NORMAL
   lock-ml.node != dlm-node_num) {
   /* erf.  state changed after lock was dropped. */
 +/* DLM_LOCK_RES_IN_USE is set in dlm_get_lock_resource */
 +res-state= ~DLM_LOCK_RES_IN_USE;
   spin_unlock(res-spinlock);
   dlm_error(status);
   return status;
 @@ -180,6 +182,7 @@ static enum dlm_status dlmlock_master(struct 
 dlm_ctxt *dlm,
   kick_thread = 1;
   }
   }
 +res-state= ~DLM_LOCK_RES_IN_USE;
   /* reduce the inflight count, this may result in the lockres
* being purged below during calc_usage */
   if (lock-ml.node == dlm-node_num)
 @@ -246,6 +249,7 @@ static enum dlm_status dlmlock_remote(struct 
 dlm_ctxt *dlm,

   spin_lock(res-spinlock);
   res-state= ~DLM_LOCK_RES_IN_PROGRESS;
 +res-state= ~DLM_LOCK_RES_IN_USE;
   lock-lock_pending = 0;
   if (status != DLM_NORMAL) {
   if (status == DLM_RECOVERING
 diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
 index 9289b43..f0f2d97 100644
 --- a/fs/ocfs2/dlm/dlmmaster.c
 +++ b/fs/ocfs2/dlm/dlmmaster.c
 @@ -719,6 +719,7 @@ lookup:
   if (tmpres) {
   int dropping_ref = 0;

 +tmpres-state |= DLM_LOCK_RES_IN_USE;
   spin_unlock(dlm-spinlock);

   spin_lock(tmpres-spinlock);
 @@ -731,8 +732,10 @@ lookup:
   if (tmpres-owner == dlm-node_num) {
   BUG_ON(tmpres-state  DLM_LOCK_RES_DROPPING_REF);
   dlm_lockres_grab_inflight_ref(dlm, tmpres);
 -} else if (tmpres-state  DLM_LOCK_RES_DROPPING_REF)
 +} else if (tmpres-state  DLM_LOCK_RES_DROPPING_REF) {
 +tmpres-state= ~DLM_LOCK_RES_IN_USE;
   dropping_ref = 1;
 +}
   spin_unlock(tmpres-spinlock);

   /* wait until done messaging the master, drop our ref to allow
 diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
 index fb0be6c..2b82e0b 100644
 --- a/fs/ocfs2/dlm/dlmthread.c
 +++ b/fs/ocfs2/dlm/dlmthread.c
 @@ -93,6 +93,7 @@ int __dlm_lockres_has_locks(struct 
 dlm_lock_resource *res)
   int __dlm_lockres_unused(struct dlm_lock_resource *res)
   {
   if (!__dlm_lockres_has_locks(res)
 +!(res-state  DLM_LOCK_RES_IN_USE)
   (list_empty(res-dirty)  !(res-state  DLM_LOCK_RES_DIRTY))) {
   /* try not to scan the bitmap unless the first two
* conditions are already true */



___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
http://oss.oracle.com/mailman/listinfo/ocfs2-devel


Re: [Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491)

2010-06-17 Thread Sunil Mushran
This patch looks ok on the surface. Should be usable. But checking into
the repo is another matter.

My problem is that this flag is very much like inflight_locks but
is not the same. inflight_locks are taken only on lockres' that are
mastered by the node. This new flag does the same but for non
mastered locks too. A better solution will be to merge the two.
And that means cleaning up inflight_locks.

The reason for the NAK for check in is that the code is adding
more messiness to an area that is already very messy.

Sunil

On 06/15/2010 09:43 PM, Srinivas Eeda wrote:
 This patch fixes the following hole.
 dlmlock tries to create a new lock on a lockres that is on purge list. It 
 calls
 dlm_get_lockresource and later adds a lock to blocked list. But in this 
 window,
 dlm_thread can purge the lockres and unhash it. This will cause a BUG, as when
 the AST comes back from the master lockres is not found

 This patch marks the lockres with a new state DLM_LOCK_RES_IN_USE which would
 protect lockres from dlm_thread purging it.

 Signed-off-by: Srinivas Eedasrinivas.e...@oracle.com
 ---
   fs/ocfs2/dlm/dlmcommon.h |1 +
   fs/ocfs2/dlm/dlmlock.c   |4 
   fs/ocfs2/dlm/dlmmaster.c |5 -
   fs/ocfs2/dlm/dlmthread.c |1 +
   4 files changed, 10 insertions(+), 1 deletions(-)

 diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
 index 0102be3..6cf3c08 100644
 --- a/fs/ocfs2/dlm/dlmcommon.h
 +++ b/fs/ocfs2/dlm/dlmcommon.h
 @@ -279,6 +279,7 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt 
 *dlm,
   #define DLM_LOCK_RES_DIRTY0x0008
   #define DLM_LOCK_RES_IN_PROGRESS  0x0010
   #define DLM_LOCK_RES_MIGRATING0x0020
 +#define DLM_LOCK_RES_IN_USE   0x0100
   #define DLM_LOCK_RES_DROPPING_REF 0x0040
   #define DLM_LOCK_RES_BLOCK_DIRTY  0x1000
   #define DLM_LOCK_RES_SETREF_INPROG0x2000
 diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
 index 777..501ac40 100644
 --- a/fs/ocfs2/dlm/dlmlock.c
 +++ b/fs/ocfs2/dlm/dlmlock.c
 @@ -134,6 +134,8 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt 
 *dlm,
   if (status != DLM_NORMAL
   lock-ml.node != dlm-node_num) {
   /* erf.  state changed after lock was dropped. */
 + /* DLM_LOCK_RES_IN_USE is set in dlm_get_lock_resource */
 + res-state= ~DLM_LOCK_RES_IN_USE;
   spin_unlock(res-spinlock);
   dlm_error(status);
   return status;
 @@ -180,6 +182,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt 
 *dlm,
   kick_thread = 1;
   }
   }
 + res-state= ~DLM_LOCK_RES_IN_USE;
   /* reduce the inflight count, this may result in the lockres
* being purged below during calc_usage */
   if (lock-ml.node == dlm-node_num)
 @@ -246,6 +249,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt 
 *dlm,

   spin_lock(res-spinlock);
   res-state= ~DLM_LOCK_RES_IN_PROGRESS;
 + res-state= ~DLM_LOCK_RES_IN_USE;
   lock-lock_pending = 0;
   if (status != DLM_NORMAL) {
   if (status == DLM_RECOVERING
 diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
 index 9289b43..f0f2d97 100644
 --- a/fs/ocfs2/dlm/dlmmaster.c
 +++ b/fs/ocfs2/dlm/dlmmaster.c
 @@ -719,6 +719,7 @@ lookup:
   if (tmpres) {
   int dropping_ref = 0;

 + tmpres-state |= DLM_LOCK_RES_IN_USE;
   spin_unlock(dlm-spinlock);

   spin_lock(tmpres-spinlock);
 @@ -731,8 +732,10 @@ lookup:
   if (tmpres-owner == dlm-node_num) {
   BUG_ON(tmpres-state  DLM_LOCK_RES_DROPPING_REF);
   dlm_lockres_grab_inflight_ref(dlm, tmpres);
 - } else if (tmpres-state  DLM_LOCK_RES_DROPPING_REF)
 + } else if (tmpres-state  DLM_LOCK_RES_DROPPING_REF) {
 + tmpres-state= ~DLM_LOCK_RES_IN_USE;
   dropping_ref = 1;
 + }
   spin_unlock(tmpres-spinlock);

   /* wait until done messaging the master, drop our ref to allow
 diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
 index fb0be6c..2b82e0b 100644
 --- a/fs/ocfs2/dlm/dlmthread.c
 +++ b/fs/ocfs2/dlm/dlmthread.c
 @@ -93,6 +93,7 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res)
   int __dlm_lockres_unused(struct dlm_lock_resource *res)
   {
   if (!__dlm_lockres_has_locks(res)
 + !(res-state  DLM_LOCK_RES_IN_USE)
   (list_empty(res-dirty)  !(res-state  DLM_LOCK_RES_DIRTY))) {
   /* try not to scan the bitmap unless the first two
* conditions are already true */



___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
http://oss.oracle.com/mailman/listinfo/ocfs2-devel


[Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491)

2010-06-15 Thread Srinivas Eeda
This patch fixes the following hole.
dlmlock tries to create a new lock on a lockres that is on purge list. It calls
dlm_get_lockresource and later adds a lock to blocked list. But in this window,
dlm_thread can purge the lockres and unhash it. This will cause a BUG, as when
the AST comes back from the master lockres is not found

This patch marks the lockres with a new state DLM_LOCK_RES_IN_USE which would
protect lockres from dlm_thread purging it.

Signed-off-by: Srinivas Eeda srinivas.e...@oracle.com
---
 fs/ocfs2/dlm/dlmcommon.h |1 +
 fs/ocfs2/dlm/dlmlock.c   |4 
 fs/ocfs2/dlm/dlmmaster.c |5 -
 fs/ocfs2/dlm/dlmthread.c |1 +
 4 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 0102be3..6cf3c08 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -279,6 +279,7 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt 
*dlm,
 #define DLM_LOCK_RES_DIRTY0x0008
 #define DLM_LOCK_RES_IN_PROGRESS  0x0010
 #define DLM_LOCK_RES_MIGRATING0x0020
+#define DLM_LOCK_RES_IN_USE   0x0100
 #define DLM_LOCK_RES_DROPPING_REF 0x0040
 #define DLM_LOCK_RES_BLOCK_DIRTY  0x1000
 #define DLM_LOCK_RES_SETREF_INPROG0x2000
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 777..501ac40 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -134,6 +134,8 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
if (status != DLM_NORMAL 
lock-ml.node != dlm-node_num) {
/* erf.  state changed after lock was dropped. */
+   /* DLM_LOCK_RES_IN_USE is set in dlm_get_lock_resource */
+   res-state = ~DLM_LOCK_RES_IN_USE;
spin_unlock(res-spinlock);
dlm_error(status);
return status;
@@ -180,6 +182,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
kick_thread = 1;
}
}
+   res-state = ~DLM_LOCK_RES_IN_USE;
/* reduce the inflight count, this may result in the lockres
 * being purged below during calc_usage */
if (lock-ml.node == dlm-node_num)
@@ -246,6 +249,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 
spin_lock(res-spinlock);
res-state = ~DLM_LOCK_RES_IN_PROGRESS;
+   res-state = ~DLM_LOCK_RES_IN_USE;
lock-lock_pending = 0;
if (status != DLM_NORMAL) {
if (status == DLM_RECOVERING 
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9289b43..f0f2d97 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -719,6 +719,7 @@ lookup:
if (tmpres) {
int dropping_ref = 0;
 
+   tmpres-state |= DLM_LOCK_RES_IN_USE;
spin_unlock(dlm-spinlock);
 
spin_lock(tmpres-spinlock);
@@ -731,8 +732,10 @@ lookup:
if (tmpres-owner == dlm-node_num) {
BUG_ON(tmpres-state  DLM_LOCK_RES_DROPPING_REF);
dlm_lockres_grab_inflight_ref(dlm, tmpres);
-   } else if (tmpres-state  DLM_LOCK_RES_DROPPING_REF)
+   } else if (tmpres-state  DLM_LOCK_RES_DROPPING_REF) {
+   tmpres-state = ~DLM_LOCK_RES_IN_USE;
dropping_ref = 1;
+   }
spin_unlock(tmpres-spinlock);
 
/* wait until done messaging the master, drop our ref to allow
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index fb0be6c..2b82e0b 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -93,6 +93,7 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res)
 int __dlm_lockres_unused(struct dlm_lock_resource *res)
 {
if (!__dlm_lockres_has_locks(res) 
+   !(res-state  DLM_LOCK_RES_IN_USE) 
(list_empty(res-dirty)  !(res-state  DLM_LOCK_RES_DIRTY))) {
/* try not to scan the bitmap unless the first two
 * conditions are already true */
-- 
1.5.6.5


___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
http://oss.oracle.com/mailman/listinfo/ocfs2-devel