[Devel] Re: [PATCH] Clear the objhash before completing restart, but delay free (v2)

2011-02-02 Thread Oren Laadan

Thanks for testing - pushed both patches to v23-rc1.

Oren.

On 02/01/2011 02:38 PM, Dan Smith wrote:
 OL Original patch posted by Dan Smith.
 
 Tested-by: Dan Smith da...@us.ibm.com
 
 With this and my pipe refcount fix, all my pipe tests pass, including
 the ones that were stuck before because the hash was getting free'd
 late.
 
 Thanks!
 
___
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

___
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel


[Devel] Re: [PATCH] Clear the objhash before completing restart, but delay free (v2)

2011-02-01 Thread Dan Smith
OL Original patch posted by Dan Smith.

Tested-by: Dan Smith da...@us.ibm.com

With this and my pipe refcount fix, all my pipe tests pass, including
the ones that were stuck before because the hash was getting free'd
late.

Thanks!

-- 
Dan Smith
IBM Linux Technology Center
email: da...@us.ibm.com
___
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

___
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel


[Devel] Re: [PATCH] Clear the objhash before completing restart, but delay free (v2)

2011-01-31 Thread Oren Laadan

I modifed the patch a bit according to our IRC chat today:


From 9c74f82411d77cf0194a17ba99af0dd31070e88a Mon Sep 17 00:00:00 2001
From: Oren Laadan or...@cs.columbia.edu
Date: Mon, 31 Jan 2011 19:01:49 -0500
Subject: [PATCH] c/r: clear the objhash before completing restart, but delay 
free (v3)

This patch causes the restart coordinator to drop references to
objects in the objhash before releasing the restarted tasks.  This
ensures that objects held exclusively there are released before any
task starts running again.

Keeping those references when we allow restarted tasks to return to
userspace is racy if an object's behavior depends on this reference.
One example is restarting half-closed pipes: without this patch they
temporarily (until the coordinator clears the objhash) appear fully
open on both sides. Consider a pipe with read-side closed. A write to
the write-side should produce EPIPE/SIGPIPE, however, because the
objhash contains a reference to both ends of the pipe, the read-side
at restart will not really be closed until after the cleanup, and a
process would unexpectedly not receive an error. Moving the object
hash clear prevents this race and others like it.

To avoid the overhead of actually freeing the object hash's structures
at the same time, we defer the actual memory free to until after the
restarted tasks are allowed to resume execution.

Original patch posted by Dan Smith.

Changes in v3:
 - Use a flag on ctx-kflags to indicate that -drop was done on
   the entire hash instead of moving elements to a free list
Changes in v2:
 - Avoid adding another list_head to ckpt_obj by re-using the hlist
   hash node for the free list

Cc: Dan Smith da...@us.ibm.com
Signed-off-by: Oren Laadan or...@cs.columbia.edu
---
 include/linux/checkpoint.h  |3 +++
 kernel/checkpoint/objhash.c |   17 +
 kernel/checkpoint/restart.c |8 
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index adaf6af..6c0ccfd 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -50,11 +50,13 @@ extern long do_sys_restart(pid_t pid, int fd,
 #define CKPT_CTX_RESTART_BIT   1
 #define CKPT_CTX_SUCCESS_BIT   2
 #define CKPT_CTX_ERROR_BIT 3
+#define CKPT_CTX_DROPPED_BIT   4
 
 #define CKPT_CTX_CHECKPOINT(1  CKPT_CTX_CHECKPOINT_BIT)
 #define CKPT_CTX_RESTART   (1  CKPT_CTX_RESTART_BIT)
 #define CKPT_CTX_SUCCESS   (1  CKPT_CTX_SUCCESS_BIT)
 #define CKPT_CTX_ERROR (1  CKPT_CTX_ERROR_BIT)
+#define CKPT_CTX_DROPPED   (1  CKPT_CTX_DROPPED_BIT)
 
 /* ckpt_ctx: uflags */
 #define CHECKPOINT_USER_FLAGS \
@@ -176,6 +178,7 @@ extern void restore_notify_error(struct ckpt_ctx *ctx);
 
 /* obj_hash */
 extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx);
+extern void ckpt_obj_hash_drop(struct ckpt_ctx *ctx);
 extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx);
 
 extern int restore_obj(struct ckpt_ctx *ctx, struct ckpt_hdr_objref *h);
diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c
index 52062a8..40c2e9b 100644
--- a/kernel/checkpoint/objhash.c
+++ b/kernel/checkpoint/objhash.c
@@ -66,7 +66,7 @@ EXPORT_SYMBOL(register_checkpoint_obj);
 #define CKPT_OBJ_HASH_NBITS  10
 #define CKPT_OBJ_HASH_TOTAL  (1UL  CKPT_OBJ_HASH_NBITS)
 
-static void obj_hash_clear(struct ckpt_obj_hash *obj_hash)
+static void obj_hash_free(struct ckpt_obj_hash *obj_hash, int drop, int clean)
 {
struct hlist_head *h = obj_hash-head;
struct hlist_node *n, *t;
@@ -75,19 +75,28 @@ static void obj_hash_clear(struct ckpt_obj_hash *obj_hash)
 
for (i = 0; i  CKPT_OBJ_HASH_TOTAL; i++) {
hlist_for_each_entry_safe(obj, n, t, h[i], hash) {
-   if (obj-ops-ref_drop)
+   if (drop  obj-ops-ref_drop)
obj-ops-ref_drop(obj-ptr, 1);
-   kfree(obj);
+   if (clean)
+   kfree(obj);
}
}
+
+}
+
+void ckpt_obj_hash_drop(struct ckpt_ctx *ctx)
+{
+   obj_hash_free(ctx-obj_hash, 1, 0);
+   ckpt_set_ctx_kflag(ctx, CKPT_CTX_DROPPED);
 }
 
 void ckpt_obj_hash_free(struct ckpt_ctx *ctx)
 {
struct ckpt_obj_hash *obj_hash = ctx-obj_hash;
+   int dropped = ctx-kflags  CKPT_CTX_DROPPED;
 
if (obj_hash) {
-   obj_hash_clear(obj_hash);
+   obj_hash_free(obj_hash, !dropped, 1);
kfree(obj_hash-head);
kfree(ctx-obj_hash);
ctx-obj_hash = NULL;
diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c
index 2eae499..01da67f 100644
--- a/kernel/checkpoint/restart.c
+++ b/kernel/checkpoint/restart.c
@@ -1345,6 +1345,14 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t 
pid)
destroy_descendants(ctx);
ret = ckpt_get_error(ctx);
} else {
+   

[Devel] Re: [PATCH] Clear the objhash before completing restart, but delay free (v2)

2010-11-01 Thread Oren Laadan
Dan,

Does this patch solve the performance problem in freeing all
the objhash entries upfront (e.g. before letting userspace
resume) ?

If so, is there still a performance hit for doing the 'clear'
portion early before resuming the tasks ?   How much does it
depend on the complexity of the hierarchy being checkpointed ?

Oren.

On 10/19/2010 10:03 AM, Dan Smith wrote:
 This patch causes the restart coordinator to clear the object hash
 before releasing the restarted tasks.  It does this to make sure
 that any objects being held exclusively by the hash are released
 before the tasks start running again.
 
 If we continue to postpone clearing the object hash until restart
 returns to userspace there can be a race where the restarted tasks
 behave differently due to the references held by the objhash.
 One specific example of this is restarting half-closed pipes.
 Without this patch we've got a race between the coordinator --
 about to clear the object hash -- and two restarted tasks connected
 via a half-closed pipe. Because the object hash contains a reference
 to both ends of the pipe one end of the pipe will not be closed
 and EPIPE/SIGPIPE won't be handled when reading from the pipe
 for instance. As far as the restarted userspace task can tell the
 pipe may briefly appear to re-open. Moving the object hash clear
 prevents this race and others like it.
 
 Note that eventually the coordinator would close the pipe and correct
 behavior would be restored. Thus this bug would only affect the
 correctness of userspace -- after a close() the pipe may briefly re-open
 and allow more data to be sent before automatically closing again.
 
 To avoid the overhead of actually freeing the object hash's structures
 at the same time, this adds a queue to ckpt_obj_hash and pushes
 the ckpt_obj structures there to be free()'d later during the cleanup
 process.
 
 Changes in v2:
  - Avoid adding another list_head to ckpt_obj by re-using the hlist
hash node for the free list
 
 Signed-off-by: Dan Smith da...@us.ibm.com
 ---
  include/linux/checkpoint.h  |1 +
  kernel/checkpoint/objhash.c |   18 +++---
  kernel/checkpoint/restart.c |2 ++
  3 files changed, 18 insertions(+), 3 deletions(-)
 
 diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
 index a11d40e..f888363 100644
 --- a/include/linux/checkpoint.h
 +++ b/include/linux/checkpoint.h
 @@ -179,6 +179,7 @@ extern void restore_notify_error(struct ckpt_ctx *ctx);
  extern int ckpt_obj_module_get(void);
  extern void ckpt_obj_module_put(void);
  
 +extern void ckpt_obj_hash_clear(struct ckpt_ctx *ctx);
  extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx);
  extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx);
  
 diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c
 index 62c34ff..4aae0c0 100644
 --- a/kernel/checkpoint/objhash.c
 +++ b/kernel/checkpoint/objhash.c
 @@ -36,6 +36,7 @@ struct ckpt_obj {
  struct ckpt_obj_hash {
   struct hlist_head *head;
   struct hlist_head list;
 + struct hlist_head free;
   int next_free_objref;
  };
  
 @@ -128,8 +129,9 @@ int ckpt_obj_module_get(void)
  #define CKPT_OBJ_HASH_NBITS  10
  #define CKPT_OBJ_HASH_TOTAL  (1UL  CKPT_OBJ_HASH_NBITS)
  
 -static void obj_hash_clear(struct ckpt_obj_hash *obj_hash)
 +void ckpt_obj_hash_clear(struct ckpt_ctx *ctx)
  {
 + struct ckpt_obj_hash *obj_hash = ctx-obj_hash;
   struct hlist_head *h = obj_hash-head;
   struct hlist_node *n, *t;
   struct ckpt_obj *obj;
 @@ -139,7 +141,9 @@ static void obj_hash_clear(struct ckpt_obj_hash *obj_hash)
   hlist_for_each_entry_safe(obj, n, t, h[i], hash) {
   if (obj-ops-ref_drop)
   obj-ops-ref_drop(obj-ptr, 1);
 - kfree(obj);
 + hlist_del_init(obj-hash);
 + hlist_del(obj-next);
 + hlist_add_head(obj-hash, obj_hash-free);
   }
   }
  }
 @@ -149,7 +153,14 @@ void ckpt_obj_hash_free(struct ckpt_ctx *ctx)
   struct ckpt_obj_hash *obj_hash = ctx-obj_hash;
  
   if (obj_hash) {
 - obj_hash_clear(obj_hash);
 + struct hlist_node *pos, *n;
 + struct ckpt_obj *obj;
 +
 + ckpt_obj_hash_clear(ctx);
 +
 + hlist_for_each_entry_safe(obj, pos, n, obj_hash-free, hash)
 + kfree(obj);
 +
   kfree(obj_hash-head);
   kfree(ctx-obj_hash);
   ctx-obj_hash = NULL;
 @@ -173,6 +184,7 @@ int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx)
   obj_hash-head = head;
   obj_hash-next_free_objref = 1;
   INIT_HLIST_HEAD(obj_hash-list);
 + INIT_HLIST_HEAD(obj_hash-free);
  
   ctx-obj_hash = obj_hash;
   return 0;
 diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c
 index 17270b8..f2241a9 100644
 --- a/kernel/checkpoint/restart.c
 +++ b/kernel/checkpoint/restart.c
 @@ -1349,6 +1349,8 @@ 

[Devel] Re: [PATCH] Clear the objhash before completing restart, but delay free (v2)

2010-11-01 Thread Dan Smith
OL Does this patch solve the performance problem in freeing all
OL the objhash entries upfront (e.g. before letting userspace
OL resume) ?

OL If so, is there still a performance hit for doing the 'clear'
OL portion early before resuming the tasks ?   How much does it
OL depend on the complexity of the hierarchy being checkpointed ?

I've actually been unable to detect any real different in performance
between this and the way it was before.  I've run it against several
different large checkpoints (multiple processes, with sockets, IPC,
many files open, etc).  I have another option for the functionality,
which is to mark only specific items as needing to be free()'d early,
but I can't discern a difference between that or this option either.
What I can discern is that this patch is more likely to prevent
similar issues in the future by making the behavior of the hash far
more generally correct, IMHO.

-- 
Dan Smith
IBM Linux Technology Center
email: da...@us.ibm.com
___
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

___
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel


[Devel] Re: [PATCH] Clear the objhash before completing restart, but delay free (v2)

2010-11-01 Thread Oren Laadan


On 11/01/2010 01:01 PM, Dan Smith wrote:
 OL Does this patch solve the performance problem in freeing all
 OL the objhash entries upfront (e.g. before letting userspace
 OL resume) ?
 
 OL If so, is there still a performance hit for doing the 'clear'
 OL portion early before resuming the tasks ?   How much does it
 OL depend on the complexity of the hierarchy being checkpointed ?
 
 I've actually been unable to detect any real different in performance
 between this and the way it was before.  I've run it against several
 different large checkpoints (multiple processes, with sockets, IPC,
 many files open, etc).  I have another option for the functionality,
 which is to mark only specific items as needing to be free()'d early,
 but I can't discern a difference between that or this option either.
 What I can discern is that this patch is more likely to prevent
 similar issues in the future by making the behavior of the hash far
 more generally correct, IMHO.

So to be clear: is there a performance issue due to moving the
cleanup - some or all - to before allowing tasks to resume ?
(This is what I understood over IRC).

I agree that it's more correct and safe this way. But I want to
know what the implications of a (or any) fix we make.

Oren.
___
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

___
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel


[Devel] Re: [PATCH] Clear the objhash before completing restart, but delay free (v2)

2010-11-01 Thread Dan Smith
OL So to be clear: is there a performance issue due to moving the
OL cleanup - some or all - to before allowing tasks to resume ?
OL (This is what I understood over IRC).

Not that I have been able to measure, no.

-- 
Dan Smith
IBM Linux Technology Center
email: da...@us.ibm.com
___
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

___
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel


[Devel] Re: [PATCH] Clear the objhash before completing restart, but delay free (v2)

2010-10-19 Thread Matt Helsley
On Tue, Oct 19, 2010 at 07:03:11AM -0700, Dan Smith wrote:
 This patch causes the restart coordinator to clear the object hash
 before releasing the restarted tasks.  It does this to make sure
 that any objects being held exclusively by the hash are released
 before the tasks start running again.
 
 If we continue to postpone clearing the object hash until restart
 returns to userspace there can be a race where the restarted tasks
 behave differently due to the references held by the objhash.
 One specific example of this is restarting half-closed pipes.
 Without this patch we've got a race between the coordinator --
 about to clear the object hash -- and two restarted tasks connected
 via a half-closed pipe. Because the object hash contains a reference
 to both ends of the pipe one end of the pipe will not be closed
 and EPIPE/SIGPIPE won't be handled when reading from the pipe
 for instance. As far as the restarted userspace task can tell the
 pipe may briefly appear to re-open. Moving the object hash clear
 prevents this race and others like it.
 
 Note that eventually the coordinator would close the pipe and correct
 behavior would be restored. Thus this bug would only affect the
 correctness of userspace -- after a close() the pipe may briefly re-open
 and allow more data to be sent before automatically closing again.
 
 To avoid the overhead of actually freeing the object hash's structures
 at the same time, this adds a queue to ckpt_obj_hash and pushes
 the ckpt_obj structures there to be free()'d later during the cleanup
 process.
 
 Changes in v2:
  - Avoid adding another list_head to ckpt_obj by re-using the hlist
hash node for the free list
 
 Signed-off-by: Dan Smith da...@us.ibm.com

Looks good.

Reviewed-by: Matt Helsley matth...@us.ibm.com

 ---
  include/linux/checkpoint.h  |1 +
  kernel/checkpoint/objhash.c |   18 +++---
  kernel/checkpoint/restart.c |2 ++
  3 files changed, 18 insertions(+), 3 deletions(-)
 
 diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
 index a11d40e..f888363 100644
 --- a/include/linux/checkpoint.h
 +++ b/include/linux/checkpoint.h
 @@ -179,6 +179,7 @@ extern void restore_notify_error(struct ckpt_ctx *ctx);
  extern int ckpt_obj_module_get(void);
  extern void ckpt_obj_module_put(void);
 
 +extern void ckpt_obj_hash_clear(struct ckpt_ctx *ctx);
  extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx);
  extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx);
 
 diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c
 index 62c34ff..4aae0c0 100644
 --- a/kernel/checkpoint/objhash.c
 +++ b/kernel/checkpoint/objhash.c
 @@ -36,6 +36,7 @@ struct ckpt_obj {
  struct ckpt_obj_hash {
   struct hlist_head *head;
   struct hlist_head list;
 + struct hlist_head free;
   int next_free_objref;
  };
 
 @@ -128,8 +129,9 @@ int ckpt_obj_module_get(void)
  #define CKPT_OBJ_HASH_NBITS  10
  #define CKPT_OBJ_HASH_TOTAL  (1UL  CKPT_OBJ_HASH_NBITS)
 
 -static void obj_hash_clear(struct ckpt_obj_hash *obj_hash)
 +void ckpt_obj_hash_clear(struct ckpt_ctx *ctx)
  {
 + struct ckpt_obj_hash *obj_hash = ctx-obj_hash;
   struct hlist_head *h = obj_hash-head;
   struct hlist_node *n, *t;
   struct ckpt_obj *obj;
 @@ -139,7 +141,9 @@ static void obj_hash_clear(struct ckpt_obj_hash *obj_hash)
   hlist_for_each_entry_safe(obj, n, t, h[i], hash) {
   if (obj-ops-ref_drop)
   obj-ops-ref_drop(obj-ptr, 1);
 - kfree(obj);
 + hlist_del_init(obj-hash);
 + hlist_del(obj-next);
 + hlist_add_head(obj-hash, obj_hash-free);
   }
   }
  }
 @@ -149,7 +153,14 @@ void ckpt_obj_hash_free(struct ckpt_ctx *ctx)
   struct ckpt_obj_hash *obj_hash = ctx-obj_hash;
 
   if (obj_hash) {
 - obj_hash_clear(obj_hash);
 + struct hlist_node *pos, *n;
 + struct ckpt_obj *obj;
 +
 + ckpt_obj_hash_clear(ctx);
 +
 + hlist_for_each_entry_safe(obj, pos, n, obj_hash-free, hash)
 + kfree(obj);
 +
   kfree(obj_hash-head);
   kfree(ctx-obj_hash);
   ctx-obj_hash = NULL;
 @@ -173,6 +184,7 @@ int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx)
   obj_hash-head = head;
   obj_hash-next_free_objref = 1;
   INIT_HLIST_HEAD(obj_hash-list);
 + INIT_HLIST_HEAD(obj_hash-free);
 
   ctx-obj_hash = obj_hash;
   return 0;
 diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c
 index 17270b8..f2241a9 100644
 --- a/kernel/checkpoint/restart.c
 +++ b/kernel/checkpoint/restart.c
 @@ -1349,6 +1349,8 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t 
 pid)
   if (ret  0)
   ckpt_err(ctx, ret, restart failed (coordinator)\n);
 
 + ckpt_obj_hash_clear(ctx);
 +
   if (ckpt_test_error(ctx)) {