The task address space (task->mm) may be shared between processes when
CLONE_VM is used, in particular among threads. Accordingly, treat
'task->mm' as a shared object: during checkpoint, look it up in the
objhash and dump its contents only the first time it is seen. During
restart, likewise, restore the contents only if it is a new instance;
otherwise, reuse the one already registered in the objhash.

Signed-off-by: Oren Laadan <or...@cs.columbia.edu>
---
 checkpoint/ckpt_mem.c      |   36 +++++++++++++++++++++++-------------
 checkpoint/objhash.c       |    6 ++++++
 checkpoint/rstr_mem.c      |   31 +++++++++++++++++++++++++++----
 include/linux/checkpoint.h |    1 +
 4 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/checkpoint/ckpt_mem.c b/checkpoint/ckpt_mem.c
index 92a3edc..0df3cda 100644
--- a/checkpoint/ckpt_mem.c
+++ b/checkpoint/ckpt_mem.c
@@ -733,22 +733,29 @@ int cr_write_mm(struct cr_ctx *ctx, struct task_struct *t)
        struct cr_hdr_mm *hh;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
-       int objref, ret;
+       int objref, new;
+       int ret;
+
+       mm = get_task_mm(t);
+
+       new = cr_obj_add_ptr(ctx, mm, &objref, CR_OBJ_MM, 0);
+       if (new < 0) {
+               ret = new;
+               goto mmput;
+       }
 
        h.type = CR_HDR_MM;
        h.len = sizeof(*hh);
 
+       ret = -ENOMEM;
        hh = cr_hbuf_get(ctx, sizeof(*hh));
        if (!hh)
-               return -ENOMEM;
+               goto mmput;
 
-       mm = get_task_mm(t);
+       down_read(&mm->mmap_sem);
 
-       objref = 0;     /* will be meaningful with multiple processes */
        hh->objref = objref;
 
-       down_read(&mm->mmap_sem);
-
        hh->start_code = mm->start_code;
        hh->end_code = mm->end_code;
        hh->start_data = mm->start_data;
@@ -770,17 +777,20 @@ int cr_write_mm(struct cr_ctx *ctx, struct task_struct *t)
        if (ret < 0)
                goto out;
 
-       /* write the vma's */
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
-               ret = cr_write_vma(ctx, vma);
-               if (ret < 0)
-                       goto out;
-       }
+       if (new) {
+               /* write the vma's */
+               for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                       ret = cr_write_vma(ctx, vma);
+                       if (ret < 0)
+                               goto out;
+               }
 
-       ret = cr_write_mm_context(ctx, mm);
+               ret = cr_write_mm_context(ctx, mm);
+       }
 
  out:
        up_read(&mm->mmap_sem);
+ mmput:
        mmput(mm);
        return ret;
 }
diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index 25916c1..6584579 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -38,6 +38,9 @@ static void cr_obj_ref_drop(struct cr_objref *obj)
        case CR_OBJ_INODE:
                iput((struct inode *) obj->ptr);
                break;
+       case CR_OBJ_MM:
+               mmput((struct mm_struct *) obj->ptr);
+               break;
        default:
                BUG();
        }
@@ -55,6 +58,9 @@ static int cr_obj_ref_grab(struct cr_objref *obj)
                if (!igrab((struct inode *) obj->ptr))
                        ret = -EBADF;
                break;
+       case CR_OBJ_MM:
+               atomic_inc(&((struct mm_struct *) obj->ptr)->mm_users);
+               break;
        default:
                BUG();
        }
diff --git a/checkpoint/rstr_mem.c b/checkpoint/rstr_mem.c
index da3eb0e..7e73129 100644
--- a/checkpoint/rstr_mem.c
+++ b/checkpoint/rstr_mem.c
@@ -505,16 +505,39 @@ int cr_read_mm(struct cr_ctx *ctx)
        if (ret < 0)
                goto out;
 
-       cr_debug("map_count %d\n", hh->map_count);
+       cr_debug("map_count %d objref %d\n", hh->map_count, hh->objref);
 
-       /* XXX need more sanity checks */
+       /* FIX need more sanity checks */
 
        ret = -EINVAL;
-       if ((hh->start_code > hh->end_code) ||
-           (hh->start_data > hh->end_data))
+       if (hh->objref < 0)
                goto out;
+       if ((hh->start_code > hh->end_code) || (hh->start_data > hh->end_data))
+               goto out;
+
+       /* if the mm's objref is in the objhash, use that instance */
+       mm = cr_obj_get_by_ref(ctx, hh->objref, CR_OBJ_MM);
+       if (IS_ERR(mm)) {
+               ret = PTR_ERR(mm);
+               goto out;
+       }
 
+       if (mm) {
+               if (mm != current->mm) {
+                       ret = exec_mmap(mm);
+                       if (ret < 0)
+                               goto out;
+                       atomic_inc(&mm->mm_users);
+               }
+               ret = 0;
+               goto out;
+       }
+
+       /* otherwise, add our mm to the objhash for future generations */
        mm = current->mm;
+       ret = cr_obj_add_ref(ctx, mm, hh->objref, CR_OBJ_MM, 0);
+       if (ret < 0)
+               goto out;
 
        /* point of no return -- destruct current mm */
        down_write(&mm->mmap_sem);
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 043535c..1bfe284 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -77,6 +77,7 @@ extern void cr_ctx_put(struct cr_ctx *ctx);
 enum {
        CR_OBJ_FILE = 1,
        CR_OBJ_INODE,
+       CR_OBJ_MM,
        CR_OBJ_MAX
 };
 
-- 
1.5.4.3

_______________________________________________
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel

Reply via email to