Add an API to change the vDSO blob type with arch_prctl().
As this is useful only for CRIU, expose this interface
under CONFIG_CHECKPOINT_RESTORE.

Cc: Andy Lutomirski <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>

[Differences from the vanilla patches:
 o the API only supports 32-bit vDSO mapping
 o the previous vDSO is unmapped using just the mm->context.vdso pointer]
Signed-off-by: Dmitry Safonov <[email protected]>
---
 arch/x86/include/asm/elf.h        |   4 ++
 arch/x86/include/uapi/asm/prctl.h |   6 +++
 arch/x86/kernel/process_64.c      |  13 +++++
 arch/x86/vdso/vdso32-setup.c      | 105 ++++++++++++++++++++++++++++++--------
 4 files changed, 107 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 5518cbccef59..e114fff00087 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -336,6 +336,10 @@ extern int x32_setup_additional_pages(struct linux_binprm 
*bprm,
 extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
 #define compat_arch_setup_additional_pages     syscall32_setup_pages
 
+#ifdef CONFIG_X86_64
+extern int do_map_compat_vdso(unsigned long addr);
+#endif
+
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
diff --git a/arch/x86/include/uapi/asm/prctl.h 
b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032fae09..405e860b2aec 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+# define ARCH_MAP_VDSO_X32     0x2001
+# define ARCH_MAP_VDSO_32      0x2002
+# define ARCH_MAP_VDSO_64      0x2003
+#endif
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c5a3c1eac62..2485430f4f2a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -554,6 +554,19 @@ long do_arch_prctl(struct task_struct *task, int code, 
unsigned long addr)
                break;
        }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+       case ARCH_MAP_VDSO_32:
+               return do_map_compat_vdso(addr);
+
+       /*
+        * The x32 and 64-bit vDSO remap API is omitted for simplicity.
+        * We need 32-bit vDSO blob mapping to restore compat
+        * applications, but not x32/64 (at least for now).
+        */
+       case ARCH_MAP_VDSO_X32:
+       case ARCH_MAP_VDSO_64:
+#endif
+
        default:
                ret = -EINVAL;
                break;
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index d49dffaabc3b..5056d0ec9ab7 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -416,6 +416,37 @@ out:
        return pages;
 }
 
+/* Call under mm->mmap_sem; on failure the caller must clear context.vdso */
+static int __arch_setup_additional_pages(unsigned long addr, bool compat)
+{
+       struct mm_struct *mm = current->mm;
+       int ret;
+
+       current->mm->context.vdso = (void *)addr;
+       /* Install a mapping unless this is a compat vDSO that uses no VMA */
+       if (compat_uses_vma || !compat) {
+               struct page **pages = uts_prep_vdso_pages_locked(compat);
+               if (IS_ERR(pages))
+                       return PTR_ERR(pages);
+
+               /*
+                * VM_MAYWRITE allows gdb to COW the page and set breakpoints
+                */
+               ret = install_special_mapping(mm, addr, PAGE_SIZE,
+                                             VM_READ|VM_EXEC|
+                                             VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+                                             pages);
+               /* A non-zero result is returned to the caller unchanged */
+               if (ret)
+                       return ret;
+       }
+       /* Point the sysenter return address into the vDSO mapped at addr */
+       current_thread_info()->sysenter_return =
+               VDSO32_SYMBOL(addr, SYSENTER_RETURN);
+
+       return 0;
+}
+
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
@@ -450,41 +481,73 @@ int arch_setup_additional_pages(struct linux_binprm 
*bprm, int uses_interp)
                }
        }
 
-       current->mm->context.vdso = (void *)addr;
+       ret = __arch_setup_additional_pages(addr, compat);
+       if (ret)
+               current->mm->context.vdso = NULL;
 
-       if (compat_uses_vma || !compat) {
-               struct page **pages = uts_prep_vdso_pages_locked(compat);
-               if (IS_ERR(pages)) {
-                       ret = PTR_ERR(pages);
-                       goto up_fail;
-               }
+up_fail:
 
-               /*
-                * MAYWRITE to allow gdb to COW and set breakpoints
-                */
-               ret = install_special_mapping(mm, addr, PAGE_SIZE,
-                                             VM_READ|VM_EXEC|
-                                             VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-                                             pages);
+       up_write(&mm->mmap_sem);
 
-               if (ret)
-                       goto up_fail;
+       return ret;
+}
+
+#ifdef CONFIG_X86_64
+
+int do_map_compat_vdso(unsigned long req_addr)
+{
+       struct mm_struct *mm = current->mm;
+       unsigned long vdso_addr;
+       struct vm_area_struct *vdso_vma;
+       int ret;
+       bool compat;
+
+       if (vdso_enabled == VDSO_DISABLED)
+               return -ENOENT;
+
+       down_write(&mm->mmap_sem);
+
+       compat = (vdso_enabled == VDSO_COMPAT);
+       /* This check may be redundant, but keep it for safety */
+       if (compat && req_addr != VDSO_HIGH_BASE) {
+               ret = -EFAULT;
+               goto up_fail;
        }
 
-       current_thread_info()->sysenter_return =
-               VDSO32_SYMBOL(addr, SYSENTER_RETURN);
+       /* Avoid duplicating security checks like security_mmap_addr() */
+       vdso_addr = get_unmapped_area(NULL, req_addr, PAGE_SIZE, 0, 0);
+       if (IS_ERR_VALUE(vdso_addr)) {
+               ret = vdso_addr;
+               goto up_fail;
+       }
+
+       if (req_addr != vdso_addr) {
+               ret = -EFAULT;
+               goto up_fail;
+       }
 
-  up_fail:
+       /*
+        * First unmap the old vDSO: install_special_mapping() may not
+        * do rlimit/cgroup accounting right, so get rid of the old
+        * mapping with do_munmap() before installing the new one.
+        */
+       vdso_vma = find_vma_intersection(mm, (unsigned long)mm->context.vdso,
+                       (unsigned long)mm->context.vdso +
+                       PAGE_SIZE*init_uts_ns.vdso.nr_pages);
+       if (vdso_vma)
+               do_munmap(mm, vdso_vma->vm_start,
+                       vdso_vma->vm_end - vdso_vma->vm_start);
+
+       ret = __arch_setup_additional_pages(req_addr, compat);
        if (ret)
                current->mm->context.vdso = NULL;
 
+up_fail:
        up_write(&mm->mmap_sem);
 
        return ret;
 }
 
-#ifdef CONFIG_X86_64
-
 subsys_initcall(sysenter_setup);
 
 #ifdef CONFIG_SYSCTL
-- 
2.9.0

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to