In theory and in practice, an x86_64 COMPAT=y kernel will restore i386 images,
and vice versa. There are still small problems and this does not work yet,
but it is worth mentioning anyway.

Right now, an x86_64 kernel restores only x86_64 images and 64-bit tasks.

Signed-off-by: Alexey Dobriyan <adobri...@gmail.com>
---
 arch/x86/ia32/ia32entry.S        |    2 +
 arch/x86/include/asm/unistd_64.h |    4 +
 include/linux/kstate-image.h     |   36 ++++
 include/linux/kstate.h           |    2 +-
 kernel/kstate/Makefile           |    1 +
 kernel/kstate/kstate-x86_64.c    |  336 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 380 insertions(+), 1 deletions(-)
 create mode 100644 kernel/kstate/kstate-x86_64.c

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a505202..b12e911 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -830,4 +830,6 @@ ia32_sys_call_table:
        .quad sys_inotify_init1
        .quad compat_sys_preadv
        .quad compat_sys_pwritev
+       .quad sys_checkpoint            /* 335 */
+       .quad sys_restart
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index f818294..a839c66 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -657,6 +657,10 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
 __SYSCALL(__NR_preadv, sys_preadv)
 #define __NR_pwritev                           296
 __SYSCALL(__NR_pwritev, sys_pwritev)
+#define __NR_checkpoint                                297
+__SYSCALL(__NR_checkpoint, sys_checkpoint)
+#define __NR_restart                           298
+__SYSCALL(__NR_restart, sys_restart)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/kstate-image.h b/include/linux/kstate-image.h
index 3c93432..d697d97 100644
--- a/include/linux/kstate-image.h
+++ b/include/linux/kstate-image.h
@@ -28,6 +28,7 @@ struct kstate_image_header {
        /* Mutable part. */
        /* Arch of the kernel which dumped the image. */
 #define KSTATE_ARCH_I386       1
+#define KSTATE_ARCH_X86_64     2
        __le32  kernel_arch;
        /*
         * Distributions are expected to leave image version alone and
@@ -74,6 +75,8 @@ struct kstate_image_task_struct {
 #define KSTATE_SEG_NULL                0
 #define KSTATE_SEG_USER32_CS   1
 #define KSTATE_SEG_USER32_DS   2
+#define KSTATE_SEG_USER64_CS   3
+#define KSTATE_SEG_USER64_DS   4
 #define KSTATE_SEG_TLS         0x4000  /* 0100 0000 0000 00xx */
 #define KSTATE_SEG_LDT         0x8000  /* 100x xxxx xxxx xxxx */
 
@@ -110,6 +113,39 @@ struct kstate_image_task_struct_i386 {
        /* __u8 xstate[len_xstate]; */
 } __packed;
 
+/*
+ * x86_64-specific part of a dumped task. In the image it is located
+ * directly after struct kstate_image_task_struct.
+ */
+struct kstate_image_task_struct_x86_64 {
+       /* General-purpose registers, copied from the task's struct pt_regs. */
+       __u64           r15;
+       __u64           r14;
+       __u64           r13;
+       __u64           r12;
+       __u64           rbp;
+       __u64           rbx;
+       __u64           r11;
+       __u64           r10;
+       __u64           r9;
+       __u64           r8;
+       __u64           rax;
+       __u64           rcx;
+       __u64           rdx;
+       __u64           rsi;
+       __u64           rdi;
+       __u64           orig_rax;
+       __u64           rip;
+       __u64           rflags;
+       __u64           rsp;
+
+       /* Copies of thread.fs and thread.gs. */
+       __u64           fs;
+       __u64           gs;
+       /* Segment selectors, stored as KSTATE_SEG_* codes (not raw values). */
+       __u16           cs;
+       __u16           ds;
+       __u16           es;
+       __u16           fsindex;
+       __u16           gsindex;
+       __u16           ss;
+
+       /* Copy of thread.tls_array: three 8-byte descriptors. */
+       __u64           tls_array[3];
+} __packed;
+
 struct kstate_image_mm_struct {
        struct kstate_object_header hdr;
 
diff --git a/include/linux/kstate.h b/include/linux/kstate.h
index c4b55b6..95898ec 100644
--- a/include/linux/kstate.h
+++ b/include/linux/kstate.h
@@ -67,7 +67,7 @@ int kstate_collect_all_file(struct kstate_context *ctx);
 int kstate_dump_all_file(struct kstate_context *ctx);
 int kstate_restore_file(struct kstate_context *ctx, kstate_ref_t *ref);
 
-#if defined(CONFIG_X86_32)
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_64)
 extern const __u32 kstate_kernel_arch;
 int kstate_arch_check_image_header(struct kstate_image_header *i);
 
diff --git a/kernel/kstate/Makefile b/kernel/kstate/Makefile
index ca19a22..0678fc9 100644
--- a/kernel/kstate/Makefile
+++ b/kernel/kstate/Makefile
@@ -7,3 +7,4 @@ kstate-y += kstate-mm.o
 kstate-y += kstate-object.o
 kstate-y += kstate-task.o
 kstate-$(CONFIG_X86_32) += kstate-x86_32.o
+kstate-$(CONFIG_X86_64) += kstate-x86_64.o
diff --git a/kernel/kstate/kstate-x86_64.c b/kernel/kstate/kstate-x86_64.c
new file mode 100644
index 0000000..0d85704
--- /dev/null
+++ b/kernel/kstate/kstate-x86_64.c
@@ -0,0 +1,336 @@
+/* Copyright (C) 2000-2009 Parallels Holdings, Ltd. */
+#include <linux/sched.h>
+
+#include <linux/kstate.h>
+#include <linux/kstate-image.h>
+
+/* Architecture tag of this kernel: x86_64. */
+const __u32 kstate_kernel_arch = KSTATE_ARCH_X86_64;
+
+/*
+ * Check the arch recorded in an image header.
+ *
+ * Only images whose kernel_arch field is KSTATE_ARCH_X86_64 (stored
+ * little-endian) are accepted.
+ *
+ * Returns 0 on match, -EINVAL otherwise.
+ */
+int kstate_arch_check_image_header(struct kstate_image_header *i)
+{
+       if (i->kernel_arch != cpu_to_le32(KSTATE_ARCH_X86_64))
+               return -EINVAL;
+       return 0;
+}
+
+/*
+ * Arch tag for a task. Always KSTATE_ARCH_X86_64 here; @tsk is not
+ * inspected.
+ */
+__u32 kstate_task_struct_arch(struct task_struct *tsk)
+{
+       return KSTATE_ARCH_X86_64;
+}
+
+/*
+ * Validate the RFLAGS value stored in an image.
+ *
+ * CF, PF, AF, ZF, SF, TF, DF, OF, NT, AC and ID may have any value and
+ * are masked off. What remains must be exactly IF plus bit 1 (0x2); any
+ * other bit set, or IF clear, is rejected.
+ *
+ * Returns 0 if the value is acceptable, -EINVAL otherwise.
+ */
+static int check_rflags(__u64 rflags)
+{
+       const __u64 dont_care =
+               X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+               X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
+               X86_EFLAGS_DF | X86_EFLAGS_OF | X86_EFLAGS_NT |
+               X86_EFLAGS_AC | X86_EFLAGS_ID;
+
+       rflags &= ~dont_care;
+       if (rflags == (X86_EFLAGS_IF | 0x2))
+               return 0;
+
+       /* The value printed is the masked one, not the raw image field. */
+       pr_debug("%s: rflags %016llx\n", __func__,
+                (unsigned long long)rflags);
+       return -EINVAL;
+}
+
+/*
+ * Validate a segment code taken from an image.
+ *
+ * Accepted values:
+ *   - KSTATE_SEG_NULL, KSTATE_SEG_USER64_CS, KSTATE_SEG_USER64_DS;
+ *   - KSTATE_SEG_TLS with an index no greater than
+ *     GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN in the remaining bits;
+ *   - KSTATE_SEG_LDT with an index no greater than 0x1fff.
+ *
+ * Returns 0 if the code is acceptable, -EINVAL otherwise.
+ */
+static int check_segment64(__u16 seg)
+{
+       switch (seg) {
+       case KSTATE_SEG_NULL:
+       case KSTATE_SEG_USER64_CS:
+       case KSTATE_SEG_USER64_DS:
+               return 0;
+       }
+
+       if (seg & KSTATE_SEG_TLS) {
+               if ((seg & ~KSTATE_SEG_TLS) <= GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN)
+                       return 0;
+               pr_debug("%s: seg %04x, GDT_ENTRY_TLS_MIN %u, GDT_ENTRY_TLS_MAX %u\n",
+                        __func__, seg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX);
+               return -EINVAL;
+       }
+
+       if ((seg & KSTATE_SEG_LDT) && (seg & ~KSTATE_SEG_LDT) <= 0x1fff)
+               return 0;
+
+       /* Bad LDT index or an unknown code: same diagnostic either way. */
+       pr_debug("%s: seg %04x\n", __func__, seg);
+       return -EINVAL;
+}
+
+/*
+ * Validate one TLS descriptor from an image: its l field must be 0,
+ * its s field 1 and its dpl field 3. No other fields are examined.
+ *
+ * Returns 0 if acceptable, -EINVAL otherwise.
+ */
+static int check_tls(struct desc_struct *desc)
+{
+       if (desc->l == 0 && desc->s == 1 && desc->dpl == 3)
+               return 0;
+       return -EINVAL;
+}
+
+/*
+ * Validate the x86_64 part of a task image.
+ *
+ * The arch-specific data follows @tsk_i directly. Checks, in order:
+ *   - the object is long enough to hold both structures;
+ *   - rflags passes check_rflags();
+ *   - fs and gs are below TASK_SIZE_MAX;
+ *   - cs is not KSTATE_SEG_NULL;
+ *   - cs, ds, es, fsindex, gsindex and ss pass check_segment64();
+ *   - every non-zero tls_array entry passes check_tls().
+ *
+ * NOTE(review): rip and rsp are not range-checked here -- confirm that
+ * this is intended.
+ *
+ * Returns 0 if the image is acceptable, a negative errno otherwise.
+ */
+static int
+check_image_task_struct_x86_64(struct kstate_image_task_struct *tsk_i)
+{
+       struct kstate_image_task_struct_x86_64 *i = (void *)(tsk_i + 1);
+       unsigned int n;
+       int rv;
+
+       if (tsk_i->hdr.obj_len < sizeof(*tsk_i) + sizeof(*i))
+               return -EINVAL;
+
+       rv = check_rflags(i->rflags);
+       if (rv < 0)
+               return rv;
+
+       if (i->fs >= TASK_SIZE_MAX || i->gs >= TASK_SIZE_MAX)
+               return -EINVAL;
+
+       if (i->cs == KSTATE_SEG_NULL)
+               return -EINVAL;
+       {
+               /* Same order as the individual checks used to run in. */
+               const __u16 sel[] = {
+                       i->cs, i->ds, i->es, i->fsindex, i->gsindex, i->ss,
+               };
+
+               for (n = 0; n < sizeof(sel) / sizeof(sel[0]); n++) {
+                       rv = check_segment64(sel[n]);
+                       if (rv < 0)
+                               return rv;
+               }
+       }
+
+       for (n = 0; n < sizeof(i->tls_array) / sizeof(i->tls_array[0]); n++) {
+               /* An all-zero slot is an unused descriptor; skip it. */
+               if (!i->tls_array[n])
+                       continue;
+               rv = check_tls((struct desc_struct *)&i->tls_array[n]);
+               if (rv < 0)
+                       return rv;
+       }
+
+       return 0;
+}
+
+/*
+ * Arch entry point for validating a task image. Only tasks tagged
+ * KSTATE_ARCH_X86_64 are handled; any other tsk_arch yields -EINVAL.
+ */
+int kstate_arch_check_image_task_struct(struct kstate_image_task_struct *i)
+{
+       if (i->tsk_arch != KSTATE_ARCH_X86_64)
+               return -EINVAL;
+       return check_image_task_struct_x86_64(i);
+}
+
+/*
+ * Size in bytes of the arch-specific part of a task image. It is the
+ * same for every task; @tsk is not inspected.
+ */
+unsigned int kstate_arch_len_task_struct(struct task_struct *tsk)
+{
+       return sizeof(struct kstate_image_task_struct_x86_64);
+}
+
+/*
+ * Decide whether a live task can be dumped.
+ *
+ * A task is refused, with a kernel warning, if any of these holds:
+ *   - TIF_IA32 is set (CONFIG_COMPAT only);
+ *   - TIF_DEBUG is set;
+ *   - thread.xstate is non-NULL;
+ *   - its restart_block.fn differs from the current task's;
+ *   - thread.io_bitmap_ptr is non-NULL;
+ *   - thread.ds_ctx is non-NULL (CONFIG_X86_DS only).
+ *
+ * Returns 0 if the task is acceptable, -EINVAL otherwise.
+ */
+int kstate_arch_check_task_struct(struct task_struct *tsk)
+{
+       struct restart_block *restart;
+
+#ifdef CONFIG_COMPAT
+       if (test_tsk_thread_flag(tsk, TIF_IA32)) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+#endif
+       if (test_tsk_thread_flag(tsk, TIF_DEBUG)) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+       if (tsk->thread.xstate) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+
+       /*
+        * Presumably the current task's restart handler is the default
+        * one, so a mismatch means a restart is pending -- TODO confirm.
+        */
+       restart = &task_thread_info(tsk)->restart_block;
+       if (restart->fn != current_thread_info()->restart_block.fn) {
+               WARN(1, "rb->fn = %pF\n", restart->fn);
+               return -EINVAL;
+       }
+
+       if (tsk->thread.io_bitmap_ptr) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+#ifdef CONFIG_X86_DS
+       if (tsk->thread.ds_ctx) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+#endif
+       return 0;
+}
+
+/*
+ * Translate a raw segment selector into its KSTATE_SEG_* image code.
+ *
+ *   0                   -> KSTATE_SEG_NULL
+ *   selector with bit 2 -> KSTATE_SEG_LDT | index
+ *   __USER_CS           -> KSTATE_SEG_USER64_CS
+ *   __USER_DS           -> KSTATE_SEG_USER64_DS
+ *   TLS GDT entry       -> KSTATE_SEG_TLS | (index - GDT_ENTRY_TLS_MIN)
+ *
+ * A non-zero selector whose low two bits are not both set, or one that
+ * matches none of the cases above, triggers BUG.
+ */
+static __u16 encode_segment(u16 seg)
+{
+       u16 index = seg >> 3;
+
+       if (!seg)
+               return KSTATE_SEG_NULL;
+
+       BUG_ON((seg & 3) != 3);
+
+       if (seg & 4)
+               return KSTATE_SEG_LDT | index;
+       if (seg == __USER_CS)
+               return KSTATE_SEG_USER64_CS;
+       if (seg == __USER_DS)
+               return KSTATE_SEG_USER64_DS;
+       if (index >= GDT_ENTRY_TLS_MIN && index <= GDT_ENTRY_TLS_MAX)
+               return KSTATE_SEG_TLS | (index - GDT_ENTRY_TLS_MIN);
+
+       BUG();
+}
+
+/*
+ * Translate a KSTATE_SEG_* image code back into a raw segment selector.
+ *
+ * The inverse of encode_segment(). A code that matches no known form
+ * triggers BUG, so callers are expected to pass validated codes only.
+ */
+static u16 decode_segment(__u16 seg)
+{
+       switch (seg) {
+       case KSTATE_SEG_NULL:
+               return 0;
+       case KSTATE_SEG_USER64_CS:
+               return __USER_CS;
+       case KSTATE_SEG_USER64_DS:
+               return __USER_DS;
+       }
+
+       BUILD_BUG_ON(GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN + 1 != 3);
+       if (seg & KSTATE_SEG_TLS) {
+               /*
+                * The tag bit stays cleared even when the index is out
+                * of range and control falls through to the LDT test.
+                */
+               seg &= ~KSTATE_SEG_TLS;
+               if (seg <= GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN)
+                       return ((GDT_ENTRY_TLS_MIN + seg) << 3) | 3;
+       }
+       if (seg & KSTATE_SEG_LDT) {
+               seg &= ~KSTATE_SEG_LDT;
+               return (seg << 3) | 7;
+       }
+
+       BUG();
+}
+
+/*
+ * Fill the x86_64 part of a task image from a live task.
+ *
+ * General-purpose registers, rip, rflags and rsp come from the task's
+ * pt_regs; fs/gs and the data segment selectors come from tsk->thread;
+ * cs and ss come from pt_regs. Selectors are stored in encoded form
+ * (see encode_segment()). The TLS descriptors are copied verbatim.
+ *
+ * @ctx is unused. @arch_i points to a struct
+ * kstate_image_task_struct_x86_64 to fill in.
+ *
+ * Always returns 0.
+ */
+static int dump_task_struct_x86_64(struct kstate_context *ctx,
+                                  struct task_struct *tsk, void *arch_i)
+{
+       struct kstate_image_task_struct_x86_64 *i = arch_i;
+       struct pt_regs *regs = task_pt_regs(tsk);
+
+       i->r15 = regs->r15;
+       i->r14 = regs->r14;
+       i->r13 = regs->r13;
+       i->r12 = regs->r12;
+       i->rbp = regs->bp;
+       i->rbx = regs->bx;
+       i->r11 = regs->r11;
+       i->r10 = regs->r10;
+       i->r9 = regs->r9;
+       i->r8 = regs->r8;
+       i->rax = regs->ax;
+       i->rcx = regs->cx;
+       i->rdx = regs->dx;
+       i->rsi = regs->si;
+       i->rdi = regs->di;
+       i->orig_rax = regs->orig_ax;
+       i->rip = regs->ip;
+       i->rflags = regs->flags;
+       i->rsp = regs->sp;
+
+       i->fs = tsk->thread.fs;
+       i->gs = tsk->thread.gs;
+       i->cs = encode_segment(regs->cs);
+       i->ds = encode_segment(tsk->thread.ds);
+       i->es = encode_segment(tsk->thread.es);
+       i->fsindex = encode_segment(tsk->thread.fsindex);
+       i->gsindex = encode_segment(tsk->thread.gsindex);
+       i->ss = encode_segment(regs->ss);
+
+       /* The image reserves exactly three 8-byte TLS slots. */
+       BUILD_BUG_ON(sizeof(tsk->thread.tls_array[0]) != 8);
+       BUILD_BUG_ON(sizeof(tsk->thread.tls_array) != 3 * 8);
+       memcpy(i->tls_array, tsk->thread.tls_array, sizeof(i->tls_array));
+
+       return 0;
+}
+
+/*
+ * Arch entry point for dumping a task: forwards to the x86_64 dumper
+ * and returns its result.
+ */
+int kstate_arch_dump_task_struct(struct kstate_context *ctx,
+                                struct task_struct *tsk, void *arch_i)
+{
+       return dump_task_struct_x86_64(ctx, tsk, arch_i);
+}
+
+/*
+ * Load the x86_64 part of a task image into a task.
+ *
+ * thread.sp is pointed at the task's pt_regs and thread.sp0 just past
+ * it. The register values from the image are then written into pt_regs
+ * and tsk->thread, segment codes are converted back to raw selectors
+ * and the TLS descriptors are copied in. Finally TIF_FORK is set on
+ * the task.
+ *
+ * decode_segment() hits BUG on an unknown code, so @i is presumably
+ * expected to have passed kstate_arch_check_image_task_struct() first
+ * -- confirm against the caller.
+ *
+ * Always returns 0.
+ */
+static int restore_task_struct_x86_64(struct task_struct *tsk,
+                               struct kstate_image_task_struct_x86_64 *i)
+{
+       struct pt_regs *regs = task_pt_regs(tsk);
+
+       tsk->thread.sp = (unsigned long)regs;
+       tsk->thread.sp0 = (unsigned long)(regs + 1);
+
+       regs->r15 = i->r15;
+       regs->r14 = i->r14;
+       regs->r13 = i->r13;
+       regs->r12 = i->r12;
+       regs->bp = i->rbp;
+       regs->bx = i->rbx;
+       regs->r11 = i->r11;
+       regs->r10 = i->r10;
+       regs->r9 = i->r9;
+       regs->r8 = i->r8;
+       regs->ax = i->rax;
+       regs->cx = i->rcx;
+       regs->dx = i->rdx;
+       regs->si = i->rsi;
+       regs->di = i->rdi;
+       regs->orig_ax = i->orig_rax;
+       regs->ip = i->rip;
+       regs->flags = i->rflags;
+       regs->sp = i->rsp;
+       /* Keep the saved user stack pointer in step with pt_regs. */
+       tsk->thread.usersp = regs->sp;
+
+       tsk->thread.fs = i->fs;
+       tsk->thread.gs = i->gs;
+       regs->cs = decode_segment(i->cs);
+       tsk->thread.ds = decode_segment(i->ds);
+       tsk->thread.es = decode_segment(i->es);
+       tsk->thread.fsindex = decode_segment(i->fsindex);
+       tsk->thread.gsindex = decode_segment(i->gsindex);
+       regs->ss = decode_segment(i->ss);
+
+       memcpy(tsk->thread.tls_array, i->tls_array, sizeof(i->tls_array));
+
+       set_tsk_thread_flag(tsk, TIF_FORK);
+       return 0;
+}
+
+/*
+ * Arch entry point for restoring a task from its image.
+ *
+ * The arch-specific data follows @i directly. Only KSTATE_ARCH_X86_64
+ * tasks are supported; any other tsk_arch triggers BUG.
+ */
+int kstate_arch_restore_task_struct(struct task_struct *tsk,
+                                   struct kstate_image_task_struct *i)
+{
+       if (i->tsk_arch == KSTATE_ARCH_X86_64)
+               return restore_task_struct_x86_64(tsk, (void *)(i + 1));
+       BUG();
+}
+
+/*
+ * Decide whether an mm can be dumped.
+ *
+ * An mm whose context has an LDT pointer or a non-zero size is refused
+ * with a kernel warning. Both fields are read under mm->context.lock,
+ * which is released before the warning is issued.
+ *
+ * Returns 0 if the mm is acceptable, -EINVAL otherwise.
+ */
+int kstate_arch_check_mm_struct(struct mm_struct *mm)
+{
+       int has_ldt;
+
+       mutex_lock(&mm->context.lock);
+       has_ldt = mm->context.ldt || mm->context.size != 0;
+       mutex_unlock(&mm->context.lock);
+
+       if (has_ldt) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/*
+ * Size in bytes of the arch-specific part of an mm image. It is always
+ * zero here; @mm is not inspected.
+ */
+unsigned int kstate_arch_len_mm_struct(struct mm_struct *mm)
+{
+       return 0;
+}
+
+/*
+ * Arch entry point for dumping an mm. It writes nothing, uses none of
+ * its arguments and always returns 0.
+ */
+int kstate_arch_dump_mm_struct(struct kstate_context *ctx,
+                              struct mm_struct *mm, void *arch_i)
+{
+       return 0;
+}
-- 
1.5.6.5

_______________________________________________
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel

Reply via email to