[PATCH 1/3 -mm] kexec jump -v8 : kexec jump basic

2007-12-20 Thread Huang, Ying
This patch implements the functionality of jumping between the kexeced
kernel and the original kernel.

To support jumping between two kernels, the devices are put into a
quiescent state and the state of the devices and CPU is saved before
jumping to (executing) the new kernel and before jumping back to the
original kernel. After jumping back from the kexeced kernel and after
jumping to the new kernel, the state of the devices and CPU is
restored accordingly. The device/CPU state save/restore code of
software suspend is called to implement these functions.

To support jumping without reserving memory, one shadow backup page
(source page) is allocated for each page used by the new (kexeced)
kernel (destination page). During kexec_load, the image of the new
kernel is loaded into the source pages, and before executing, the
destination pages and the source pages are swapped, so the contents of
the destination pages are backed up. Before jumping to the new
(kexeced) kernel and after jumping back to the original kernel, the
destination pages and the source pages are swapped too.

A jump back protocol for kexec is defined and documented. It is an
extension to the ordinary function calling protocol, so the facility
provided by this patch can be used to call an ordinary C function in
physical mode.

A flag named KEXEC_PRESERVE_CONTEXT for sys_kexec_load is added to
indicate that the loaded kernel image is used for jumping back.

Signed-off-by: Huang Ying <[EMAIL PROTECTED]>

---
 Documentation/i386/jump_back_protocol.txt |   66 ++
 arch/powerpc/kernel/machine_kexec.c   |2 
 arch/ppc/kernel/machine_kexec.c   |2 
 arch/sh/kernel/machine_kexec.c|2 
 arch/x86/kernel/machine_kexec_32.c|   39 +-
 arch/x86/kernel/machine_kexec_64.c|2 
 arch/x86/kernel/relocate_kernel_32.S  |  194 ++
 include/asm-x86/kexec_32.h|   34 -
 include/linux/kexec.h |   14 +-
 kernel/kexec.c|   65 +-
 kernel/power/Kconfig  |2 
 kernel/sys.c  |   35 +++--
 12 files changed, 403 insertions(+), 54 deletions(-)

--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -20,6 +20,7 @@
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
 #include <asm/system.h>
+#include <asm/cacheflush.h>
 
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
 static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -83,10 +84,14 @@ static void load_segments(void)
  * reboot code buffer to allow us to avoid allocations
  * later.
  *
- * Currently nothing.
+ * Turn off NX bit for control page.
  */
 int machine_kexec_prepare(struct kimage *image)
 {
+   if (nx_enabled) {
+   change_page_attr(image->control_code_page, 1, PAGE_KERNEL_EXEC);
+   global_flush_tlb();
+   }
return 0;
 }
 
@@ -96,25 +101,45 @@ int machine_kexec_prepare(struct kimage 
  */
 void machine_kexec_cleanup(struct kimage *image)
 {
+   if (nx_enabled) {
+   change_page_attr(image->control_code_page, 1, PAGE_KERNEL);
+   global_flush_tlb();
+   }
 }
 
 /*
  * Do not allocate memory (or fail in any way) in machine_kexec().
  * We are past the point of no return, committed to rebooting now.
  */
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
 {
unsigned long page_list[PAGES_NR];
void *control_page;
+   asmlinkage NORET_TYPE void
+   (*relocate_kernel_ptr)(unsigned long indirection_page,
+  unsigned long control_page,
+  unsigned long start_address,
+  unsigned int has_pae) ATTRIB_NORET;
 
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
 
control_page = page_address(image->control_code_page);
-   memcpy(control_page, relocate_kernel, PAGE_SIZE);
+   memcpy(control_page, relocate_page, PAGE_SIZE/2);
+   KJUMP_MAGIC(control_page) = 0;
 
+   if (image->preserve_context) {
+   KJUMP_MAGIC(control_page) = KJUMP_MAGIC_NUMBER;
+   if (kexec_jump_save_cpu(control_page)) {
+   image->start = KJUMP_ENTRY(control_page);
+   return;
+   }
+   }
+
+   relocate_kernel_ptr = control_page +
+   ((void *)relocate_kernel - (void *)relocate_page);
page_list[PA_CONTROL_PAGE] = __pa(control_page);
-   page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
+   page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_PGD] = __pa(kexec_pgd);
page_list[VA_PGD] = (unsigned long)kexec_pgd;
 #ifdef CONFIG_X86_PAE
@@ -127,6 +152,7 @@ NORET_TYPE void machine_kexec(struct kim
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
page_list[PA_PTE_1] = __pa(kexec_pte1);
page_list[VA_PTE_1] = (unsigned 

[PATCH 1/3 -mm] kexec jump -v8 : kexec jump basic

2007-12-20 Thread Huang, Ying
This patch implements the functionality of jumping between the kexeced
kernel and the original kernel.

To support jumping between two kernels, the devices are put into a
quiescent state and the state of the devices and CPU is saved before
jumping to (executing) the new kernel and before jumping back to the
original kernel. After jumping back from the kexeced kernel and after
jumping to the new kernel, the state of the devices and CPU is
restored accordingly. The device/CPU state save/restore code of
software suspend is called to implement these functions.

To support jumping without reserving memory, one shadow backup page
(source page) is allocated for each page used by the new (kexeced)
kernel (destination page). During kexec_load, the image of the new
kernel is loaded into the source pages, and before executing, the
destination pages and the source pages are swapped, so the contents of
the destination pages are backed up. Before jumping to the new
(kexeced) kernel and after jumping back to the original kernel, the
destination pages and the source pages are swapped too.

A jump back protocol for kexec is defined and documented. It is an
extension to the ordinary function calling protocol, so the facility
provided by this patch can be used to call an ordinary C function in
physical mode.

A flag named KEXEC_PRESERVE_CONTEXT for sys_kexec_load is added to
indicate that the loaded kernel image is used for jumping back.

Signed-off-by: Huang Ying [EMAIL PROTECTED]

---
 Documentation/i386/jump_back_protocol.txt |   66 ++
 arch/powerpc/kernel/machine_kexec.c   |2 
 arch/ppc/kernel/machine_kexec.c   |2 
 arch/sh/kernel/machine_kexec.c|2 
 arch/x86/kernel/machine_kexec_32.c|   39 +-
 arch/x86/kernel/machine_kexec_64.c|2 
 arch/x86/kernel/relocate_kernel_32.S  |  194 ++
 include/asm-x86/kexec_32.h|   34 -
 include/linux/kexec.h |   14 +-
 kernel/kexec.c|   65 +-
 kernel/power/Kconfig  |2 
 kernel/sys.c  |   35 +++--
 12 files changed, 403 insertions(+), 54 deletions(-)

--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -20,6 +20,7 @@
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
 #include <asm/system.h>
+#include <asm/cacheflush.h>
 
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
 static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -83,10 +84,14 @@ static void load_segments(void)
  * reboot code buffer to allow us to avoid allocations
  * later.
  *
- * Currently nothing.
+ * Turn off NX bit for control page.
  */
 int machine_kexec_prepare(struct kimage *image)
 {
+   if (nx_enabled) {
+   change_page_attr(image->control_code_page, 1, PAGE_KERNEL_EXEC);
+   global_flush_tlb();
+   }
return 0;
 }
 
@@ -96,25 +101,45 @@ int machine_kexec_prepare(struct kimage 
  */
 void machine_kexec_cleanup(struct kimage *image)
 {
+   if (nx_enabled) {
+   change_page_attr(image->control_code_page, 1, PAGE_KERNEL);
+   global_flush_tlb();
+   }
 }
 
 /*
  * Do not allocate memory (or fail in any way) in machine_kexec().
  * We are past the point of no return, committed to rebooting now.
  */
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
 {
unsigned long page_list[PAGES_NR];
void *control_page;
+   asmlinkage NORET_TYPE void
+   (*relocate_kernel_ptr)(unsigned long indirection_page,
+  unsigned long control_page,
+  unsigned long start_address,
+  unsigned int has_pae) ATTRIB_NORET;
 
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
 
control_page = page_address(image->control_code_page);
-   memcpy(control_page, relocate_kernel, PAGE_SIZE);
+   memcpy(control_page, relocate_page, PAGE_SIZE/2);
+   KJUMP_MAGIC(control_page) = 0;
 
+   if (image->preserve_context) {
+   KJUMP_MAGIC(control_page) = KJUMP_MAGIC_NUMBER;
+   if (kexec_jump_save_cpu(control_page)) {
+   image->start = KJUMP_ENTRY(control_page);
+   return;
+   }
+   }
+
+   relocate_kernel_ptr = control_page +
+   ((void *)relocate_kernel - (void *)relocate_page);
page_list[PA_CONTROL_PAGE] = __pa(control_page);
-   page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
+   page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_PGD] = __pa(kexec_pgd);
page_list[VA_PGD] = (unsigned long)kexec_pgd;
 #ifdef CONFIG_X86_PAE
@@ -127,6 +152,7 @@ NORET_TYPE void machine_kexec(struct kim
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
page_list[PA_PTE_1] =