On 8 November 2024 03:26:58 GMT-08:00, "H. Peter Anvin" <h...@zytor.com> wrote: >On November 8, 2024 6:22:41 AM GMT+01:00, David Woodhouse ><dw...@infradead.org> wrote: >>From: David Woodhouse <d...@amazon.co.uk> >> >>Now that it's handled sanely by a linker script we can have actual data, >>and just use %rip-relative addressing to access it. >> >>If we could call the *copy* instead of the original relocate_kernel in >>the kernel text, then we could use %rip-relative addressing everywhere. >> >>Signed-off-by: David Woodhouse <d...@amazon.co.uk> >>--- >> arch/x86/kernel/relocate_kernel_64.S | 58 ++++++++++++++++------------ >> arch/x86/kernel/vmlinux.lds.S | 2 +- >> 2 files changed, 35 insertions(+), 25 deletions(-) >> >>diff --git a/arch/x86/kernel/relocate_kernel_64.S >>b/arch/x86/kernel/relocate_kernel_64.S >>index 1efcbd340528..577aa1672349 100644 >>--- a/arch/x86/kernel/relocate_kernel_64.S >>+++ b/arch/x86/kernel/relocate_kernel_64.S >>@@ -27,18 +27,28 @@ >> * ~ control_page + PAGE_SIZE are used as data storage and stack for >> * jumping back >> */ >>-#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) >> >>+ .section .data.relocate_kernel,"a"; >> /* Minimal CPU state */ >>-#define RSP DATA(0x0) >>-#define CR0 DATA(0x8) >>-#define CR3 DATA(0x10) >>-#define CR4 DATA(0x18) >>- >>+SYM_DATA_LOCAL(saved_rsp, .quad 0) >>+SYM_DATA_LOCAL(saved_cr0, .quad 0) >>+SYM_DATA_LOCAL(saved_cr3, .quad 0) >>+SYM_DATA_LOCAL(saved_cr4, .quad 0) >> /* other data */ >>-#define CP_PA_TABLE_PAGE DATA(0x20) >>-#define CP_PA_SWAP_PAGE DATA(0x28) >>-#define CP_PA_BACKUP_PAGES_MAP DATA(0x30) >>+SYM_DATA_LOCAL(pa_table_page, .quad 0) >>+SYM_DATA_LOCAL(pa_swap_page, .quad 0) >>+SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0) >>+ >>+/* >>+ * There are two physical copies of relocate_kernel(), one in the original >>+ * Kernel text and the other copied to the control page. There is a virtual >>+ * mapping of each, in the original kernel. 
It is the *original* which is >>+ * called from machine_kexec(), largely because the copy isn't mapped as an >>+ * executable page. Thus, this code cannot just use %rip-relative addressing >>+ * until after the %cr3 change and the jump to identity_mapped(). Until >>+ * then, some pointer arithmetic is required. >>+ */ >>+#define DATA(x) (x - relocate_kernel) >> >> .section .text.relocate_kernel,"ax"; >> .code64 >>@@ -63,13 +73,13 @@ SYM_CODE_START_NOALIGN(relocate_kernel) >> pushf >> >> movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 >>- movq %rsp, RSP(%r11) >>+ movq %rsp, DATA(saved_rsp)(%r11) >> movq %cr0, %rax >>- movq %rax, CR0(%r11) >>+ movq %rax, DATA(saved_cr0)(%r11) >> movq %cr3, %rax >>- movq %rax, CR3(%r11) >>+ movq %rax, DATA(saved_cr3)(%r11) >> movq %cr4, %rax >>- movq %rax, CR4(%r11) >>+ movq %rax, DATA(saved_cr4)(%r11) >> >> /* Save CR4. Required to enable the right paging mode later. */ >> movq %rax, %r13 >>@@ -94,9 +104,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) >> movq PTR(PA_SWAP_PAGE)(%rsi), %r10 >> >> /* save some information for jumping back */ >>- movq %r9, CP_PA_TABLE_PAGE(%r11) >>- movq %r10, CP_PA_SWAP_PAGE(%r11) >>- movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11) >>+ movq %r9, DATA(pa_table_page)(%r11) >>+ movq %r10, DATA(pa_swap_page)(%r11) >>+ movq %rdi, DATA(pa_backup_pages_map)(%r11) >> >> /* Save the preserve_context to %r11 as swap_pages clobbers %rcx. 
*/ >> movq %rcx, %r11 >>@@ -128,7 +138,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) >> /* set return address to 0 if not preserving context */ >> pushq $0 >> /* store the start address on the stack */ >>- pushq %rdx >>+ pushq start_address(%rip) >> >> /* >> * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP >>@@ -227,9 +237,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) >> /* get the re-entry point of the peer system */ >> movq 0(%rsp), %rbp >> leaq relocate_kernel(%rip), %r8 >>- movq CP_PA_SWAP_PAGE(%r8), %r10 >>- movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi >>- movq CP_PA_TABLE_PAGE(%r8), %rax >>+ movq pa_swap_page(%rip), %r10 >>+ movq pa_backup_pages_map(%rip), %rdi >>+ movq pa_table_page(%rip), %rax >> movq %rax, %cr3 >> lea PAGE_SIZE(%r8), %rsp >> call swap_pages >>@@ -243,11 +253,11 @@ SYM_CODE_END(identity_mapped) >> SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) >> UNWIND_HINT_END_OF_STACK >> ANNOTATE_NOENDBR // RET target, above >>- movq RSP(%r8), %rsp >>- movq CR4(%r8), %rax >>+ movq saved_rsp(%rip), %rsp >>+ movq saved_cr4(%rip), %rax >> movq %rax, %cr4 >>- movq CR3(%r8), %rax >>- movq CR0(%r8), %r8 >>+ movq saved_cr3(%rip), %rax >>+ movq saved_cr0(%rip), %r8 >> movq %rax, %cr3 >> movq %r8, %cr0 >> movq %rbp, %rax >>diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S >>index ad451371e179..65f879b31a82 100644 >>--- a/arch/x86/kernel/vmlinux.lds.S >>+++ b/arch/x86/kernel/vmlinux.lds.S >>@@ -100,7 +100,7 @@ const_pcpu_hot = pcpu_hot; >> . = ALIGN(PAGE_SIZE); \ >> __relocate_kernel_start = .; \ >> *(.text.relocate_kernel); \ >>- *(.rodata.relocate_kernel); \ >>+ *(.data.relocate_kernel); \ >> __relocate_kernel_end = .; >> #else >> #define KEXEC_RELOCATE_KERNEL_TEXT > >Looks good at first glance. I'm currently traveling so I haven't fully >reviewed it though.
Ta. That's good enough for me to go ahead and port the rest over. Is there a selftest for the preserve-context mode somewhere, with a payload that just does a "ret"? _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec