Re: [PATCH v3 04/12] ppc64/kexec_file: avoid stomping memory used by special regions
Hari Bathini writes: > On 15/07/20 8:09 am, Thiago Jung Bauermann wrote: >> >> Hari Bathini writes: >> > > > >>> +/** >>> + * __locate_mem_hole_top_down - Looks top down for a large enough memory >>> hole >>> + * in the memory regions between buf_min & >>> buf_max >>> + * for the buffer. If found, sets kbuf->mem. >>> + * @kbuf: Buffer contents and memory parameters. >>> + * @buf_min:Minimum address for the buffer. >>> + * @buf_max:Maximum address for the buffer. >>> + * >>> + * Returns 0 on success, negative errno on error. >>> + */ >>> +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, >>> + u64 buf_min, u64 buf_max) >>> +{ >>> + int ret = -EADDRNOTAVAIL; >>> + phys_addr_t start, end; >>> + u64 i; >>> + >>> + for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, >>> + MEMBLOCK_NONE, &start, &end, NULL) { >>> + if (start > buf_max) >>> + continue; >>> + >>> + /* Memory hole not found */ >>> + if (end < buf_min) >>> + break; >>> + >>> + /* Adjust memory region based on the given range */ >>> + if (start < buf_min) >>> + start = buf_min; >>> + if (end > buf_max) >>> + end = buf_max; >>> + >>> + start = ALIGN(start, kbuf->buf_align); >>> + if (start < end && (end - start + 1) >= kbuf->memsz) { >> >> This is why I dislike using start and end to express address ranges: >> >> While struct resource seems to use the [address, end] convention, my > > struct crash_mem also uses [address, end] convention. > This off-by-one error did not cause any issues as the hole start and size we > try to find > are at least page aligned. > > Nonetheless, I think fixing 'end' early in the loop with "end -= 1" would > ensure > correctness while continuing to use the same convention for structs crash_mem > & resource. Sounds good. -- Thiago Jung Bauermann IBM Linux Technology Center
Re: [PATCH v3 04/12] ppc64/kexec_file: avoid stomping memory used by special regions
On 15/07/20 8:09 am, Thiago Jung Bauermann wrote: > > Hari Bathini writes: > >> +/** >> + * __locate_mem_hole_top_down - Looks top down for a large enough memory >> hole >> + * in the memory regions between buf_min & >> buf_max >> + * for the buffer. If found, sets kbuf->mem. >> + * @kbuf: Buffer contents and memory parameters. >> + * @buf_min:Minimum address for the buffer. >> + * @buf_max:Maximum address for the buffer. >> + * >> + * Returns 0 on success, negative errno on error. >> + */ >> +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, >> + u64 buf_min, u64 buf_max) >> +{ >> +int ret = -EADDRNOTAVAIL; >> +phys_addr_t start, end; >> +u64 i; >> + >> +for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, >> + MEMBLOCK_NONE, &start, &end, NULL) { >> +if (start > buf_max) >> +continue; >> + >> +/* Memory hole not found */ >> +if (end < buf_min) >> +break; >> + >> +/* Adjust memory region based on the given range */ >> +if (start < buf_min) >> +start = buf_min; >> +if (end > buf_max) >> +end = buf_max; >> + >> +start = ALIGN(start, kbuf->buf_align); >> +if (start < end && (end - start + 1) >= kbuf->memsz) { > > This is why I dislike using start and end to express address ranges: > > While struct resource seems to use the [address, end] convention, my struct crash_mem also uses [address, end] convention. This off-by-one error did not cause any issues as the hole start and size we try to find are at least page aligned. Nonetheless, I think fixing 'end' early in the loop with "end -= 1" would ensure correctness while continuing to use the same convention for structs crash_mem & resource. Thanks Hari
Re: [PATCH v3 04/12] ppc64/kexec_file: avoid stomping memory used by special regions
Thiago Jung Bauermann writes: > Hari Bathini writes: > >> diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h >> b/arch/powerpc/include/asm/crashdump-ppc64.h >> new file mode 100644 >> index 000..90deb46 >> --- /dev/null >> +++ b/arch/powerpc/include/asm/crashdump-ppc64.h >> @@ -0,0 +1,10 @@ >> +/* SPDX-License-Identifier: GPL-2.0-only */ >> +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H >> +#define _ASM_POWERPC_CRASHDUMP_PPC64_H >> + >> +/* min & max addresses for kdump load segments */ >> +#define KDUMP_BUF_MIN (crashk_res.start) >> +#define KDUMP_BUF_MAX ((crashk_res.end < ppc64_rma_size) ? \ >> + crashk_res.end : (ppc64_rma_size - 1)) >> + >> +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */ >> diff --git a/arch/powerpc/include/asm/kexec.h >> b/arch/powerpc/include/asm/kexec.h >> index 7008ea1..bf47a01 100644 >> --- a/arch/powerpc/include/asm/kexec.h >> +++ b/arch/powerpc/include/asm/kexec.h >> @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long >> indirection_page, unsigned long reboot_co >> #ifdef CONFIG_KEXEC_FILE >> extern const struct kexec_file_ops kexec_elf64_ops; >> >> -#ifdef CONFIG_IMA_KEXEC >> #define ARCH_HAS_KIMAGE_ARCH >> >> struct kimage_arch { >> +struct crash_mem *exclude_ranges; >> + >> +#ifdef CONFIG_IMA_KEXEC >> phys_addr_t ima_buffer_addr; >> size_t ima_buffer_size; >> -}; >> #endif >> +}; >> >> int setup_purgatory(struct kimage *image, const void *slave_code, >> const void *fdt, unsigned long kernel_load_addr, >> @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void >> *fdt, >> unsigned long initrd_load_addr, >> unsigned long initrd_len, const char *cmdline); >> #endif /* CONFIG_PPC64 */ >> + >> #endif /* CONFIG_KEXEC_FILE */ >> >> #else /* !CONFIG_KEXEC_CORE */ >> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c >> index 23ad04c..c695f94 100644 >> --- a/arch/powerpc/kexec/elf_64.c >> +++ b/arch/powerpc/kexec/elf_64.c >> @@ -22,6 +22,7 @@ >> #include >> #include >> #include >> +#include >> >> 
static void *elf64_load(struct kimage *image, char *kernel_buf, >> unsigned long kernel_len, char *initrd, >> @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage *image, char >> *kernel_buf, >> if (ret) >> goto out; >> >> +if (image->type == KEXEC_TYPE_CRASH) { >> +/* min & max buffer values for kdump case */ >> +kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN; >> +kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX; > > This is only my personal opinion and an actual maintainer may disagree, > but just looking at the lines above, I would assume that KDUMP_BUF_MIN > and KDUMP_BUF_MAX were constants, when in fact they aren't. > > I suggest using static inline macros in , for > example: > > static inline resource_size_t get_kdump_buf_min(void) > { > return crashk_res.start; > } > > static inline resource_size_t get_kdump_buf_max(void) > { > return (crashk_res.end < ppc64_rma_size) ? \ >crashk_res.end : (ppc64_rma_size - 1) > } I later noticed that KDUMP_BUF_MIN and KDUMP_BUF_MAX are only used here. In this case, I think the best option is to avoid the macros and inline functions and just use the actual expressions in the code. -- Thiago Jung Bauermann IBM Linux Technology Center
Re: [PATCH v3 04/12] ppc64/kexec_file: avoid stomping memory used by special regions
Hari Bathini writes: > diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h > b/arch/powerpc/include/asm/crashdump-ppc64.h > new file mode 100644 > index 000..90deb46 > --- /dev/null > +++ b/arch/powerpc/include/asm/crashdump-ppc64.h > @@ -0,0 +1,10 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H > +#define _ASM_POWERPC_CRASHDUMP_PPC64_H > + > +/* min & max addresses for kdump load segments */ > +#define KDUMP_BUF_MIN(crashk_res.start) > +#define KDUMP_BUF_MAX((crashk_res.end < ppc64_rma_size) ? \ > + crashk_res.end : (ppc64_rma_size - 1)) > + > +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */ > diff --git a/arch/powerpc/include/asm/kexec.h > b/arch/powerpc/include/asm/kexec.h > index 7008ea1..bf47a01 100644 > --- a/arch/powerpc/include/asm/kexec.h > +++ b/arch/powerpc/include/asm/kexec.h > @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long > indirection_page, unsigned long reboot_co > #ifdef CONFIG_KEXEC_FILE > extern const struct kexec_file_ops kexec_elf64_ops; > > -#ifdef CONFIG_IMA_KEXEC > #define ARCH_HAS_KIMAGE_ARCH > > struct kimage_arch { > + struct crash_mem *exclude_ranges; > + > +#ifdef CONFIG_IMA_KEXEC > phys_addr_t ima_buffer_addr; > size_t ima_buffer_size; > -}; > #endif > +}; > > int setup_purgatory(struct kimage *image, const void *slave_code, > const void *fdt, unsigned long kernel_load_addr, > @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void > *fdt, > unsigned long initrd_load_addr, > unsigned long initrd_len, const char *cmdline); > #endif /* CONFIG_PPC64 */ > + > #endif /* CONFIG_KEXEC_FILE */ > > #else /* !CONFIG_KEXEC_CORE */ > diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c > index 23ad04c..c695f94 100644 > --- a/arch/powerpc/kexec/elf_64.c > +++ b/arch/powerpc/kexec/elf_64.c > @@ -22,6 +22,7 @@ > #include > #include > #include > +#include > > static void *elf64_load(struct kimage *image, char *kernel_buf, > unsigned long kernel_len, char 
*initrd, > @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage *image, char > *kernel_buf, > if (ret) > goto out; > > + if (image->type == KEXEC_TYPE_CRASH) { > + /* min & max buffer values for kdump case */ > + kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN; > + kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX; This is only my personal opinion and an actual maintainer may disagree, but just looking at the lines above, I would assume that KDUMP_BUF_MIN and KDUMP_BUF_MAX were constants, when in fact they aren't. I suggest using static inline functions in <asm/crashdump-ppc64.h>, for example: static inline resource_size_t get_kdump_buf_min(void) { return crashk_res.start; } static inline resource_size_t get_kdump_buf_max(void) { return (crashk_res.end < ppc64_rma_size) ? crashk_res.end : (ppc64_rma_size - 1); } > + } > + > ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); > if (ret) > goto out; > +/** > + * __locate_mem_hole_top_down - Looks top down for a large enough memory hole > + * in the memory regions between buf_min & > buf_max > + * for the buffer. If found, sets kbuf->mem. > + * @kbuf: Buffer contents and memory parameters. > + * @buf_min:Minimum address for the buffer. > + * @buf_max:Maximum address for the buffer. > + * > + * Returns 0 on success, negative errno on error. 
> + */ > +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, > + u64 buf_min, u64 buf_max) > +{ > + int ret = -EADDRNOTAVAIL; > + phys_addr_t start, end; > + u64 i; > + > + for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, > +MEMBLOCK_NONE, &start, &end, NULL) { > + if (start > buf_max) > + continue; > + > + /* Memory hole not found */ > + if (end < buf_min) > + break; > + > + /* Adjust memory region based on the given range */ > + if (start < buf_min) > + start = buf_min; > + if (end > buf_max) > + end = buf_max; > + > + start = ALIGN(start, kbuf->buf_align); > + if (start < end && (end - start + 1) >= kbuf->memsz) { This is why I dislike using start and end to express address ranges: While struct resource seems to use the [address, end] convention, my reading of the memblock code is that it uses [address, end). This is guaranteed to lead to bugs. So the above has an off-by-one error. To calculate the size of the current range, you need to use `end - start`. > +
[PATCH v3 04/12] ppc64/kexec_file: avoid stomping memory used by special regions
The crashkernel region could overlap with special memory regions like opal, rtas, tce-table & such. These regions are referred to as exclude memory ranges. Set up these ranges during image probe in order to avoid them while finding the buffer for different kdump segments. Override arch_kexec_locate_mem_hole() to locate a memory hole taking these ranges into account. Signed-off-by: Hari Bathini --- v2 -> v3: * If there are no exclude ranges, the right thing to do is to fall back to the default kexec_locate_mem_hole() implementation instead of returning 0. Fixed that. v1 -> v2: * Did arch_kexec_locate_mem_hole() override to handle special regions. * Ensured holes in the memory are accounted for while locating mem hole. * Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on the new prototype for these functions. arch/powerpc/include/asm/crashdump-ppc64.h | 10 + arch/powerpc/include/asm/kexec.h |7 - arch/powerpc/kexec/elf_64.c|7 + arch/powerpc/kexec/file_load_64.c | 324 4 files changed, 344 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h new file mode 100644 index 000..90deb46 --- /dev/null +++ b/arch/powerpc/include/asm/crashdump-ppc64.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H +#define _ASM_POWERPC_CRASHDUMP_PPC64_H + +/* min & max addresses for kdump load segments */ +#define KDUMP_BUF_MIN (crashk_res.start) +#define KDUMP_BUF_MAX ((crashk_res.end < ppc64_rma_size) ? 
\ +crashk_res.end : (ppc64_rma_size - 1)) + +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 7008ea1..bf47a01 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; -#ifdef CONFIG_IMA_KEXEC #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { + struct crash_mem *exclude_ranges; + +#ifdef CONFIG_IMA_KEXEC phys_addr_t ima_buffer_addr; size_t ima_buffer_size; -}; #endif +}; int setup_purgatory(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline); #endif /* CONFIG_PPC64 */ + #endif /* CONFIG_KEXEC_FILE */ #else /* !CONFIG_KEXEC_CORE */ diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 23ad04c..c695f94 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -22,6 +22,7 @@ #include #include #include +#include static void *elf64_load(struct kimage *image, char *kernel_buf, unsigned long kernel_len, char *initrd, @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, if (ret) goto out; + if (image->type == KEXEC_TYPE_CRASH) { + /* min & max buffer values for kdump case */ + kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN; + kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX; + } + ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); if (ret) goto out; diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index e6bff960..7673481 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -17,6 +17,9 @@ #include #include #include +#include 
+#include +#include const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_elf64_ops, @@ -24,6 +27,240 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { }; /** + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes + * regions like opal/rtas, tce-table, initrd, + * kernel, htab which should be avoided while + * setting up kexec load segments. + * @mem_ranges:Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_initrd_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_htab_mem_range(mem_ranges); + if (ret) +