Re: [PATCH 4/4] mm: remove compat numa syscalls
On Fri, Sep 18, 2020 at 03:24:39PM +0200, Arnd Bergmann wrote: > The compat implementations for mbind, get_mempolicy, set_mempolicy > and migrate_pages are just there to handle the subtly different > layout of bitmaps on 32-bit hosts. > > The compat implementation however lacks some of the checks that > are present in the native one, in particular for checking that > the extra bits are all zero when user space has a larger mask > size than the kernel. Worse, those extra bits do not get cleared > when copying in or out of the kernel, which can lead to incorrect > data as well. > > Unify the implementation to handle the compat bitmap layout directly > in the get_nodes() and copy_nodes_to_user() helpers. Splitting out > the get_bitmap() helper from get_nodes() also helps readability of the > native case. > > On x86, two additional problems are addressed by this: compat tasks can > pass a bitmap at the end of a mapping, causing a fault when reading > across the page boundary for a 64-bit word. x32 tasks might also run > into problems with get_mempolicy corrupting data when an odd number of > 32-bit words gets passed. > > On parisc the migrate_pages() system call apparently had the wrong > calling convention, as big-endian architectures expect the words > inside of a bitmap to be swapped. This is not a problem though > since parisc has no NUMA support. > > Signed-off-by: Arnd Bergmann > --- > arch/arm64/include/asm/unistd32.h | 8 +- > arch/mips/kernel/syscalls/syscall_n32.tbl | 8 +- > arch/mips/kernel/syscalls/syscall_o32.tbl | 8 +- > arch/parisc/kernel/syscalls/syscall.tbl | 6 +- > arch/powerpc/kernel/syscalls/syscall.tbl | 8 +- > arch/s390/kernel/syscalls/syscall.tbl | 8 +- > arch/sparc/kernel/syscalls/syscall.tbl| 8 +- > arch/x86/entry/syscalls/syscall_32.tbl| 2 +- > include/linux/compat.h| 15 -- > include/uapi/asm-generic/unistd.h | 8 +- > kernel/kexec.c| 6 +- > kernel/sys_ni.c | 4 - > mm/mempolicy.c| 193 +- > 13 files changed, 79 insertions(+), 203 deletions(-) > > diff --git a/arch/arm64/include/asm/unistd32.h > b/arch/arm64/include/asm/unistd32.h > index af793775ba98..31479f7120a0 100644 > --- a/arch/arm64/include/asm/unistd32.h > +++ b/arch/arm64/include/asm/unistd32.h > @@ -649,11 +649,11 @@ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) > #define __NR_inotify_rm_watch 318 > __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) > #define __NR_mbind 319 > -__SYSCALL(__NR_mbind, compat_sys_mbind) > +__SYSCALL(__NR_mbind, sys_mbind) > #define __NR_get_mempolicy 320 > -__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy) > +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) > #define __NR_set_mempolicy 321 > -__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy) > +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) > #define __NR_openat 322 > __SYSCALL(__NR_openat, compat_sys_openat) > #define __NR_mkdirat 323 > @@ -811,7 +811,7 @@ __SYSCALL(__NR_rseq, sys_rseq) > #define __NR_io_pgetevents 399 > __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) > #define __NR_migrate_pages 400 > -__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages) > +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) > #define __NR_kexec_file_load 401 > __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) > /* 402 is unused */ > diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl > b/arch/mips/kernel/syscalls/syscall_n32.tbl > index 7fa1ca45e44c..15fda882d07e 100644 > --- a/arch/mips/kernel/syscalls/syscall_n32.tbl > +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl > @@ -239,9 +239,9 @@ > 228 n32 clock_nanosleep sys_clock_nanosleep_time32 > 229 n32 tgkill sys_tgkill > 230 n32 utimes sys_utimes_time32 > -231 n32 mbind compat_sys_mbind > -232 n32 get_mempolicy compat_sys_get_mempolicy > -233 n32 set_mempolicy compat_sys_set_mempolicy > +231 n32 mbind sys_mbind > +232 n32 get_mempolicy sys_get_mempolicy > +233 n32 set_mempolicy sys_set_mempolicy > 234 n32 mq_open compat_sys_mq_open > 235 n32 mq_unlink sys_mq_unlink > 236 n32 mq_timedsendsys_mq_timedsend_time32 > @@ -258,7 +258,7 @@ > 247 n32 inotify_initsys_inotify_init > 248 n32 inotify_add_watch sys_inotify_add_watch > 249 n32 inotify_rm_watchsys_inotify_rm_watch > -250 n32 migrate_pages compat_sys_migrate_pages > +250 n32 migrate_pages sys_migrate_pages > 251 n32 openat sys_openat > 252 n32 mk
Re: [PATCH 3/4] mm: remove compat_sys_move_pages
On Fri, Sep 18, 2020 at 03:24:38PM +0200, Arnd Bergmann wrote: > The compat move_pages() implementation uses compat_alloc_user_space() > for converting the pointer array. Moving the compat handling into > the function itself is a bit simpler and lets us avoid the > compat_alloc_user_space() call. > > Signed-off-by: Arnd Bergmann > --- > arch/arm64/include/asm/unistd32.h | 2 +- > arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- > arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- > arch/parisc/kernel/syscalls/syscall.tbl | 2 +- > arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- > arch/s390/kernel/syscalls/syscall.tbl | 2 +- > arch/sparc/kernel/syscalls/syscall.tbl| 2 +- > arch/x86/entry/syscalls/syscall_32.tbl| 2 +- > arch/x86/entry/syscalls/syscall_64.tbl| 2 +- > include/linux/compat.h| 5 --- > include/uapi/asm-generic/unistd.h | 2 +- > kernel/sys_ni.c | 1 - > mm/migrate.c | 45 +++ > 13 files changed, 32 insertions(+), 39 deletions(-) > > diff --git a/arch/arm64/include/asm/unistd32.h > b/arch/arm64/include/asm/unistd32.h > index b6517df74037..af793775ba98 100644 > --- a/arch/arm64/include/asm/unistd32.h > +++ b/arch/arm64/include/asm/unistd32.h > @@ -699,7 +699,7 @@ __SYSCALL(__NR_tee, sys_tee) > #define __NR_vmsplice 343 > __SYSCALL(__NR_vmsplice, compat_sys_vmsplice) > #define __NR_move_pages 344 > -__SYSCALL(__NR_move_pages, compat_sys_move_pages) > +__SYSCALL(__NR_move_pages, sys_move_pages) > #define __NR_getcpu 345 > __SYSCALL(__NR_getcpu, sys_getcpu) > #define __NR_epoll_pwait 346 > diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl > b/arch/mips/kernel/syscalls/syscall_n32.tbl > index ad157aab4c09..7fa1ca45e44c 100644 > --- a/arch/mips/kernel/syscalls/syscall_n32.tbl > +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl > @@ -279,7 +279,7 @@ > 268 n32 sync_file_range sys_sync_file_range > 269 n32 tee sys_tee > 270 n32 vmsplicecompat_sys_vmsplice > -271 n32 move_pages compat_sys_move_pages > +271 n32 move_pages sys_move_pages > 272 n32 set_robust_list compat_sys_set_robust_list > 273 n32 get_robust_list compat_sys_get_robust_list > 274 n32 kexec_load sys_kexec_load > diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl > b/arch/mips/kernel/syscalls/syscall_o32.tbl > index 57baf6c8008f..194c7fbeedf7 100644 > --- a/arch/mips/kernel/syscalls/syscall_o32.tbl > +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl > @@ -319,7 +319,7 @@ > 305 o32 sync_file_range sys_sync_file_range > sys32_sync_file_range > 306 o32 tee sys_tee > 307 o32 vmsplicesys_vmsplice > compat_sys_vmsplice > -308 o32 move_pages sys_move_pages > compat_sys_move_pages > +308 o32 move_pages sys_move_pages > 309 o32 set_robust_list sys_set_robust_list > compat_sys_set_robust_list > 310 o32 get_robust_list sys_get_robust_list > compat_sys_get_robust_list > 311 o32 kexec_load sys_kexec_load > diff --git a/arch/parisc/kernel/syscalls/syscall.tbl > b/arch/parisc/kernel/syscalls/syscall.tbl > index 778bf166d7bd..5c17edaffe70 100644 > --- a/arch/parisc/kernel/syscalls/syscall.tbl > +++ b/arch/parisc/kernel/syscalls/syscall.tbl > @@ -331,7 +331,7 @@ > 292 64 sync_file_range sys_sync_file_range > 293 common tee sys_tee > 294 common vmsplicesys_vmsplice > compat_sys_vmsplice > -295 common move_pages sys_move_pages > compat_sys_move_pages > +295 common move_pages sys_move_pages > 296 common getcpu sys_getcpu > 297 common epoll_pwait sys_epoll_pwait > compat_sys_epoll_pwait > 298 common statfs64sys_statfs64 > compat_sys_statfs64 > diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl > b/arch/powerpc/kernel/syscalls/syscall.tbl > index f128ba8b9a71..04fb42d7b377 100644 > --- a/arch/powerpc/kernel/syscalls/syscall.tbl > +++ b/arch/powerpc/kernel/syscalls/syscall.tbl > @@ -389,7 +389,7 @@ > 298 common faccessat sys_faccessat > 299 common get_robust_list sys_get_robust_list > compat_sys_get_robust_list > 300 common set_robust_list sys_set_robust_list > compat_sys_set_robust_list > -301 common move_pages sys_move_pages > compat_sys_move_pages > +301 common move_pa
Re: [PATCH 2/4] kexec: remove compat_sys_kexec_load syscall
On Fri, Sep 18, 2020 at 03:24:37PM +0200, Arnd Bergmann wrote: > The compat version of sys_kexec_load() uses compat_alloc_user_space to > convert the user-provided arguments into the native format. > > Move the conversion into the regular implementation with > an in_compat_syscall() check to simplify it and avoid the > compat_alloc_user_space() call. > > Signed-off-by: Arnd Bergmann > --- > arch/arm64/include/asm/unistd32.h | 2 +- > arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- > arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- > arch/parisc/kernel/syscalls/syscall.tbl | 2 +- > arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- > arch/s390/kernel/syscalls/syscall.tbl | 2 +- > arch/sparc/kernel/syscalls/syscall.tbl| 2 +- > arch/x86/entry/syscalls/syscall_32.tbl| 2 +- > arch/x86/entry/syscalls/syscall_64.tbl| 2 +- > include/linux/compat.h| 6 -- > include/uapi/asm-generic/unistd.h | 2 +- > kernel/kexec.c| 75 ++- > 12 files changed, 29 insertions(+), 72 deletions(-) > > diff --git a/arch/arm64/include/asm/unistd32.h > b/arch/arm64/include/asm/unistd32.h > index 734860ac7cf9..b6517df74037 100644 > --- a/arch/arm64/include/asm/unistd32.h > +++ b/arch/arm64/include/asm/unistd32.h > @@ -705,7 +705,7 @@ __SYSCALL(__NR_getcpu, sys_getcpu) > #define __NR_epoll_pwait 346 > __SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait) > #define __NR_kexec_load 347 > -__SYSCALL(__NR_kexec_load, compat_sys_kexec_load) > +__SYSCALL(__NR_kexec_load, sys_kexec_load) > #define __NR_utimensat 348 > __SYSCALL(__NR_utimensat, sys_utimensat_time32) > #define __NR_signalfd 349 > diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl > b/arch/mips/kernel/syscalls/syscall_n32.tbl > index f9df9edb67a4..ad157aab4c09 100644 > --- a/arch/mips/kernel/syscalls/syscall_n32.tbl > +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl > @@ -282,7 +282,7 @@ > 271 n32 move_pages compat_sys_move_pages > 272 n32 set_robust_list compat_sys_set_robust_list > 273 n32 get_robust_list compat_sys_get_robust_list > -274 n32 kexec_load compat_sys_kexec_load > +274 n32 kexec_load sys_kexec_load > 275 n32 getcpu sys_getcpu > 276 n32 epoll_pwait compat_sys_epoll_pwait > 277 n32 ioprio_set sys_ioprio_set > diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl > b/arch/mips/kernel/syscalls/syscall_o32.tbl > index 195b43cf27c8..57baf6c8008f 100644 > --- a/arch/mips/kernel/syscalls/syscall_o32.tbl > +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl > @@ -322,7 +322,7 @@ > 308 o32 move_pages sys_move_pages > compat_sys_move_pages > 309 o32 set_robust_list sys_set_robust_list > compat_sys_set_robust_list > 310 o32 get_robust_list sys_get_robust_list > compat_sys_get_robust_list > -311 o32 kexec_load sys_kexec_load > compat_sys_kexec_load > +311 o32 kexec_load sys_kexec_load > 312 o32 getcpu sys_getcpu > 313 o32 epoll_pwait sys_epoll_pwait > compat_sys_epoll_pwait > 314 o32 ioprio_set sys_ioprio_set > diff --git a/arch/parisc/kernel/syscalls/syscall.tbl > b/arch/parisc/kernel/syscalls/syscall.tbl > index def64d221cd4..778bf166d7bd 100644 > --- a/arch/parisc/kernel/syscalls/syscall.tbl > +++ b/arch/parisc/kernel/syscalls/syscall.tbl > @@ -336,7 +336,7 @@ > 297 common epoll_pwait sys_epoll_pwait > compat_sys_epoll_pwait > 298 common statfs64sys_statfs64 > compat_sys_statfs64 > 299 common fstatfs64 sys_fstatfs64 > compat_sys_fstatfs64 > -300 common kexec_load sys_kexec_load > compat_sys_kexec_load > +300 common kexec_load sys_kexec_load > 301 32 utimensat sys_utimensat_time32 > 301 64 utimensat sys_utimensat > 302 common signalfdsys_signalfd > compat_sys_signalfd > diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl > b/arch/powerpc/kernel/syscalls/syscall.tbl > index c2d737ff2e7b..f128ba8b9a71 100644 > --- a/arch/powerpc/kernel/syscalls/syscall.tbl > +++ b/arch/powerpc/kernel/syscalls/syscall.tbl > @@ -350,7 +350,7 @@ > 265 64 mq_timedreceive sys_mq_timedreceive > 266 nospu mq_notify sys_mq_notify > compat_sys_mq_notify > 267 nospu mq_getsetattr sys_mq_getsetattr > compat_sys_mq_getsetattr > -268 nospu kexec_load
Re: [PATCH 1/4] x86: add __X32_COND_SYSCALL() macro
On Fri, Sep 18, 2020 at 03:24:36PM +0200, Arnd Bergmann wrote: > sys_move_pages() is an optional syscall, and once we remove > the compat version of it in favor of the native one with an > in_compat_syscall() check, the x32 syscall table refers to > a __x32_sys_move_pages symbol that may not exist when the > syscall is disabled. > > Change the COND_SYSCALL() definition on x86 to also include > the redirection for x32. > > Signed-off-by: Arnd Bergmann Adding the x86 maintainers and Brian Gerst. Brian proposed another problem to the mess that most of the compat syscall handlers used by x32 here: https://lkml.org/lkml/2020/6/16/664 hpa didn't particularly like it, but with your and my pending series we'll soon use more native than compat syscalls for x32, so something will need to change.. > --- > arch/x86/include/asm/syscall_wrapper.h | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/arch/x86/include/asm/syscall_wrapper.h > b/arch/x86/include/asm/syscall_wrapper.h > index a84333adeef2..5eacd35a7f97 100644 > --- a/arch/x86/include/asm/syscall_wrapper.h > +++ b/arch/x86/include/asm/syscall_wrapper.h > @@ -171,12 +171,16 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs > *regs); > __SYS_STUBx(x32, compat_sys##name, \ > SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) > > +#define __X32_COND_SYSCALL(name) \ > + __COND_SYSCALL(x32, sys_##name) > + > #define __X32_COMPAT_COND_SYSCALL(name) > \ > __COND_SYSCALL(x32, compat_sys_##name) > > #define __X32_COMPAT_SYS_NI(name)\ > __SYS_NI(x32, compat_sys_##name) > #else /* CONFIG_X86_X32 */ > +#define __X32_COND_SYSCALL(name) > #define __X32_COMPAT_SYS_STUB0(name) > #define __X32_COMPAT_SYS_STUBx(x, name, ...) > #define __X32_COMPAT_COND_SYSCALL(name) > @@ -253,6 +257,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs > *regs); > static long __do_sys_##sname(const struct pt_regs *__unused) > > #define COND_SYSCALL(name) \ > + __X32_COND_SYSCALL(name)\ > __X64_COND_SYSCALL(name)\ > __IA32_COND_SYSCALL(name) > > -- > 2.27.0 > ---end quoted text--- ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH] MIPS: Loongson64: Add kexec/kdump support
Hi, Jinyang, On Fri, Sep 18, 2020 at 2:20 PM Jinyang He wrote: > > On 09/17/2020 09:52 PM, Zhou Yanjie wrote: > > Hello, > > > > 在 2020/9/17 下午8:41, Jinyang He 写道: > >> Hi, Huacai, > >> > >> > >> On 09/16/2020 01:39 PM, Huacai Chen wrote: > >>> Hi, Jinyang, > >>> > >>> On Tue, Sep 15, 2020 at 10:17 PM Jinyang He > >>> wrote: > > > On 09/16/2020 09:33 AM, Jiaxun Yang wrote: > > 于 2020年9月15日 GMT+08:00 下午9:07:43, Jinyang He > > 写到: > >> Add loongson_kexec_prepare(), loongson_kexec_shutdown() and > >> loongson_kexec_crashdown() for passing the parameters of kexec_args. > >> > >> To start loongson64, CPU0 needs 3 parameters: > >> fw_arg0: the number of cmd. > >> fw_arg1: cmd structure which seems strange, the cmd array[index]'s > >> value is cmd string's address, index >= 1. > >> fw_arg2: environment. > >> > >> Secondary CPUs do not need parameter at once. They query their > >> mailbox to get PC, SP and GP in a loop before CPU0 brings them up > >> and passes these parameters via mailbox. > >> > >> loongson_kexec_prepare(): Alloc new memory to save cmd for kexec. > >> Combine the kexec append option string as cmd structure, and the cmd > >> struct will be parsed in fw_init_cmdline() of > >> arch/mips/fw/lib/cmdline.c. > >> image->control_code_page need pointing to a safe memory page. In > >> order to > >> maintain compatibility for the old firmware the low 2MB is reserverd > >> and safe for Loongson. So let it points here. > >> > >> loongson_kexec_shutdown(): Wake up all present CPUs and let them go > >> to reboot_code_buffer. Pass the kexec parameters to kexec_args. > >> > >> loongson_crash_shutdown(): Pass the kdump parameters to kexec_args. > >> > >> The assembly part provide a way like BIOS doing to keep secondary > >> CPUs in a querying loop. > >> > >> This patch referenced [1][2][3]. > >> > >> [1] arch/mips/cavium-octeon/setup.c > >> [2] https://patchwork.kernel.org/patch/10799217/ > >> [3] > >> https://gitee.com/loongsonlab/qemu/blob/master/hw/mips/loongson3a_rom.h > >> > >> > >> Co-developed-by: Youling Tang > >> Signed-off-by: Youling Tang > >> Signed-off-by: Jinyang He > >> --- > >> arch/mips/kernel/relocate_kernel.S | 19 > >> arch/mips/loongson64/reset.c | 88 > >> ++ > >> 2 files changed, 107 insertions(+) > >> > >> diff --git a/arch/mips/kernel/relocate_kernel.S > >> b/arch/mips/kernel/relocate_kernel.S > >> index ac87089..061cbfb 100644 > >> --- a/arch/mips/kernel/relocate_kernel.S > >> +++ b/arch/mips/kernel/relocate_kernel.S > >> @@ -133,6 +133,25 @@ LEAF(kexec_smp_wait) > >> #else > >> sync > >> #endif > >> + > >> +#ifdef CONFIG_CPU_LOONGSON64 > >> +#define MAILBOX_BASE 0x90003ff01000 > > Please avoid hardcoded SMP information. You're breaking Loongson > > 3B support. > > > Ok, I see. Since my machine is Loongson 3A. I'll send v2 > after I test it in 3B. > >>> 1, My original version can work on both Loongson-3A and Loongson-3B, > >>> why you modify my patch and hadn't discuss with me? > >>> > >>> 2, With this single patch both kexec and kdump cannot work reliably, > >>> because kexec need this patch: > >>> https://patchwork.kernel.org/patch/11695929/ > >>> > >>> and kdump need my first patch in my original version: > >>> https://patchwork.kernel.org/patch/10799215/ > >>> > >>> You may argue that you have tested. Yes, I believe that, I'm not > >>> saying that you haven't test, and I'm not saying that your patch > >>> cannot work, I'm just saying that your patch is not robust. > >>> > >>> 3, I'm the original author and paying attention to kexec/kdump > >>> continuosly, I will send a new version once the above two patches be > >>> accepted. But you re-send my patch without any communication with me, > >>> why you so impatient? > >>> > >>> Huacai > >>> > >> > >> 1, Your original version: > >>https://patchwork.kernel.org/patch/10799217/ > >> > >> This patch can work on Loongson-3A, I tested it. > >> > >> But it works wrong after the follow behaviors, > >>kexec -l vmlinux --append=cmdline_kexec > >>kexec -p vmlinux --append=cmdline_kdump > >>kexec -e > >> > > > > What 's wrong with you Loongson guys? Why do you always send patches > > with the same functions as the original author withou any > > communication with the original author? Especially the original author > > of the patch is the maintainer of Loongson64 architecture. > > Hi, > > Just take it easy, try to keep calm and do not panic. > > You do not understand the story, the background, what happened before, > and what happens next, so please let me express my opinion. > > When I use the mainline kernel on the Loongson platform, I find kexec > can not work well
Re: [PATCH] Only allow to set crash_kexec_post_notifiers on boot time
On Fri, 18 Sep 2020 11:25:46 +0800 Dave Young wrote: > crash_kexec_post_notifiers enables running various panic notifier > before kdump kernel booting. This increases risks of kdump failure. > It is well documented in kernel-parameters.txt. We do not suggest > people to enable it together with kdump unless he/she is really sure. > This is also not suggested to be enabled by default when users are > not aware in distributions. > > But unfortunately it is enabled by default in systemd, see below > discussions in a systemd report, we can not convince systemd to change > it: > https://github.com/systemd/systemd/issues/16661 > > Actually we have got reports about kdump kernel hangs in both s390x > and powerpcle cases caused by the systemd change, also some x86 cases > could also be caused by the same (although that is in Hyper-V code > instead of systemd, that need to be addressed separately). > > Thus to avoid the auto enablement here just disable the param writable > permission in sysfs. > Well. I don't think this is at all a desirable way of resolving a disagreement with the systemd developers At the above github address I'm seeing "ryncsn added a commit to ryncsn/systemd that referenced this issue 9 days ago", "pstore: don't enable crash_kexec_post_notifiers by default". So didn't that address the issue? ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH printk v2 1/3] printk: move printk_info into separate array
The majority of the size of a descriptor is taken up by meta data, which is often not of interest to the ringbuffer (for example, when performing state checks). Since descriptors are often temporarily stored on the stack, keeping their size minimal will help reduce stack pressure. Rather than embedding the printk_info into the descriptor, create a separate printk_info array. The index of a descriptor in the descriptor array corresponds to the printk_info with the same index in the printk_info array. The rules for validity of a printk_info match the existing rules for the data blocks: the descriptor must be in a consistent state. Signed-off-by: John Ogness --- kernel/printk/printk.c| 30 +-- kernel/printk/printk_ringbuffer.c | 145 +++--- kernel/printk/printk_ringbuffer.h | 29 +++--- 3 files changed, 133 insertions(+), 71 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9a2e23191576..25cfe4fe48af 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -959,11 +959,11 @@ void log_buf_vmcoreinfo_setup(void) VMCOREINFO_STRUCT_SIZE(prb_desc_ring); VMCOREINFO_OFFSET(prb_desc_ring, count_bits); VMCOREINFO_OFFSET(prb_desc_ring, descs); + VMCOREINFO_OFFSET(prb_desc_ring, infos); VMCOREINFO_OFFSET(prb_desc_ring, head_id); VMCOREINFO_OFFSET(prb_desc_ring, tail_id); VMCOREINFO_STRUCT_SIZE(prb_desc); - VMCOREINFO_OFFSET(prb_desc, info); VMCOREINFO_OFFSET(prb_desc, state_var); VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); VMCOREINFO_OFFSET(prb_desc, dict_blk_lpos); @@ -1097,11 +1097,13 @@ static char setup_dict_buf[CONSOLE_EXT_LOG_MAX] __initdata; void __init setup_log_buf(int early) { + struct printk_info *new_infos; unsigned int new_descs_count; struct prb_desc *new_descs; struct printk_info info; struct printk_record r; size_t new_descs_size; + size_t new_infos_size; unsigned long flags; char *new_dict_buf; char *new_log_buf; @@ -1142,8 +1144,7 @@ void __init setup_log_buf(int early) if (unlikely(!new_dict_buf)) { pr_err("log_buf_len: %lu dict bytes not available\n", new_log_buf_len); - memblock_free(__pa(new_log_buf), new_log_buf_len); - return; + goto err_free_log_buf; } new_descs_size = new_descs_count * sizeof(struct prb_desc); @@ -1151,9 +1152,15 @@ void __init setup_log_buf(int early) if (unlikely(!new_descs)) { pr_err("log_buf_len: %zu desc bytes not available\n", new_descs_size); - memblock_free(__pa(new_dict_buf), new_log_buf_len); - memblock_free(__pa(new_log_buf), new_log_buf_len); - return; + goto err_free_dict_buf; + } + + new_infos_size = new_descs_count * sizeof(struct printk_info); + new_infos = memblock_alloc(new_infos_size, LOG_ALIGN); + if (unlikely(!new_infos)) { + pr_err("log_buf_len: %zu info bytes not available\n", + new_infos_size); + goto err_free_descs; } prb_rec_init_rd(&r, &info, @@ -1163,7 +1170,8 @@ void __init setup_log_buf(int early) prb_init(&printk_rb_dynamic, new_log_buf, ilog2(new_log_buf_len), new_dict_buf, ilog2(new_log_buf_len), -new_descs, ilog2(new_descs_count)); +new_descs, ilog2(new_descs_count), +new_infos); logbuf_lock_irqsave(flags); @@ -1192,6 +1200,14 @@ void __init setup_log_buf(int early) pr_info("log_buf_len: %u bytes\n", log_buf_len); pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); + return; + +err_free_descs: + memblock_free(__pa(new_descs), new_descs_size); +err_free_dict_buf: + memblock_free(__pa(new_dict_buf), new_log_buf_len); +err_free_log_buf: + memblock_free(__pa(new_log_buf), new_log_buf_len); } static bool __read_mostly ignore_loglevel; diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index f4e2e9890e0f..de4b10a98623 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -15,10 +15,10 @@ * The printk_ringbuffer is made up of 3 internal ringbuffers: * * desc_ring - * A ring of descriptors. A descriptor contains all record meta data - * (sequence number, timestamp, loglevel, etc.) as well as internal state - * information about the record and logical positions specifying where in - * the other ringbuffers the text and dictionary strings are located. + * A ring of descriptors and their meta data (such as sequence number, + * timestamp, loglevel, etc.) as well as internal state information about + * the record and logica
[PATCH printk v2 3/3] printk: remove dict ring
Since there is no code that will ever store anything into the dict ring, remove it. If any future dictionary properties are to be added, these should be added to the struct printk_info. Signed-off-by: John Ogness --- kernel/printk/printk.c| 46 +++-- kernel/printk/printk_ringbuffer.c | 155 +++--- kernel/printk/printk_ringbuffer.h | 63 +++- 3 files changed, 64 insertions(+), 200 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 269f0abd1ddf..77660354a7c5 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -427,7 +427,6 @@ static u32 log_buf_len = __LOG_BUF_LEN; * Define the average message size. This only affects the number of * descriptors that will be available. Underestimating is better than * overestimating (too many available descriptors is better than not enough). - * The dictionary buffer will be the same size as the text buffer. */ #define PRB_AVGBITS 5 /* 32 character average length */ @@ -435,7 +434,7 @@ static u32 log_buf_len = __LOG_BUF_LEN; #error CONFIG_LOG_BUF_SHIFT value too small. #endif _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, -PRB_AVGBITS, PRB_AVGBITS, &__log_buf[0]); +PRB_AVGBITS, &__log_buf[0]); static struct printk_ringbuffer printk_rb_dynamic; @@ -502,12 +501,12 @@ static int log_store(u32 caller_id, int facility, int level, struct printk_record r; u16 trunc_msg_len = 0; - prb_rec_init_wr(&r, text_len, 0); + prb_rec_init_wr(&r, text_len); if (!prb_reserve(&e, prb, &r)) { /* truncate the message if it is too long for empty buffer */ truncate_msg(&text_len, &trunc_msg_len); - prb_rec_init_wr(&r, text_len + trunc_msg_len, 0); + prb_rec_init_wr(&r, text_len + trunc_msg_len); /* survive when the log buffer is too small for trunc_msg */ if (!prb_reserve(&e, prb, &r)) return 0; @@ -897,8 +896,7 @@ static int devkmsg_open(struct inode *inode, struct file *file) mutex_init(&user->lock); prb_rec_init_rd(&user->record, &user->info, - &user->text_buf[0], sizeof(user->text_buf), - NULL, 0); + &user->text_buf[0], sizeof(user->text_buf)); logbuf_lock_irq(); user->seq = prb_first_valid_seq(prb); @@ -956,7 +954,6 @@ void log_buf_vmcoreinfo_setup(void) VMCOREINFO_STRUCT_SIZE(printk_ringbuffer); VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring); VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring); - VMCOREINFO_OFFSET(printk_ringbuffer, dict_data_ring); VMCOREINFO_OFFSET(printk_ringbuffer, fail); VMCOREINFO_STRUCT_SIZE(prb_desc_ring); @@ -969,7 +966,6 @@ void log_buf_vmcoreinfo_setup(void) VMCOREINFO_STRUCT_SIZE(prb_desc); VMCOREINFO_OFFSET(prb_desc, state_var); VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); - VMCOREINFO_OFFSET(prb_desc, dict_blk_lpos); VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos); VMCOREINFO_OFFSET(prb_data_blk_lpos, begin); @@ -979,7 +975,6 @@ void log_buf_vmcoreinfo_setup(void) VMCOREINFO_OFFSET(printk_info, seq); VMCOREINFO_OFFSET(printk_info, ts_nsec); VMCOREINFO_OFFSET(printk_info, text_len); - VMCOREINFO_OFFSET(printk_info, dict_len); VMCOREINFO_OFFSET(printk_info, caller_id); VMCOREINFO_OFFSET(printk_info, dev_info); @@ -1080,7 +1075,7 @@ static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, struct prb_reserved_entry e; struct printk_record dest_r; - prb_rec_init_wr(&dest_r, r->info->text_len, 0); + prb_rec_init_wr(&dest_r, r->info->text_len); if (!prb_reserve(&e, rb, &dest_r)) return 0; @@ -,7 +1106,6 @@ void __init setup_log_buf(int early) size_t new_descs_size; size_t new_infos_size; unsigned long flags; - char *new_dict_buf; char *new_log_buf; unsigned int free; u64 seq; @@ -1146,19 +1140,12 @@ void __init setup_log_buf(int early) return; } - new_dict_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); - if (unlikely(!new_dict_buf)) { - pr_err("log_buf_len: %lu dict bytes not available\n", - new_log_buf_len); - goto err_free_log_buf; - } - new_descs_size = new_descs_count * sizeof(struct prb_desc); new_descs = memblock_alloc(new_descs_size, LOG_ALIGN); if (unlikely(!new_descs)) { pr_err("log_buf_len: %zu desc bytes not available\n", new_descs_size); - goto err_free_dict_buf; + goto err_free_log_buf; } new_infos_size = new_descs_count * sizeof(struct printk_info); @@ -1169,13 +1156,10
[PATCH printk v2 2/3] printk: move dictionary keys to dev_printk_info
Dictionaries are only used for SUBSYSTEM and DEVICE properties. The current implementation stores the property names each time they are used. This requires more space than otherwise necessary. Also, because the dictionary entries are currently considered optional, it cannot be relied upon that they are always available, even if the writer wanted to store them. These issues will increase should new dictionary properties be introduced. Rather than storing the subsystem and device properties in the dict ring, introduce a struct dev_printk_info with separate fields to store only the property values. Embed this struct within the struct printk_info to provide guaranteed availability. Signed-off-by: John Ogness --- Documentation/admin-guide/kdump/gdbmacros.txt | 73 drivers/base/core.c | 46 ++--- include/linux/dev_printk.h| 8 + include/linux/printk.h| 6 +- kernel/printk/internal.h | 4 +- kernel/printk/printk.c| 165 +- kernel/printk/printk_ringbuffer.h | 3 + kernel/printk/printk_safe.c | 2 +- scripts/gdb/linux/dmesg.py| 16 +- 9 files changed, 163 insertions(+), 160 deletions(-) diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt index 94fabb165abf..82aecdcae8a6 100644 --- a/Documentation/admin-guide/kdump/gdbmacros.txt +++ b/Documentation/admin-guide/kdump/gdbmacros.txt @@ -172,13 +172,13 @@ end define dump_record set var $desc = $arg0 - if ($argc > 1) - set var $prev_flags = $arg1 + set var $info = $arg1 + if ($argc > 2) + set var $prev_flags = $arg2 else set var $prev_flags = 0 end - set var $info = &$desc->info set var $prefix = 1 set var $newline = 1 @@ -237,44 +237,36 @@ define dump_record # handle dictionary data - set var $begin = $desc->dict_blk_lpos.begin % (1U << prb->dict_data_ring.size_bits) - set var $next = $desc->dict_blk_lpos.next % (1U << prb->dict_data_ring.size_bits) - - # handle data-less record - if ($begin & 1) - set var $dict_len = 0 - set var $dict = "" - else - # handle wrapping data block - if ($begin > $next) - set var $begin = 0 - end - - # skip over descriptor id - set var $begin = $begin + sizeof(long) - - # handle truncated message - if ($next - $begin < $info->dict_len) - set var $dict_len = $next - $begin - else - set var $dict_len = $info->dict_len + set var $dict = &$info->dev_info.subsystem[0] + set var $dict_len = sizeof($info->dev_info.subsystem) + if ($dict[0] != '\0') + printf " SUBSYSTEM=" + set var $idx = 0 + while ($idx < $dict_len) + set var $c = $dict[$idx] + if ($c == '\0') + loop_break + else + if ($c < ' ' || $c >= 127 || $c == '\\') + printf "\\x%02x", $c + else + printf "%c", $c + end + end + set var $idx = $idx + 1 end - - set var $dict = &prb->dict_data_ring.data[$begin] + printf "\n" end - if ($dict_len > 0) + set var $dict = &$info->dev_info.device[0] + set var $dict_len = sizeof($info->dev_info.device) + if ($dict[0] != '\0') + printf " DEVICE=" set var $idx = 0 - set var $line = 1 while ($idx < $dict_len) - if ($line) - printf " " - set var $line = 0 - end set var $c = $dict[$idx] if ($c == '\0') - printf "\n" - set var $line = 1 + loop_break else if ($c < ' ' || $c >= 127 || $c == '\\') printf "\\x%02x", $c @@ -288,10 +280,10 @@ define dump_record end end document dump_record - Dump a single record. The first parameter is the descriptor - sequence number, the second is optional and specifies the - previous record's flags, used for properly formatting - continued lines. + Dump a single record. The first parameter is the descriptor, + the second parameter is the in
[PATCH printk v2 0/3] printk: move dictionaries to meta data
Hello, Here is v2 for a series to move all existing dictionary properties (SUBSYSTEM and DEVICE) into the meta data of a record, thus eliminating the need for the dict ring. This change affects how the dictionaries are stored, but does not affect how they are presented to userspace. (v1 is here [0]). The main purpose of the change is to address concerns [1] about the reliability of dictionary properties as well as allowing to efficiently expand the type and amount of meta data available [2]. This series is based heavily on the proof of concept [3] from Petr Mladek. (Petr, feel free to add Co-developed-by tags.) The series is based on the printk-rework branch of the printk git tree: f5f022e53b87 ("printk: reimplement log_cont using record extension") The list of changes since v1: drivers/base/core.c === - set_dev_info(): use strscpy() instead of snprintf() (thank you Rasmus Villemoes) kernel/printk/printk.c == - setup_log_buf(): fix cleanup in error handling - log_buf_vmcoreinfo_setup(): add VMCOREINFO for struct dev_printk_info array sizes so that crash tools do not need to rely on property value termination John Ogness [0] https://lkml.kernel.org/r/20200917131644.25838-1-john.ogn...@linutronix.de [1] https://lkml.kernel.org/r/20200904151336.GC20558@alley [2] https://lkml.kernel.org/r/008801d684f9$43e1c140$cba543c0$@samsung.com [3] https://lkml.kernel.org/r/20200911095035.GI3864@alley John Ogness (3): printk: move printk_info into separate array printk: move dictionary keys to dev_printk_info printk: remove dict ring Documentation/admin-guide/kdump/gdbmacros.txt | 73 ++--- drivers/base/core.c | 46 +-- include/linux/dev_printk.h| 8 + include/linux/printk.h| 6 +- kernel/printk/internal.h | 4 +- kernel/printk/printk.c| 221 ++--- kernel/printk/printk_ringbuffer.c | 292 -- kernel/printk/printk_ringbuffer.h | 95 ++ kernel/printk/printk_safe.c | 2 +- scripts/gdb/linux/dmesg.py| 16 +- 10 files changed, 346 insertions(+), 417 deletions(-) -- 2.20.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH printk 3/3] printk: remove dict ring
On Thu 2020-09-17 15:22:44, John Ogness wrote: > Since there is no code that will ever store anything into the dict > ring, remove it. If any future dictionary properties are to be > added, these should be added to the struct printk_info. > > Signed-off-by: John Ogness I like the result. It simplified the code and removed some twists. Reviewed-by: Petr Mladek Best Regards, Petr ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH printk 2/3] printk: move dictionary keys to dev_printk_info
On Fri 2020-09-18 14:32:41, Rasmus Villemoes wrote: > On 18/09/2020 14.13, Petr Mladek wrote: > > On Fri 2020-09-18 08:16:37, Rasmus Villemoes wrote: > >> On 17/09/2020 15.16, John Ogness wrote: > >> > >>> if (dev->class) > >>> subsys = dev->class->name; > >>> else if (dev->bus) > >>> subsys = dev->bus->name; > >>> else > >>> - return 0; > >>> + return; > >>> > >>> - pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); > >>> - if (pos >= hdrlen) > >>> - goto overflow; > >>> + snprintf(dev_info->subsystem, sizeof(dev_info->subsystem), subsys); > >> > >> It's unlikely that subsys would contain a %, but this will be yet > >> another place to spend brain cycles ignoring if doing static analysis. > >> So can we not do this. Either of strXcpy() for X=s,l will do the same > >> thing, and likely faster. > > > > Good point! Better be on the safe size in a generic printk() API. > > > > Well, I am afraid that this would be only small drop in a huge lake. > > class->name and bus->name seems to be passed to %s in so many > > *print*() calls all over the kernel code. > > So what? printf("%s", some_string_that_might_contain_percent_chars) is > not a problem. Grr, shame on me. I have completely messed this. The combination of Friday afternoon and noisy kids did not help me much to get it right. > printf(some_string_that_might_contain_percent_chars) is. I fully agree that passing unknown string as "fmt" is dangerous and must be used carefully. It is not needed here. > And yes, one could do > > snprintf(dev_info->subsystem, sizeof(dev_info->subsystem), "%s", subsys); > > but one might as well avoid the snprintf overhead and use one of the > strX functions that have the exact same semantics (copy as much as > there's room for, ensure nul-termination). Yes, we should use either snprinf() with %s or strXcpy(). Best Regards, Petr ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH -next] fs/proc/vmcore: Fix 'sizez' kernel-doc warning in vmcore.c
Fixes the following W=1 kernel build warning(s): fs/proc/vmcore.c:458: warning: Excess function parameter 'sizez' description in 'vmcore_alloc_buf' Rename sizez to size. Signed-off-by: Wang Hai --- fs/proc/vmcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index c3a345c28a93..bfb0aa4d63e4 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -446,7 +446,7 @@ static const struct vm_operations_struct vmcore_mmap_ops = { /** * vmcore_alloc_buf - allocate buffer in vmalloc memory - * @sizez: size of buffer + * @size: size of buffer * * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap * the buffer to user-space by means of remap_vmalloc_range(). -- 2.17.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH 3/4] mm: remove compat_sys_move_pages
The compat move_pages() implementation uses compat_alloc_user_space() for converting the pointer array. Moving the compat handling into the function itself is a bit simpler and lets us avoid the compat_alloc_user_space() call. Signed-off-by: Arnd Bergmann --- arch/arm64/include/asm/unistd32.h | 2 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl| 2 +- arch/x86/entry/syscalls/syscall_32.tbl| 2 +- arch/x86/entry/syscalls/syscall_64.tbl| 2 +- include/linux/compat.h| 5 --- include/uapi/asm-generic/unistd.h | 2 +- kernel/sys_ni.c | 1 - mm/migrate.c | 45 +++ 13 files changed, 32 insertions(+), 39 deletions(-) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index b6517df74037..af793775ba98 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -699,7 +699,7 @@ __SYSCALL(__NR_tee, sys_tee) #define __NR_vmsplice 343 __SYSCALL(__NR_vmsplice, compat_sys_vmsplice) #define __NR_move_pages 344 -__SYSCALL(__NR_move_pages, compat_sys_move_pages) +__SYSCALL(__NR_move_pages, sys_move_pages) #define __NR_getcpu 345 __SYSCALL(__NR_getcpu, sys_getcpu) #define __NR_epoll_pwait 346 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index ad157aab4c09..7fa1ca45e44c 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -279,7 +279,7 @@ 268n32 sync_file_range sys_sync_file_range 269n32 tee sys_tee 270n32 vmsplicecompat_sys_vmsplice -271n32 move_pages compat_sys_move_pages +271n32 move_pages sys_move_pages 272n32 set_robust_list compat_sys_set_robust_list 273n32 get_robust_list compat_sys_get_robust_list 274n32 kexec_load sys_kexec_load diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 57baf6c8008f..194c7fbeedf7 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -319,7 +319,7 @@ 305o32 sync_file_range sys_sync_file_range sys32_sync_file_range 306o32 tee sys_tee 307o32 vmsplicesys_vmsplice compat_sys_vmsplice -308o32 move_pages sys_move_pages compat_sys_move_pages +308o32 move_pages sys_move_pages 309o32 set_robust_list sys_set_robust_list compat_sys_set_robust_list 310o32 get_robust_list sys_get_robust_list compat_sys_get_robust_list 311o32 kexec_load sys_kexec_load diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 778bf166d7bd..5c17edaffe70 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -331,7 +331,7 @@ 29264 sync_file_range sys_sync_file_range 293common tee sys_tee 294common vmsplicesys_vmsplice compat_sys_vmsplice -295common move_pages sys_move_pages compat_sys_move_pages +295common move_pages sys_move_pages 296common getcpu sys_getcpu 297common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 298common statfs64sys_statfs64 compat_sys_statfs64 diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index f128ba8b9a71..04fb42d7b377 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -389,7 +389,7 @@ 298common faccessat sys_faccessat 299common get_robust_list sys_get_robust_list compat_sys_get_robust_list 300common set_robust_list sys_set_robust_list compat_sys_set_robust_list -301common move_pages sys_move_pages compat_sys_move_pages +301common move_pages sys_move_pages 302common getcpu sys_getcpu 303nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 304
[PATCH 4/4] mm: remove compat numa syscalls
The compat implementations for mbind, get_mempolicy, set_mempolicy and migrate_pages are just there to handle the subtly different layout of bitmaps on 32-bit hosts. The compat implementation however lacks some of the checks that are present in the native one, in particular for checking that the extra bits are all zero when user space has a larger mask size than the kernel. Worse, those extra bits do not get cleared when copying in or out of the kernel, which can lead to incorrect data as well. Unify the implementation to handle the compat bitmap layout directly in the get_nodes() and copy_nodes_to_user() helpers. Splitting out the get_bitmap() helper from get_nodes() also helps readability of the native case. On x86, two additional problems are addressed by this: compat tasks can pass a bitmap at the end of a mapping, causing a fault when reading across the page boundary for a 64-bit word. x32 tasks might also run into problems with get_mempolicy corrupting data when an odd number of 32-bit words gets passed. On parisc the migrate_pages() system call apparently had the wrong calling convention, as big-endian architectures expect the words inside of a bitmap to be swapped. This is not a problem though since parisc has no NUMA support. Signed-off-by: Arnd Bergmann --- arch/arm64/include/asm/unistd32.h | 8 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 8 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 8 +- arch/parisc/kernel/syscalls/syscall.tbl | 6 +- arch/powerpc/kernel/syscalls/syscall.tbl | 8 +- arch/s390/kernel/syscalls/syscall.tbl | 8 +- arch/sparc/kernel/syscalls/syscall.tbl| 8 +- arch/x86/entry/syscalls/syscall_32.tbl| 2 +- include/linux/compat.h| 15 -- include/uapi/asm-generic/unistd.h | 8 +- kernel/kexec.c| 6 +- kernel/sys_ni.c | 4 - mm/mempolicy.c| 193 +- 13 files changed, 79 insertions(+), 203 deletions(-) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index af793775ba98..31479f7120a0 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -649,11 +649,11 @@ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) #define __NR_inotify_rm_watch 318 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) #define __NR_mbind 319 -__SYSCALL(__NR_mbind, compat_sys_mbind) +__SYSCALL(__NR_mbind, sys_mbind) #define __NR_get_mempolicy 320 -__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy) +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) #define __NR_set_mempolicy 321 -__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy) +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) #define __NR_openat 322 __SYSCALL(__NR_openat, compat_sys_openat) #define __NR_mkdirat 323 @@ -811,7 +811,7 @@ __SYSCALL(__NR_rseq, sys_rseq) #define __NR_io_pgetevents 399 __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) #define __NR_migrate_pages 400 -__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages) +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) #define __NR_kexec_file_load 401 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) /* 402 is unused */ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 7fa1ca45e44c..15fda882d07e 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -239,9 +239,9 @@ 228n32 clock_nanosleep sys_clock_nanosleep_time32 229n32 tgkill sys_tgkill 230n32 utimes sys_utimes_time32 -231n32 mbind compat_sys_mbind -232n32 get_mempolicy compat_sys_get_mempolicy -233n32 set_mempolicy compat_sys_set_mempolicy +231n32 mbind sys_mbind +232n32 get_mempolicy sys_get_mempolicy +233n32 set_mempolicy sys_set_mempolicy 234n32 mq_open compat_sys_mq_open 235n32 mq_unlink sys_mq_unlink 236n32 mq_timedsendsys_mq_timedsend_time32 @@ -258,7 +258,7 @@ 247n32 inotify_initsys_inotify_init 248n32 inotify_add_watch sys_inotify_add_watch 249n32 inotify_rm_watchsys_inotify_rm_watch -250n32 migrate_pages compat_sys_migrate_pages +250n32 migrate_pages sys_migrate_pages 251n32 openat sys_openat 252n32 mkdirat sys_mkdirat 253n32 mknodat sys_mknodat diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 194c7fbeedf7..6
[PATCH 2/4] kexec: remove compat_sys_kexec_load syscall
The compat version of sys_kexec_load() uses compat_alloc_user_space to convert the user-provided arguments into the native format. Move the conversion into the regular implementation with an in_compat_syscall() check to simplify it and avoid the compat_alloc_user_space() call. Signed-off-by: Arnd Bergmann --- arch/arm64/include/asm/unistd32.h | 2 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl| 2 +- arch/x86/entry/syscalls/syscall_32.tbl| 2 +- arch/x86/entry/syscalls/syscall_64.tbl| 2 +- include/linux/compat.h| 6 -- include/uapi/asm-generic/unistd.h | 2 +- kernel/kexec.c| 75 ++- 12 files changed, 29 insertions(+), 72 deletions(-) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 734860ac7cf9..b6517df74037 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -705,7 +705,7 @@ __SYSCALL(__NR_getcpu, sys_getcpu) #define __NR_epoll_pwait 346 __SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait) #define __NR_kexec_load 347 -__SYSCALL(__NR_kexec_load, compat_sys_kexec_load) +__SYSCALL(__NR_kexec_load, sys_kexec_load) #define __NR_utimensat 348 __SYSCALL(__NR_utimensat, sys_utimensat_time32) #define __NR_signalfd 349 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index f9df9edb67a4..ad157aab4c09 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -282,7 +282,7 @@ 271n32 move_pages compat_sys_move_pages 272n32 set_robust_list compat_sys_set_robust_list 273n32 get_robust_list compat_sys_get_robust_list -274n32 kexec_load compat_sys_kexec_load +274n32 kexec_load sys_kexec_load 275n32 getcpu sys_getcpu 276n32 epoll_pwait compat_sys_epoll_pwait 277n32 ioprio_set sys_ioprio_set diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 195b43cf27c8..57baf6c8008f 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -322,7 +322,7 @@ 308o32 move_pages sys_move_pages compat_sys_move_pages 309o32 set_robust_list sys_set_robust_list compat_sys_set_robust_list 310o32 get_robust_list sys_get_robust_list compat_sys_get_robust_list -311o32 kexec_load sys_kexec_load compat_sys_kexec_load +311o32 kexec_load sys_kexec_load 312o32 getcpu sys_getcpu 313o32 epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 314o32 ioprio_set sys_ioprio_set diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index def64d221cd4..778bf166d7bd 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -336,7 +336,7 @@ 297common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 298common statfs64sys_statfs64 compat_sys_statfs64 299common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -300common kexec_load sys_kexec_load compat_sys_kexec_load +300common kexec_load sys_kexec_load 30132 utimensat sys_utimensat_time32 30164 utimensat sys_utimensat 302common signalfdsys_signalfd compat_sys_signalfd diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index c2d737ff2e7b..f128ba8b9a71 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -350,7 +350,7 @@ 26564 mq_timedreceive sys_mq_timedreceive 266nospu mq_notify sys_mq_notify compat_sys_mq_notify 267nospu mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr -268nospu kexec_load sys_kexec_load compat_sys_kexec_load +268nospu kexec_load sys_kexec_load 269nospu add_key sys_add_key 270nospu
[PATCH 1/4] x86: add __X32_COND_SYSCALL() macro
sys_move_pages() is an optional syscall, and once we remove the compat version of it in favor of the native one with an in_compat_syscall() check, the x32 syscall table refers to a __x32_sys_move_pages symbol that may not exist when the syscall is disabled. Change the COND_SYSCALL() definition on x86 to also include the redirection for x32. Signed-off-by: Arnd Bergmann --- arch/x86/include/asm/syscall_wrapper.h | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index a84333adeef2..5eacd35a7f97 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -171,12 +171,16 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); __SYS_STUBx(x32, compat_sys##name, \ SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) +#define __X32_COND_SYSCALL(name) \ + __COND_SYSCALL(x32, sys_##name) + #define __X32_COMPAT_COND_SYSCALL(name) \ __COND_SYSCALL(x32, compat_sys_##name) #define __X32_COMPAT_SYS_NI(name) \ __SYS_NI(x32, compat_sys_##name) #else /* CONFIG_X86_X32 */ +#define __X32_COND_SYSCALL(name) #define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #define __X32_COMPAT_COND_SYSCALL(name) @@ -253,6 +257,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); static long __do_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL(name) \ + __X32_COND_SYSCALL(name)\ __X64_COND_SYSCALL(name)\ __IA32_COND_SYSCALL(name) -- 2.27.0 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH 0/4] syscalls: remove compat_alloc_user_space callers
Going through compat_alloc_user_space() to convert indirect system call arguments tends to add complexity compared to handling the native and compat logic in the same code. I have patches for all other uses of compat_alloc_user_space() as well, and would expect that we can subsequently remove the interface itself. Arnd Arnd Bergmann (4): x86: add __X32_COND_SYSCALL() macro kexec: remove compat_sys_kexec_load syscall mm: remove compat_sys_move_pages mm: remove compat numa syscalls arch/arm64/include/asm/unistd32.h | 12 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 12 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 12 +- arch/parisc/kernel/syscalls/syscall.tbl | 10 +- arch/powerpc/kernel/syscalls/syscall.tbl | 12 +- arch/s390/kernel/syscalls/syscall.tbl | 12 +- arch/sparc/kernel/syscalls/syscall.tbl| 12 +- arch/x86/entry/syscalls/syscall_32.tbl| 6 +- arch/x86/entry/syscalls/syscall_64.tbl| 4 +- arch/x86/include/asm/syscall_wrapper.h| 5 + include/linux/compat.h| 26 --- include/uapi/asm-generic/unistd.h | 12 +- kernel/kexec.c| 77 +++-- kernel/sys_ni.c | 5 - mm/mempolicy.c| 193 +- mm/migrate.c | 45 +++-- 16 files changed, 143 insertions(+), 312 deletions(-) -- 2.27.0 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH printk 2/3] printk: move dictionary keys to dev_printk_info
On 18/09/2020 14.13, Petr Mladek wrote: > On Fri 2020-09-18 08:16:37, Rasmus Villemoes wrote: >> On 17/09/2020 15.16, John Ogness wrote: >> >>> if (dev->class) >>> subsys = dev->class->name; >>> else if (dev->bus) >>> subsys = dev->bus->name; >>> else >>> - return 0; >>> + return; >>> >>> - pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); >>> - if (pos >= hdrlen) >>> - goto overflow; >>> + snprintf(dev_info->subsystem, sizeof(dev_info->subsystem), subsys); >> >> It's unlikely that subsys would contain a %, but this will be yet >> another place to spend brain cycles ignoring if doing static analysis. >> So can we not do this. Either of strXcpy() for X=s,l will do the same >> thing, and likely faster. > > Good point! Better be on the safe size in a generic printk() API. > > Well, I am afraid that this would be only small drop in a huge lake. > class->name and bus->name seems to be passed to %s in so many > *print*() calls all over the kernel code. So what? printf("%s", some_string_that_might_contain_percent_chars) is not a problem. printf(some_string_that_might_contain_percent_chars) is. And yes, one could do snprintf(dev_info->subsystem, sizeof(dev_info->subsystem), "%s", subsys); but one might as well avoid the snprintf overhead and use one of the strX functions that have the exact same semantics (copy as much as there's room for, ensure nul-termination). Rasmus ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH printk 2/3] printk: move dictionary keys to dev_printk_info
On Fri 2020-09-18 08:16:37, Rasmus Villemoes wrote: > On 17/09/2020 15.16, John Ogness wrote: > > > if (dev->class) > > subsys = dev->class->name; > > else if (dev->bus) > > subsys = dev->bus->name; > > else > > - return 0; > > + return; > > > > - pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); > > - if (pos >= hdrlen) > > - goto overflow; > > + snprintf(dev_info->subsystem, sizeof(dev_info->subsystem), subsys); > > It's unlikely that subsys would contain a %, but this will be yet > another place to spend brain cycles ignoring if doing static analysis. > So can we not do this. Either of strXcpy() for X=s,l will do the same > thing, and likely faster. Good point! Better be on the safe size in a generic printk() API. Well, I am afraid that this would be only small drop in a huge lake. class->name and bus->name seems to be passed to %s in so many *print*() calls all over the kernel code. IMHO, this is not the right place to prevent the problem. Dangerous names must be prevented when a new bus, class, device is added. Best Rergards, Petr ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH 1/1] makedumpfile: make use of 'uts_namespace.name' offset in VMCOREINFO
The offset of the field 'init_uts_ns.name' has changed since linux-next commit 9a56493f6942c0e2df1579986128721da96e00d8. Make use of the offset 'uts_namespace.name' if available in VMCOREINFO. Signed-off-by: Alexander Egorenkov --- makedumpfile.c | 17 +++-- makedumpfile.h | 6 ++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/makedumpfile.c b/makedumpfile.c index 4c4251e..5b8b90c 100644 --- a/makedumpfile.c +++ b/makedumpfile.c @@ -1159,7 +1159,10 @@ check_release(void) if (SYMBOL(system_utsname) != NOT_FOUND_SYMBOL) { utsname = SYMBOL(system_utsname); } else if (SYMBOL(init_uts_ns) != NOT_FOUND_SYMBOL) { - utsname = SYMBOL(init_uts_ns) + sizeof(int); + if (OFFSET(uts_namespace.name) != NOT_FOUND_STRUCTURE) + utsname = SYMBOL(init_uts_ns) + OFFSET(uts_namespace.name); + else + utsname = SYMBOL(init_uts_ns) + sizeof(int); } else { ERRMSG("Can't get the symbol of system_utsname.\n"); return FALSE; @@ -2008,6 +2011,11 @@ get_structure_info(void) SIZE_INIT(cpu_spec, "cpu_spec"); OFFSET_INIT(cpu_spec.mmu_features, "cpu_spec", "mmu_features"); + /* +* Get offsets of the uts_namespace's members. +*/ + OFFSET_INIT(uts_namespace.name, "uts_namespace", "name"); + return TRUE; } @@ -2077,7 +2085,10 @@ get_str_osrelease_from_vmlinux(void) if (SYMBOL(system_utsname) != NOT_FOUND_SYMBOL) { utsname = SYMBOL(system_utsname); } else if (SYMBOL(init_uts_ns) != NOT_FOUND_SYMBOL) { - utsname = SYMBOL(init_uts_ns) + sizeof(int); + if (OFFSET(uts_namespace.name) != NOT_FOUND_STRUCTURE) + utsname = SYMBOL(init_uts_ns) + OFFSET(uts_namespace.name); + else + utsname = SYMBOL(init_uts_ns) + sizeof(int); } else { ERRMSG("Can't get the symbol of system_utsname.\n"); return FALSE; @@ -2284,6 +2295,7 @@ write_vmcoreinfo_data(void) WRITE_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list); WRITE_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift); WRITE_MEMBER_OFFSET("cpu_spec.mmu_features", cpu_spec.mmu_features); + WRITE_MEMBER_OFFSET("uts_namespace.name", uts_namespace.name); if (SYMBOL(node_data) != NOT_FOUND_SYMBOL) WRITE_ARRAY_LENGTH("node_data", node_data); @@ -2682,6 +2694,7 @@ read_vmcoreinfo(void) READ_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list); READ_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift); READ_MEMBER_OFFSET("cpu_spec.mmu_features", cpu_spec.mmu_features); + READ_MEMBER_OFFSET("uts_namespace.name", uts_namespace.name); READ_STRUCTURE_SIZE("printk_log", printk_log); if (SIZE(printk_log) != NOT_FOUND_STRUCTURE) { diff --git a/makedumpfile.h b/makedumpfile.h index 03fb4ce..9088f1f 100644 --- a/makedumpfile.h +++ b/makedumpfile.h @@ -1719,6 +1719,8 @@ struct size_table { longcpu_spec; longpageflags; + + longuts_namespace; }; struct offset_table { @@ -1880,6 +1882,10 @@ struct offset_table { struct cpu_spec_s { longmmu_features; } cpu_spec; + + struct uts_namespace_s { + longname; + } uts_namespace; }; /* -- 2.26.2 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH printk 2/3] printk: move dictionary keys to dev_printk_info
On Thu 2020-09-17 15:22:43, John Ogness wrote: > Dictionaries are only used for SUBSYSTEM and DEVICE properties. The > current implementation stores the property names each time they are > used. This requires more space than otherwise necessary. Also, > because the dictionary entries are currently considered optional, > it cannot be relied upon that they are always available, even if the > writer wanted to store them. These issues will increase should new > dictionary properties be introduced. > > Rather than storing the subsystem and device properties in the > dict ring, introduce a struct dev_printk_info with separate fields > to store only the property values. Embed this struct within the > struct printk_info to provide guaranteed availability. > > Signed-off-by: John Ogness Reviewed-by: Petr Mladek Best Regards, Petr ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH printk 1/3] printk: move printk_info into separate array
On 2020-09-18, Petr Mladek wrote: >> --- a/kernel/printk/printk.c >> +++ b/kernel/printk/printk.c >> @@ -1097,6 +1097,7 @@ static char setup_dict_buf[CONSOLE_EXT_LOG_MAX] >> __initdata; >> >> void __init setup_log_buf(int early) >> { >> +struct printk_info *new_infos; >> unsigned int new_descs_count; >> struct prb_desc *new_descs; >> struct printk_info info; >> @@ -1156,6 +1157,17 @@ void __init setup_log_buf(int early) >> return; >> } >> >> +new_descs_size = new_descs_count * sizeof(struct printk_info); > > Must be stored into new variable, e.g. new_infos_size.= Ack. >> +new_infos = memblock_alloc(new_descs_size, LOG_ALIGN); >> +if (unlikely(!new_infos)) { >> +pr_err("log_buf_len: %zu info bytes not available\n", >> + new_descs_size); >> +memblock_free(__pa(new_descs), new_log_buf_len); >> +memblock_free(__pa(new_dict_buf), new_log_buf_len); > > The above two calls have wrong size. > > The same problem is there also in the error path when new_descs > allocation fail. It might be better to handle this using some > goto err_* tagrets. > > Please, fix the old problem in a separate patch. The "old problem" didn't exist. The problem is introduced with this series. I will fix it with appropriate goto err_* targets for v2. >> --- a/kernel/printk/printk_ringbuffer.c >> +++ b/kernel/printk/printk_ringbuffer.c >> @@ -1726,12 +1762,12 @@ static bool copy_data(struct prb_data_ring >> *data_ring, >> /* >> * Actual cannot be less than expected. It can be more than expected >> * because of the trailing alignment padding. >> + * >> + * Note that invalid @len values can occur because the caller loads >> + * the value during an allowed data race. > > I hope that this will not bite us in the future. The fact is that > copying the entire struct printk_info in get_desc() is ugly and > copy_data() has to be careful anyway. It isn't an issue because the state is verified again at the end of prb_read(). I added the comment because if all you are looking at is copy_data(), you may not know that @len was read on a data-race. Whereas inside of prb_read(), it is obvious that the memcpy() is a data-race. John Ogness ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH] arm64: Add purgatory printing
Hi Matthias, On Fri, Sep 18, 2020 at 2:01 PM Matthias Brugger wrote: > > > > On 18/09/2020 07:16, Bhupesh SHARMA wrote: > > Hi Matthias, > > > > Thanks for the patch. Some nitpicks inline: > > > > On Fri, Sep 18, 2020 at 1:09 AM wrote: > >> > >> From: Matthias Brugger > >> > >> Add option to allow purgatory printing on arm64 hardware > >> by passing the console name which should be used. > >> Based on a patch by Geoff Levand. > >> > >> Cc: Geoff Levand > >> Signed-off-by: Matthias Brugger > >> --- > >> kexec/arch/arm64/include/arch/options.h | 6 ++- > >> kexec/arch/arm64/kexec-arm64.c | 61 + > >> purgatory/arch/arm64/purgatory-arm64.c | 17 ++- > >> 3 files changed, 82 insertions(+), 2 deletions(-) > > > > Probably we also need to update the man page 'kexec/kexec.8' to add > > documentation about the newly introduced argument. > > > > I checked the documentation and under "ARCHITECTURE OPTIONS" and it only > documents a subset of the x86(_64) architecture specific commands. So I think > there is more work to do to get the documentation right. > > But after having a look, I think we might want to use "--serial" as option > instead of "--console" to be in sync with x86. Although many architectures > reinvent the options, if we can get it as near to x86 as possible, I think > that > would be a good thing. I'll do that in v2. That's a good suggestion. I think the '--serial' option is a better one as compared to '--console'. > >> diff --git a/kexec/arch/arm64/include/arch/options.h > >> b/kexec/arch/arm64/include/arch/options.h > >> index a17d933..be7d169 100644 > >> --- a/kexec/arch/arm64/include/arch/options.h > >> +++ b/kexec/arch/arm64/include/arch/options.h > >> @@ -5,7 +5,8 @@ > >> #define OPT_DTB((OPT_MAX)+1) > >> #define OPT_INITRD ((OPT_MAX)+2) > >> #define OPT_REUSE_CMDLINE ((OPT_MAX)+3) > >> -#define OPT_ARCH_MAX ((OPT_MAX)+4) > >> +#define OPT_CONSOLE((OPT_MAX)+4) > >> +#define OPT_ARCH_MAX ((OPT_MAX)+5) > >> > >> #define KEXEC_ARCH_OPTIONS \ > >> KEXEC_OPTIONS \ > >> @@ -13,6 +14,7 @@ > >> { "command-line", 1, NULL, OPT_APPEND }, \ > >> { "dtb", 1, NULL, OPT_DTB }, \ > >> { "initrd",1, NULL, OPT_INITRD }, \ > >> + { "console", 1, NULL, OPT_CONSOLE }, \ > >> { "ramdisk", 1, NULL, OPT_INITRD }, \ > >> { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ > >> > >> @@ -25,6 +27,7 @@ static const char arm64_opts_usage[] __attribute__ > >> ((unused)) = > >> " --command-line=STRING Set the kernel command line to STRING.\n" > >> " --dtb=FILEUse FILE as the device tree blob.\n" > >> " --initrd=FILE Use FILE as the kernel initial ramdisk.\n" > >> +" --console=STRING Console used for purgatory printing.\n" > >> " --ramdisk=FILEUse FILE as the kernel initial ramdisk.\n" > >> " --reuse-cmdline Use kernel command line from running > >> system.\n"; > > > > Just a thought... sometimes the console string is also available as a > > part of the '--reuse-cmdline' command line argument passed to the > > kdump kernel. Can we also try to extract the --console string from the > > cmdline provided to the primary kernel itself? > > > > Well the problem is, that there can be more then one consoles present, so > which > one would be the correct one? > I see this more like a debug feature which you use knowing which console you > are > looking for the debug messages. In the end it only helps you to see if kdump > failed in the production system kernel or in the crash kernel. Indeed, I think we need to add some more comments to explain this option and distinguish it better from the serial port(s) mentioned in the '--reuse-cmdline' option. > >> @@ -32,6 +35,7 @@ struct arm64_opts { > >> const char *command_line; > >> const char *dtb; > >> const char *initrd; > >> + const char *console; > >> }; > >> > >> extern struct arm64_opts arm64_opts; > >> diff --git a/kexec/arch/arm64/kexec-arm64.c > >> b/kexec/arch/arm64/kexec-arm64.c > >> index 45ebc54..44c9e6e 100644 > >> --- a/kexec/arch/arm64/kexec-arm64.c > >> +++ b/kexec/arch/arm64/kexec-arm64.c > >> @@ -165,6 +165,8 @@ int arch_process_options(int argc, char **argv) > >> break; > >> case OPT_KEXEC_FILE_SYSCALL: > >> do_kexec_file_syscall = 1; > >> + case OPT_CONSOLE: > >> + arm64_opts.console = optarg; > >> break; > >> default: > >> break; /* Ignore core and unknown options. */ > >> @@ -180,12 +182,62 @@ int arch_process_options(int argc, char **argv) > >> dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__, > >> (do_kexec_file_syscall && arm64_opts.dtb ?
Re: [PATCH printk 1/3] printk: move printk_info into separate array
On Thu 2020-09-17 15:22:42, John Ogness wrote: > The majority of the size of a descriptor is taken up by meta data, > which is often not of interest to the ringbuffer (for example, > when performing state checks). Since descriptors are often > temporarily stored on the stack, keeping their size minimal will > help reduce stack pressure. > > Rather than embedding the printk_info into the descriptor, create > a separate printk_info array. The index of a descriptor in the > descriptor array corresponds to the printk_info with the same > index in the printk_info array. The rules for validity of a > printk_info match the existing rules for the data blocks: the > descriptor must be in a consistent state. I like this approach. IMHO, it better separates dict_ring- and printk-specific metadata. The three printk-specific values (seq, caller_id, len) are still accessed by the ring buffer code. It is not ideal. But it helps to hide tricky operations in the ring buffer API, keep the code more safe and sane. These exceptions are actually better visible now. > --- a/kernel/printk/printk.c > +++ b/kernel/printk/printk.c > @@ -1097,6 +1097,7 @@ static char setup_dict_buf[CONSOLE_EXT_LOG_MAX] > __initdata; > > void __init setup_log_buf(int early) > { > + struct printk_info *new_infos; > unsigned int new_descs_count; > struct prb_desc *new_descs; > struct printk_info info; > @@ -1156,6 +1157,17 @@ void __init setup_log_buf(int early) > return; > } > > + new_descs_size = new_descs_count * sizeof(struct printk_info); Must be stored into new variable, e.g. new_infos_size.= > + new_infos = memblock_alloc(new_descs_size, LOG_ALIGN); > + if (unlikely(!new_infos)) { > + pr_err("log_buf_len: %zu info bytes not available\n", > +new_descs_size); > + memblock_free(__pa(new_descs), new_log_buf_len); > + memblock_free(__pa(new_dict_buf), new_log_buf_len); The above two calls have wrong size. The same problem is there also in the error path when new_descs allocation fail. It might be better to handle this using some goto err_* tagrets. Please, fix the old problem in a separate patch. > + memblock_free(__pa(new_log_buf), new_log_buf_len); > + return; > + } > + > prb_rec_init_rd(&r, &info, > &setup_text_buf[0], sizeof(setup_text_buf), > &setup_dict_buf[0], sizeof(setup_dict_buf)); > --- a/kernel/printk/printk_ringbuffer.c > +++ b/kernel/printk/printk_ringbuffer.c > @@ -1726,12 +1762,12 @@ static bool copy_data(struct prb_data_ring *data_ring, > /* >* Actual cannot be less than expected. It can be more than expected >* because of the trailing alignment padding. > + * > + * Note that invalid @len values can occur because the caller loads > + * the value during an allowed data race. I hope that this will not bite us in the future. The fact is that copying the entire struct printk_info in get_desc() is ugly and copy_data() has to be careful anyway. >*/ > - if (WARN_ON_ONCE(data_size < (unsigned int)len)) { > - pr_warn_once("wrong data size (%u, expecting >=%hu) for data: > %.*s\n", > - data_size, len, data_size, data); > + if (data_size < (unsigned int)len) > return false; > - } > > /* Caller interested in the line count? */ > if (line_count) Otherwise, I like this patch. Best Regards, Petr ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH v12 3/9] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel[_low]()
Hi Catalin, On 2020/9/18 16:59, chenzhou wrote: > Hi Baoquan, > > On 2020/9/18 15:25, Baoquan He wrote: >> Hi, >> >> On 09/07/20 at 09:47pm, Chen Zhou wrote: >>> To make the functions reserve_crashkernel[_low]() as generic, >>> replace some hard-coded numbers with macro CRASH_ADDR_LOW_MAX. >>> >>> Signed-off-by: Chen Zhou >>> --- >>> arch/x86/kernel/setup.c | 11 ++- >>> 1 file changed, 6 insertions(+), 5 deletions(-) >>> >>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c >>> index d7fd90c52dae..71a6a6e7ca5b 100644 >>> --- a/arch/x86/kernel/setup.c >>> +++ b/arch/x86/kernel/setup.c >>> @@ -430,7 +430,7 @@ static int __init reserve_crashkernel_low(void) >>> unsigned long total_low_mem; >>> int ret; >>> >>> - total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); >>> + total_low_mem = memblock_mem_size(CRASH_ADDR_LOW_MAX >> PAGE_SHIFT); >> Just note that the replacement has been done in another patch from Mike >> Rapoport, partially. He seems to have done reserve_crashkernel_low() >> part, there's one left in reserve_crashkernel(), you might want to check >> that. >> >> Mike's patch which is from a patchset has been merged into Andrew's next >> tree. >> >> commit 6e50f7672ffa362e9bd4bc0c0d2524ed872828c5 >> Author: Mike Rapoport >> Date: Wed Aug 26 15:22:32 2020 +1000 >> >> x86/setup: simplify reserve_crashkernel() As Baoquan said, some functions have been changed in the next tree, if i need to rebase on top of the next tree. Thanks, Chen Zhou > Yeah, the function reserve_crashkernel() has been changed in the next tree. > Thanks for your review and reminder. > > Thanks, > Chen Zhou >>> >>> /* crashkernel=Y,low */ >>> ret = parse_crashkernel_low(boot_command_line, total_low_mem, >>> &low_size, &base); >>> @@ -451,7 +451,7 @@ static int __init reserve_crashkernel_low(void) >>> return 0; >>> } >>> >>> - low_base = memblock_find_in_range(CRASH_ALIGN, 1ULL << 32, low_size, >>> CRASH_ALIGN); >>> + low_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, >>> low_size, CRASH_ALIGN); >>> if (!low_base) { >>> pr_err("Cannot reserve %ldMB crashkernel low memory, please try >>> smaller size.\n", >>>(unsigned long)(low_size >> 20)); >>> @@ -504,8 +504,9 @@ static void __init reserve_crashkernel(void) >>> if (!crash_base) { >>> /* >>> * Set CRASH_ADDR_LOW_MAX upper bound for crash memory, >>> -* crashkernel=x,high reserves memory over 4G, also allocates >>> -* 256M extra low memory for DMA buffers and swiotlb. >>> +* crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX, >>> +* also allocates 256M extra low memory for DMA buffers >>> +* and swiotlb. >>> * But the extra memory is not required for all machines. >>> * So try low memory first and fall back to high memory >>> * unless "crashkernel=size[KMG],high" is specified. >>> @@ -539,7 +540,7 @@ static void __init reserve_crashkernel(void) >>> return; >>> } >>> >>> - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { >>> + if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) { >>> memblock_free(crash_base, crash_size); >>> return; >>> } >>> -- >>> 2.20.1 >>> >> . >> ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH v12 3/9] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel[_low]()
Hi Baoquan, On 2020/9/18 15:25, Baoquan He wrote: > Hi, > > On 09/07/20 at 09:47pm, Chen Zhou wrote: >> To make the functions reserve_crashkernel[_low]() as generic, >> replace some hard-coded numbers with macro CRASH_ADDR_LOW_MAX. >> >> Signed-off-by: Chen Zhou >> --- >> arch/x86/kernel/setup.c | 11 ++- >> 1 file changed, 6 insertions(+), 5 deletions(-) >> >> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c >> index d7fd90c52dae..71a6a6e7ca5b 100644 >> --- a/arch/x86/kernel/setup.c >> +++ b/arch/x86/kernel/setup.c >> @@ -430,7 +430,7 @@ static int __init reserve_crashkernel_low(void) >> unsigned long total_low_mem; >> int ret; >> >> -total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); >> +total_low_mem = memblock_mem_size(CRASH_ADDR_LOW_MAX >> PAGE_SHIFT); > Just note that the replacement has been done in another patch from Mike > Rapoport, partially. He seems to have done reserve_crashkernel_low() > part, there's one left in reserve_crashkernel(), you might want to check > that. > > Mike's patch which is from a patchset has been merged into Andrew's next > tree. > > commit 6e50f7672ffa362e9bd4bc0c0d2524ed872828c5 > Author: Mike Rapoport > Date: Wed Aug 26 15:22:32 2020 +1000 > > x86/setup: simplify reserve_crashkernel() Yeah, the function reserve_crashkernel() has been changed in the next tree. Thanks for your review and reminder. Thanks, Chen Zhou > >> >> /* crashkernel=Y,low */ >> ret = parse_crashkernel_low(boot_command_line, total_low_mem, >> &low_size, &base); >> @@ -451,7 +451,7 @@ static int __init reserve_crashkernel_low(void) >> return 0; >> } >> >> -low_base = memblock_find_in_range(CRASH_ALIGN, 1ULL << 32, low_size, >> CRASH_ALIGN); >> +low_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, >> low_size, CRASH_ALIGN); >> if (!low_base) { >> pr_err("Cannot reserve %ldMB crashkernel low memory, please try >> smaller size.\n", >> (unsigned long)(low_size >> 20)); >> @@ -504,8 +504,9 @@ static void __init reserve_crashkernel(void) >> if (!crash_base) { >> /* >> * Set CRASH_ADDR_LOW_MAX upper bound for crash memory, >> - * crashkernel=x,high reserves memory over 4G, also allocates >> - * 256M extra low memory for DMA buffers and swiotlb. >> + * crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX, >> + * also allocates 256M extra low memory for DMA buffers >> + * and swiotlb. >> * But the extra memory is not required for all machines. >> * So try low memory first and fall back to high memory >> * unless "crashkernel=size[KMG],high" is specified. >> @@ -539,7 +540,7 @@ static void __init reserve_crashkernel(void) >> return; >> } >> >> -if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { >> +if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) { >> memblock_free(crash_base, crash_size); >> return; >> } >> -- >> 2.20.1 >> > . > ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH] arm64: Add purgatory printing
On 18/09/2020 07:16, Bhupesh SHARMA wrote: Hi Matthias, Thanks for the patch. Some nitpicks inline: On Fri, Sep 18, 2020 at 1:09 AM wrote: From: Matthias Brugger Add option to allow purgatory printing on arm64 hardware by passing the console name which should be used. Based on a patch by Geoff Levand. Cc: Geoff Levand Signed-off-by: Matthias Brugger --- kexec/arch/arm64/include/arch/options.h | 6 ++- kexec/arch/arm64/kexec-arm64.c | 61 + purgatory/arch/arm64/purgatory-arm64.c | 17 ++- 3 files changed, 82 insertions(+), 2 deletions(-) Probably we also need to update the man page 'kexec/kexec.8' to add documentation about the newly introduced argument. I checked the documentation and under "ARCHITECTURE OPTIONS" and it only documents a subset of the x86(_64) architecture specific commands. So I think there is more work to do to get the documentation right. But after having a look, I think we might want to use "--serial" as option instead of "--console" to be in sync with x86. Although many architectures reinvent the options, if we can get it as near to x86 as possible, I think that would be a good thing. I'll do that in v2. diff --git a/kexec/arch/arm64/include/arch/options.h b/kexec/arch/arm64/include/arch/options.h index a17d933..be7d169 100644 --- a/kexec/arch/arm64/include/arch/options.h +++ b/kexec/arch/arm64/include/arch/options.h @@ -5,7 +5,8 @@ #define OPT_DTB((OPT_MAX)+1) #define OPT_INITRD ((OPT_MAX)+2) #define OPT_REUSE_CMDLINE ((OPT_MAX)+3) -#define OPT_ARCH_MAX ((OPT_MAX)+4) +#define OPT_CONSOLE((OPT_MAX)+4) +#define OPT_ARCH_MAX ((OPT_MAX)+5) #define KEXEC_ARCH_OPTIONS \ KEXEC_OPTIONS \ @@ -13,6 +14,7 @@ { "command-line", 1, NULL, OPT_APPEND }, \ { "dtb", 1, NULL, OPT_DTB }, \ { "initrd",1, NULL, OPT_INITRD }, \ + { "console", 1, NULL, OPT_CONSOLE }, \ { "ramdisk", 1, NULL, OPT_INITRD }, \ { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \ @@ -25,6 +27,7 @@ static const char arm64_opts_usage[] __attribute__ ((unused)) = " --command-line=STRING Set the kernel command line to STRING.\n" " --dtb=FILEUse FILE as the device tree blob.\n" " --initrd=FILE Use FILE as the kernel initial ramdisk.\n" +" --console=STRING Console used for purgatory printing.\n" " --ramdisk=FILEUse FILE as the kernel initial ramdisk.\n" " --reuse-cmdline Use kernel command line from running system.\n"; Just a thought... sometimes the console string is also available as a part of the '--reuse-cmdline' command line argument passed to the kdump kernel. Can we also try to extract the --console string from the cmdline provided to the primary kernel itself? Well the problem is, that there can be more then one consoles present, so which one would be the correct one? I see this more like a debug feature which you use knowing which console you are looking for the debug messages. In the end it only helps you to see if kdump failed in the production system kernel or in the crash kernel. @@ -32,6 +35,7 @@ struct arm64_opts { const char *command_line; const char *dtb; const char *initrd; + const char *console; }; extern struct arm64_opts arm64_opts; diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c index 45ebc54..44c9e6e 100644 --- a/kexec/arch/arm64/kexec-arm64.c +++ b/kexec/arch/arm64/kexec-arm64.c @@ -165,6 +165,8 @@ int arch_process_options(int argc, char **argv) break; case OPT_KEXEC_FILE_SYSCALL: do_kexec_file_syscall = 1; + case OPT_CONSOLE: + arm64_opts.console = optarg; break; default: break; /* Ignore core and unknown options. */ @@ -180,12 +182,62 @@ int arch_process_options(int argc, char **argv) dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__, (do_kexec_file_syscall && arm64_opts.dtb ? "(ignored)" : arm64_opts.dtb)); + dbgprintf("%s:%d: console: %s\n", __func__, __LINE__, + arm64_opts.console); + if (do_kexec_file_syscall) arm64_opts.dtb = NULL; return 0; } +/** + * find_purgatory_sink - Find a sink for purgatory output. + */ + +static uint64_t find_purgatory_sink(const char *console) +{ + int fd, ret; + char folder[255], device[255], mem[255]; + struct stat sb; + char buffer[18]; Just trying to understand the reasoning behind keeping the buffer 18 chars long. Can the bytes read from the console exceed the array size here (may be a boundary check is required here to avoid overflows)? The buffer
[PATCH] docs: admin-guide: update kdump documentation due to change of crash URL
Since crash utility has moved to github, the original URL is no longer available. Let's update it accordingly. Suggested-by: Dave Young Signed-off-by: Lianbo Jiang --- Documentation/admin-guide/kdump/kdump.rst | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 2da65fef2a1c..75a9dd98e76e 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -509,9 +509,12 @@ ELF32-format headers using the --elf32-core-headers kernel option on the dump kernel. You can also use the Crash utility to analyze dump files in Kdump -format. Crash is available on Dave Anderson's site at the following URL: +format. Crash is available at the following URL: - http://people.redhat.com/~anderson/ + https://github.com/crash-utility/crash + +Crash document can be found at: + https://crash-utility.github.io/ Trigger Kdump on WARN() === -- 2.17.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH 1/1] kdump: append uts_namespace.name offset to VMCOREINFO
The offset of the field 'init_uts_ns.name' has changed since commit 9a56493f6942c0e2df1579986128721da96e00d8. Make the offset of the field 'uts_namespace.name' available in VMCOREINFO because tools like 'crash-utility' and 'makedumpfile' must be able to read it from crash dumps. Signed-off-by: Alexander Egorenkov --- kernel/crash_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 106e4500fd53..173fdc261882 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -447,6 +447,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_PAGESIZE(PAGE_SIZE); VMCOREINFO_SYMBOL(init_uts_ns); + VMCOREINFO_OFFSET(uts_namespace, name); VMCOREINFO_SYMBOL(node_online_map); #ifdef CONFIG_MMU VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir); -- 2.26.2 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH v12 3/9] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel[_low]()
Hi, On 09/07/20 at 09:47pm, Chen Zhou wrote: > To make the functions reserve_crashkernel[_low]() as generic, > replace some hard-coded numbers with macro CRASH_ADDR_LOW_MAX. > > Signed-off-by: Chen Zhou > --- > arch/x86/kernel/setup.c | 11 ++- > 1 file changed, 6 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > index d7fd90c52dae..71a6a6e7ca5b 100644 > --- a/arch/x86/kernel/setup.c > +++ b/arch/x86/kernel/setup.c > @@ -430,7 +430,7 @@ static int __init reserve_crashkernel_low(void) > unsigned long total_low_mem; > int ret; > > - total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); > + total_low_mem = memblock_mem_size(CRASH_ADDR_LOW_MAX >> PAGE_SHIFT); Just note that the replacement has been done in another patch from Mike Rapoport, partially. He seems to have done reserve_crashkernel_low() part, there's one left in reserve_crashkernel(), you might want to check that. Mike's patch which is from a patchset has been merged into Andrew's next tree. commit 6e50f7672ffa362e9bd4bc0c0d2524ed872828c5 Author: Mike Rapoport Date: Wed Aug 26 15:22:32 2020 +1000 x86/setup: simplify reserve_crashkernel() > > /* crashkernel=Y,low */ > ret = parse_crashkernel_low(boot_command_line, total_low_mem, > &low_size, &base); > @@ -451,7 +451,7 @@ static int __init reserve_crashkernel_low(void) > return 0; > } > > - low_base = memblock_find_in_range(CRASH_ALIGN, 1ULL << 32, low_size, > CRASH_ALIGN); > + low_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, > low_size, CRASH_ALIGN); > if (!low_base) { > pr_err("Cannot reserve %ldMB crashkernel low memory, please try > smaller size.\n", > (unsigned long)(low_size >> 20)); > @@ -504,8 +504,9 @@ static void __init reserve_crashkernel(void) > if (!crash_base) { > /* >* Set CRASH_ADDR_LOW_MAX upper bound for crash memory, > - * crashkernel=x,high reserves memory over 4G, also allocates > - * 256M extra low memory for DMA buffers and swiotlb. > + * crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX, > + * also allocates 256M extra low memory for DMA buffers > + * and swiotlb. >* But the extra memory is not required for all machines. >* So try low memory first and fall back to high memory >* unless "crashkernel=size[KMG],high" is specified. > @@ -539,7 +540,7 @@ static void __init reserve_crashkernel(void) > return; > } > > - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { > + if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) { > memblock_free(crash_base, crash_size); > return; > } > -- > 2.20.1 > ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH] arm64: Add purgatory printing
Hi Geoff, On 18/09/2020 04:04, Geoff Levand wrote: Hi Matthias, On 9/17/20 12:38 PM, matthias@kernel.org wrote: Add option to allow purgatory printing on arm64 hardware by passing the console name which should be used. +static uint64_t find_purgatory_sink(const char *console) +{ + int fd, ret; + char folder[255], device[255], mem[255]; + struct stat sb; + char buffer[18]; + uint64_t iomem = 0x0; + + if (!console) + return 0; + + sprintf(device, "/sys/class/tty/%s", console); + if (!stat(folder, &sb) == 0 && S_ISDIR(sb.st_mode)) { + fprintf(stderr, "kexec: %s: No valid console found for %s\n", + __func__, device); + return 0; + } + + sprintf(mem, "%s%s", device, "/iomem_base"); + printf("console memory read from %s\n", mem); + + fd = open(mem, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "kexec: %s: No able to open %s\n", + __func__, mem); + return 0; + } + + memset(buffer, '\0', sizeof(char) * 18); I think I'd like to just see 'memset(buffer, 0, sizeof(buffer));'. + ret = read(fd, buffer, 18); And 'ret = read(fd, buffer, sizeof(buffer));'. You are correct, I'll add it to v2. Thanks for the quick review! Matthias + if (ret < 0) { + fprintf(stderr, "kexec: %s: not able to read fd\n", __func__); + close(fd); + return 0; + } + + sscanf(buffer, "%lx", &iomem); + printf("console memory is at %#lx\n", iomem); + + close(fd); + return iomem; +} + Otherwise, looks OK. -Geoff ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec