Re: [PATCH 4/4] mm: remove compat numa syscalls

2020-09-18 Thread Christoph Hellwig
On Fri, Sep 18, 2020 at 03:24:39PM +0200, Arnd Bergmann wrote:
> The compat implementations for mbind, get_mempolicy, set_mempolicy
> and migrate_pages are just there to handle the subtly different
> layout of bitmaps on 32-bit hosts.
> 
> The compat implementation however lacks some of the checks that
> are present in the native one, in particular for checking that
> the extra bits are all zero when user space has a larger mask
> size than the kernel. Worse, those extra bits do not get cleared
> when copying in or out of the kernel, which can lead to incorrect
> data as well.
> 
> Unify the implementation to handle the compat bitmap layout directly
> in the get_nodes() and copy_nodes_to_user() helpers.  Splitting out
> the get_bitmap() helper from get_nodes() also helps readability of the
> native case.
> 
> On x86, two additional problems are addressed by this: compat tasks can
> pass a bitmap at the end of a mapping, causing a fault when reading
> across the page boundary for a 64-bit word. x32 tasks might also run
> into problems with get_mempolicy corrupting data when an odd number of
> 32-bit words gets passed.
> 
> On parisc the migrate_pages() system call apparently had the wrong
> calling convention, as big-endian architectures expect the words
> inside of a bitmap to be swapped. This is not a problem though
> since parisc has no NUMA support.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/arm64/include/asm/unistd32.h |   8 +-
>  arch/mips/kernel/syscalls/syscall_n32.tbl |   8 +-
>  arch/mips/kernel/syscalls/syscall_o32.tbl |   8 +-
>  arch/parisc/kernel/syscalls/syscall.tbl   |   6 +-
>  arch/powerpc/kernel/syscalls/syscall.tbl  |   8 +-
>  arch/s390/kernel/syscalls/syscall.tbl |   8 +-
>  arch/sparc/kernel/syscalls/syscall.tbl|   8 +-
>  arch/x86/entry/syscalls/syscall_32.tbl|   2 +-
>  include/linux/compat.h|  15 --
>  include/uapi/asm-generic/unistd.h |   8 +-
>  kernel/kexec.c|   6 +-
>  kernel/sys_ni.c   |   4 -
>  mm/mempolicy.c| 193 +-
>  13 files changed, 79 insertions(+), 203 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/unistd32.h 
> b/arch/arm64/include/asm/unistd32.h
> index af793775ba98..31479f7120a0 100644
> --- a/arch/arm64/include/asm/unistd32.h
> +++ b/arch/arm64/include/asm/unistd32.h
> @@ -649,11 +649,11 @@ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
>  #define __NR_inotify_rm_watch 318
>  __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
>  #define __NR_mbind 319
> -__SYSCALL(__NR_mbind, compat_sys_mbind)
> +__SYSCALL(__NR_mbind, sys_mbind)
>  #define __NR_get_mempolicy 320
> -__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy)
> +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
>  #define __NR_set_mempolicy 321
> -__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy)
> +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
>  #define __NR_openat 322
>  __SYSCALL(__NR_openat, compat_sys_openat)
>  #define __NR_mkdirat 323
> @@ -811,7 +811,7 @@ __SYSCALL(__NR_rseq, sys_rseq)
>  #define __NR_io_pgetevents 399
>  __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
>  #define __NR_migrate_pages 400
> -__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages)
> +__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
>  #define __NR_kexec_file_load 401
>  __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
>  /* 402 is unused */
> diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl 
> b/arch/mips/kernel/syscalls/syscall_n32.tbl
> index 7fa1ca45e44c..15fda882d07e 100644
> --- a/arch/mips/kernel/syscalls/syscall_n32.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
> @@ -239,9 +239,9 @@
>  228  n32 clock_nanosleep sys_clock_nanosleep_time32
>  229  n32 tgkill  sys_tgkill
>  230  n32 utimes  sys_utimes_time32
> -231  n32 mbind   compat_sys_mbind
> -232  n32 get_mempolicy   compat_sys_get_mempolicy
> -233  n32 set_mempolicy   compat_sys_set_mempolicy
> +231  n32 mbind   sys_mbind
> +232  n32 get_mempolicy   sys_get_mempolicy
> +233  n32 set_mempolicy   sys_set_mempolicy
>  234  n32 mq_open compat_sys_mq_open
>  235  n32 mq_unlink   sys_mq_unlink
>  236  n32 mq_timedsendsys_mq_timedsend_time32
> @@ -258,7 +258,7 @@
>  247  n32 inotify_initsys_inotify_init
>  248  n32 inotify_add_watch   sys_inotify_add_watch
>  249  n32 inotify_rm_watchsys_inotify_rm_watch
> -250  n32 migrate_pages   compat_sys_migrate_pages
> +250  n32 migrate_pages   sys_migrate_pages
>  251  n32 openat  sys_openat
>  252  n32 mk

Re: [PATCH 3/4] mm: remove compat_sys_move_pages

2020-09-18 Thread Christoph Hellwig
On Fri, Sep 18, 2020 at 03:24:38PM +0200, Arnd Bergmann wrote:
> The compat move_pages() implementation uses compat_alloc_user_space()
> for converting the pointer array. Moving the compat handling into
> the function itself is a bit simpler and lets us avoid the
> compat_alloc_user_space() call.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/arm64/include/asm/unistd32.h |  2 +-
>  arch/mips/kernel/syscalls/syscall_n32.tbl |  2 +-
>  arch/mips/kernel/syscalls/syscall_o32.tbl |  2 +-
>  arch/parisc/kernel/syscalls/syscall.tbl   |  2 +-
>  arch/powerpc/kernel/syscalls/syscall.tbl  |  2 +-
>  arch/s390/kernel/syscalls/syscall.tbl |  2 +-
>  arch/sparc/kernel/syscalls/syscall.tbl|  2 +-
>  arch/x86/entry/syscalls/syscall_32.tbl|  2 +-
>  arch/x86/entry/syscalls/syscall_64.tbl|  2 +-
>  include/linux/compat.h|  5 ---
>  include/uapi/asm-generic/unistd.h |  2 +-
>  kernel/sys_ni.c   |  1 -
>  mm/migrate.c  | 45 +++
>  13 files changed, 32 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/unistd32.h 
> b/arch/arm64/include/asm/unistd32.h
> index b6517df74037..af793775ba98 100644
> --- a/arch/arm64/include/asm/unistd32.h
> +++ b/arch/arm64/include/asm/unistd32.h
> @@ -699,7 +699,7 @@ __SYSCALL(__NR_tee, sys_tee)
>  #define __NR_vmsplice 343
>  __SYSCALL(__NR_vmsplice, compat_sys_vmsplice)
>  #define __NR_move_pages 344
> -__SYSCALL(__NR_move_pages, compat_sys_move_pages)
> +__SYSCALL(__NR_move_pages, sys_move_pages)
>  #define __NR_getcpu 345
>  __SYSCALL(__NR_getcpu, sys_getcpu)
>  #define __NR_epoll_pwait 346
> diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl 
> b/arch/mips/kernel/syscalls/syscall_n32.tbl
> index ad157aab4c09..7fa1ca45e44c 100644
> --- a/arch/mips/kernel/syscalls/syscall_n32.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
> @@ -279,7 +279,7 @@
>  268  n32 sync_file_range sys_sync_file_range
>  269  n32 tee sys_tee
>  270  n32 vmsplicecompat_sys_vmsplice
> -271  n32 move_pages  compat_sys_move_pages
> +271  n32 move_pages  sys_move_pages
>  272  n32 set_robust_list compat_sys_set_robust_list
>  273  n32 get_robust_list compat_sys_get_robust_list
>  274  n32 kexec_load  sys_kexec_load
> diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl 
> b/arch/mips/kernel/syscalls/syscall_o32.tbl
> index 57baf6c8008f..194c7fbeedf7 100644
> --- a/arch/mips/kernel/syscalls/syscall_o32.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
> @@ -319,7 +319,7 @@
>  305  o32 sync_file_range sys_sync_file_range 
> sys32_sync_file_range
>  306  o32 tee sys_tee
>  307  o32 vmsplicesys_vmsplice
> compat_sys_vmsplice
> -308  o32 move_pages  sys_move_pages  
> compat_sys_move_pages
> +308  o32 move_pages  sys_move_pages
>  309  o32 set_robust_list sys_set_robust_list 
> compat_sys_set_robust_list
>  310  o32 get_robust_list sys_get_robust_list 
> compat_sys_get_robust_list
>  311  o32 kexec_load  sys_kexec_load
> diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
> b/arch/parisc/kernel/syscalls/syscall.tbl
> index 778bf166d7bd..5c17edaffe70 100644
> --- a/arch/parisc/kernel/syscalls/syscall.tbl
> +++ b/arch/parisc/kernel/syscalls/syscall.tbl
> @@ -331,7 +331,7 @@
>  292  64  sync_file_range sys_sync_file_range
>  293  common  tee sys_tee
>  294  common  vmsplicesys_vmsplice
> compat_sys_vmsplice
> -295  common  move_pages  sys_move_pages  
> compat_sys_move_pages
> +295  common  move_pages  sys_move_pages
>  296  common  getcpu  sys_getcpu
>  297  common  epoll_pwait sys_epoll_pwait 
> compat_sys_epoll_pwait
>  298  common  statfs64sys_statfs64
> compat_sys_statfs64
> diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
> b/arch/powerpc/kernel/syscalls/syscall.tbl
> index f128ba8b9a71..04fb42d7b377 100644
> --- a/arch/powerpc/kernel/syscalls/syscall.tbl
> +++ b/arch/powerpc/kernel/syscalls/syscall.tbl
> @@ -389,7 +389,7 @@
>  298  common  faccessat   sys_faccessat
>  299  common  get_robust_list sys_get_robust_list 
> compat_sys_get_robust_list
>  300  common  set_robust_list sys_set_robust_list 
> compat_sys_set_robust_list
> -301  common  move_pages  sys_move_pages  
> compat_sys_move_pages
> +301  common  move_pa

Re: [PATCH 2/4] kexec: remove compat_sys_kexec_load syscall

2020-09-18 Thread Christoph Hellwig
On Fri, Sep 18, 2020 at 03:24:37PM +0200, Arnd Bergmann wrote:
> The compat version of sys_kexec_load() uses compat_alloc_user_space to
> convert the user-provided arguments into the native format.
> 
> Move the conversion into the regular implementation with
> an in_compat_syscall() check to simplify it and avoid the
> compat_alloc_user_space() call.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/arm64/include/asm/unistd32.h |  2 +-
>  arch/mips/kernel/syscalls/syscall_n32.tbl |  2 +-
>  arch/mips/kernel/syscalls/syscall_o32.tbl |  2 +-
>  arch/parisc/kernel/syscalls/syscall.tbl   |  2 +-
>  arch/powerpc/kernel/syscalls/syscall.tbl  |  2 +-
>  arch/s390/kernel/syscalls/syscall.tbl |  2 +-
>  arch/sparc/kernel/syscalls/syscall.tbl|  2 +-
>  arch/x86/entry/syscalls/syscall_32.tbl|  2 +-
>  arch/x86/entry/syscalls/syscall_64.tbl|  2 +-
>  include/linux/compat.h|  6 --
>  include/uapi/asm-generic/unistd.h |  2 +-
>  kernel/kexec.c| 75 ++-
>  12 files changed, 29 insertions(+), 72 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/unistd32.h 
> b/arch/arm64/include/asm/unistd32.h
> index 734860ac7cf9..b6517df74037 100644
> --- a/arch/arm64/include/asm/unistd32.h
> +++ b/arch/arm64/include/asm/unistd32.h
> @@ -705,7 +705,7 @@ __SYSCALL(__NR_getcpu, sys_getcpu)
>  #define __NR_epoll_pwait 346
>  __SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait)
>  #define __NR_kexec_load 347
> -__SYSCALL(__NR_kexec_load, compat_sys_kexec_load)
> +__SYSCALL(__NR_kexec_load, sys_kexec_load)
>  #define __NR_utimensat 348
>  __SYSCALL(__NR_utimensat, sys_utimensat_time32)
>  #define __NR_signalfd 349
> diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl 
> b/arch/mips/kernel/syscalls/syscall_n32.tbl
> index f9df9edb67a4..ad157aab4c09 100644
> --- a/arch/mips/kernel/syscalls/syscall_n32.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
> @@ -282,7 +282,7 @@
>  271  n32 move_pages  compat_sys_move_pages
>  272  n32 set_robust_list compat_sys_set_robust_list
>  273  n32 get_robust_list compat_sys_get_robust_list
> -274  n32 kexec_load  compat_sys_kexec_load
> +274  n32 kexec_load  sys_kexec_load
>  275  n32 getcpu  sys_getcpu
>  276  n32 epoll_pwait compat_sys_epoll_pwait
>  277  n32 ioprio_set  sys_ioprio_set
> diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl 
> b/arch/mips/kernel/syscalls/syscall_o32.tbl
> index 195b43cf27c8..57baf6c8008f 100644
> --- a/arch/mips/kernel/syscalls/syscall_o32.tbl
> +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
> @@ -322,7 +322,7 @@
>  308  o32 move_pages  sys_move_pages  
> compat_sys_move_pages
>  309  o32 set_robust_list sys_set_robust_list 
> compat_sys_set_robust_list
>  310  o32 get_robust_list sys_get_robust_list 
> compat_sys_get_robust_list
> -311  o32 kexec_load  sys_kexec_load  
> compat_sys_kexec_load
> +311  o32 kexec_load  sys_kexec_load
>  312  o32 getcpu  sys_getcpu
>  313  o32 epoll_pwait sys_epoll_pwait 
> compat_sys_epoll_pwait
>  314  o32 ioprio_set  sys_ioprio_set
> diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
> b/arch/parisc/kernel/syscalls/syscall.tbl
> index def64d221cd4..778bf166d7bd 100644
> --- a/arch/parisc/kernel/syscalls/syscall.tbl
> +++ b/arch/parisc/kernel/syscalls/syscall.tbl
> @@ -336,7 +336,7 @@
>  297  common  epoll_pwait sys_epoll_pwait 
> compat_sys_epoll_pwait
>  298  common  statfs64sys_statfs64
> compat_sys_statfs64
>  299  common  fstatfs64   sys_fstatfs64   
> compat_sys_fstatfs64
> -300  common  kexec_load  sys_kexec_load  
> compat_sys_kexec_load
> +300  common  kexec_load  sys_kexec_load
>  301  32  utimensat   sys_utimensat_time32
>  301  64  utimensat   sys_utimensat
>  302  common  signalfdsys_signalfd
> compat_sys_signalfd
> diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
> b/arch/powerpc/kernel/syscalls/syscall.tbl
> index c2d737ff2e7b..f128ba8b9a71 100644
> --- a/arch/powerpc/kernel/syscalls/syscall.tbl
> +++ b/arch/powerpc/kernel/syscalls/syscall.tbl
> @@ -350,7 +350,7 @@
>  265  64  mq_timedreceive sys_mq_timedreceive
>  266  nospu   mq_notify   sys_mq_notify   
> compat_sys_mq_notify
>  267  nospu   mq_getsetattr   sys_mq_getsetattr   
> compat_sys_mq_getsetattr
> -268  nospu   kexec_load   

Re: [PATCH 1/4] x86: add __X32_COND_SYSCALL() macro

2020-09-18 Thread Christoph Hellwig
On Fri, Sep 18, 2020 at 03:24:36PM +0200, Arnd Bergmann wrote:
> sys_move_pages() is an optional syscall, and once we remove
> the compat version of it in favor of the native one with an
> in_compat_syscall() check, the x32 syscall table refers to
> a __x32_sys_move_pages symbol that may not exist when the
> syscall is disabled.
> 
> Change the COND_SYSCALL() definition on x86 to also include
> the redirection for x32.
> 
> Signed-off-by: Arnd Bergmann 

Adding the x86 maintainers and Brian Gerst.  Brian proposed another
problem to the mess that most of the compat syscall handlers used by
x32 here:

   https://lkml.org/lkml/2020/6/16/664

hpa didn't particularly like it, but with your and my pending series
we'll soon use more native than compat syscalls for x32, so something
will need to change..

> ---
>  arch/x86/include/asm/syscall_wrapper.h | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/arch/x86/include/asm/syscall_wrapper.h 
> b/arch/x86/include/asm/syscall_wrapper.h
> index a84333adeef2..5eacd35a7f97 100644
> --- a/arch/x86/include/asm/syscall_wrapper.h
> +++ b/arch/x86/include/asm/syscall_wrapper.h
> @@ -171,12 +171,16 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs 
> *regs);
>   __SYS_STUBx(x32, compat_sys##name,  \
>   SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
>  
> +#define __X32_COND_SYSCALL(name) \
> + __COND_SYSCALL(x32, sys_##name)
> +
>  #define __X32_COMPAT_COND_SYSCALL(name)  
> \
>   __COND_SYSCALL(x32, compat_sys_##name)
>  
>  #define __X32_COMPAT_SYS_NI(name)\
>   __SYS_NI(x32, compat_sys_##name)
>  #else /* CONFIG_X86_X32 */
> +#define __X32_COND_SYSCALL(name)
>  #define __X32_COMPAT_SYS_STUB0(name)
>  #define __X32_COMPAT_SYS_STUBx(x, name, ...)
>  #define __X32_COMPAT_COND_SYSCALL(name)
> @@ -253,6 +257,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs 
> *regs);
>   static long __do_sys_##sname(const struct pt_regs *__unused)
>  
>  #define COND_SYSCALL(name)   \
> + __X32_COND_SYSCALL(name)\
>   __X64_COND_SYSCALL(name)\
>   __IA32_COND_SYSCALL(name)
>  
> -- 
> 2.27.0
> 
---end quoted text---

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] MIPS: Loongson64: Add kexec/kdump support

2020-09-18 Thread Huacai Chen
Hi, Jinyang,

On Fri, Sep 18, 2020 at 2:20 PM Jinyang He  wrote:
>
> On 09/17/2020 09:52 PM, Zhou Yanjie wrote:
> > Hello,
> >
> > 在 2020/9/17 下午8:41, Jinyang He 写道:
> >> Hi, Huacai,
> >>
> >>
> >> On 09/16/2020 01:39 PM, Huacai Chen wrote:
> >>> Hi, Jinyang,
> >>>
> >>> On Tue, Sep 15, 2020 at 10:17 PM Jinyang He 
> >>> wrote:
> 
> 
>  On 09/16/2020 09:33 AM, Jiaxun Yang wrote:
> > 于 2020年9月15日 GMT+08:00 下午9:07:43, Jinyang He
> >  写到:
> >> Add loongson_kexec_prepare(), loongson_kexec_shutdown() and
> >> loongson_kexec_crashdown() for passing the parameters of kexec_args.
> >>
> >> To start loongson64, CPU0 needs 3 parameters:
> >> fw_arg0: the number of cmd.
> >> fw_arg1: cmd structure which seems strange, the cmd array[index]'s
> >>   value is cmd string's address, index >= 1.
> >> fw_arg2: environment.
> >>
> >> Secondary CPUs do not need parameter at once. They query their
> >> mailbox to get PC, SP and GP in a loop before CPU0 brings them up
> >> and passes these parameters via mailbox.
> >>
> >> loongson_kexec_prepare(): Alloc new memory to save cmd for kexec.
> >> Combine the kexec append option string as cmd structure, and the cmd
> >> struct will be parsed in fw_init_cmdline() of
> >> arch/mips/fw/lib/cmdline.c.
> >> image->control_code_page need pointing to a safe memory page. In
> >> order to
> >> maintain compatibility for the old firmware the low 2MB is reserverd
> >> and safe for Loongson. So let it points here.
> >>
> >> loongson_kexec_shutdown(): Wake up all present CPUs and let them go
> >> to reboot_code_buffer. Pass the kexec parameters to kexec_args.
> >>
> >> loongson_crash_shutdown(): Pass the kdump parameters to kexec_args.
> >>
> >> The assembly part provide a way like BIOS doing to keep secondary
> >> CPUs in a querying loop.
> >>
> >> This patch referenced [1][2][3].
> >>
> >> [1] arch/mips/cavium-octeon/setup.c
> >> [2] https://patchwork.kernel.org/patch/10799217/
> >> [3]
> >> https://gitee.com/loongsonlab/qemu/blob/master/hw/mips/loongson3a_rom.h
> >>
> >>
> >> Co-developed-by: Youling Tang 
> >> Signed-off-by: Youling Tang 
> >> Signed-off-by: Jinyang He 
> >> ---
> >> arch/mips/kernel/relocate_kernel.S | 19 
> >> arch/mips/loongson64/reset.c   | 88
> >> ++
> >> 2 files changed, 107 insertions(+)
> >>
> >> diff --git a/arch/mips/kernel/relocate_kernel.S
> >> b/arch/mips/kernel/relocate_kernel.S
> >> index ac87089..061cbfb 100644
> >> --- a/arch/mips/kernel/relocate_kernel.S
> >> +++ b/arch/mips/kernel/relocate_kernel.S
> >> @@ -133,6 +133,25 @@ LEAF(kexec_smp_wait)
> >> #else
> >>   sync
> >> #endif
> >> +
> >> +#ifdef CONFIG_CPU_LOONGSON64
> >> +#define MAILBOX_BASE 0x90003ff01000
> > Please avoid hardcoded SMP information. You're breaking Loongson
> > 3B support.
> >
>  Ok, I see. Since my machine is Loongson 3A. I'll send v2
>  after I test it in 3B.
> >>> 1, My original version can work on both Loongson-3A and Loongson-3B,
> >>> why you modify my patch and hadn't discuss with me?
> >>>
> >>> 2, With this single patch both kexec and kdump cannot work reliably,
> >>> because kexec need this patch:
> >>> https://patchwork.kernel.org/patch/11695929/
> >>>
> >>> and kdump need my first patch in my original version:
> >>> https://patchwork.kernel.org/patch/10799215/
> >>>
> >>> You may argue that you have tested. Yes, I believe that, I'm not
> >>> saying that you haven't test, and I'm not saying that your patch
> >>> cannot work, I'm just saying that your patch is not robust.
> >>>
> >>> 3, I'm the original author and paying attention to kexec/kdump
> >>> continuosly, I will send a new version once the above two patches be
> >>> accepted. But you re-send my patch without any communication with me,
> >>> why you so impatient?
> >>>
> >>> Huacai
> >>>
> >>
> >> 1, Your original version:
> >>https://patchwork.kernel.org/patch/10799217/
> >>
> >> This patch can work on Loongson-3A, I tested it.
> >>
> >> But it works wrong after the follow behaviors,
> >>kexec -l vmlinux --append=cmdline_kexec
> >>kexec -p vmlinux --append=cmdline_kdump
> >>kexec -e
> >>
> >
> > What 's wrong with you Loongson guys? Why do you always send patches
> > with the same functions as the original author withou any
> > communication with the original author? Especially the original author
> > of the patch is the maintainer of Loongson64 architecture.
>
> Hi,
>
> Just take it easy, try to keep calm and do not panic.
>
> You do not understand the story, the background, what happened before,
> and what happens next, so please let me express my opinion.
>
> When I use the mainline kernel on the Loongson platform, I find kexec
> can not work well 

Re: [PATCH] Only allow to set crash_kexec_post_notifiers on boot time

2020-09-18 Thread Andrew Morton
On Fri, 18 Sep 2020 11:25:46 +0800 Dave Young  wrote:

> crash_kexec_post_notifiers enables running various panic notifier
> before kdump kernel booting. This increases risks of kdump failure.
> It is well documented in kernel-parameters.txt. We do not suggest
> people to enable it together with kdump unless he/she is really sure.
> This is also not suggested to be enabled by default when users are
> not aware in distributions.
> 
> But unfortunately it is enabled by default in systemd, see below
> discussions in a systemd report, we can not convince systemd to change
> it:
> https://github.com/systemd/systemd/issues/16661
> 
> Actually we have got reports about kdump kernel hangs in both s390x
> and powerpcle cases caused by the systemd change,  also some x86 cases
> could also be caused by the same (although that is in Hyper-V code
> instead of systemd, that need to be addressed separately).
> 
> Thus to avoid the auto enablement here just disable the param writable
> permission in sysfs.
> 

Well.  I don't think this is at all a desirable way of resolving a
disagreement with the systemd developers

At the above github address I'm seeing "ryncsn added a commit to
ryncsn/systemd that referenced this issue 9 days ago", "pstore: don't
enable crash_kexec_post_notifiers by default".  So didn't that address
the issue?

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH printk v2 1/3] printk: move printk_info into separate array

2020-09-18 Thread John Ogness
The majority of the size of a descriptor is taken up by meta data,
which is often not of interest to the ringbuffer (for example,
when performing state checks). Since descriptors are often
temporarily stored on the stack, keeping their size minimal will
help reduce stack pressure.

Rather than embedding the printk_info into the descriptor, create
a separate printk_info array. The index of a descriptor in the
descriptor array corresponds to the printk_info with the same
index in the printk_info array. The rules for validity of a
printk_info match the existing rules for the data blocks: the
descriptor must be in a consistent state.

Signed-off-by: John Ogness 
---
 kernel/printk/printk.c|  30 +--
 kernel/printk/printk_ringbuffer.c | 145 +++---
 kernel/printk/printk_ringbuffer.h |  29 +++---
 3 files changed, 133 insertions(+), 71 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 9a2e23191576..25cfe4fe48af 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -959,11 +959,11 @@ void log_buf_vmcoreinfo_setup(void)
VMCOREINFO_STRUCT_SIZE(prb_desc_ring);
VMCOREINFO_OFFSET(prb_desc_ring, count_bits);
VMCOREINFO_OFFSET(prb_desc_ring, descs);
+   VMCOREINFO_OFFSET(prb_desc_ring, infos);
VMCOREINFO_OFFSET(prb_desc_ring, head_id);
VMCOREINFO_OFFSET(prb_desc_ring, tail_id);
 
VMCOREINFO_STRUCT_SIZE(prb_desc);
-   VMCOREINFO_OFFSET(prb_desc, info);
VMCOREINFO_OFFSET(prb_desc, state_var);
VMCOREINFO_OFFSET(prb_desc, text_blk_lpos);
VMCOREINFO_OFFSET(prb_desc, dict_blk_lpos);
@@ -1097,11 +1097,13 @@ static char setup_dict_buf[CONSOLE_EXT_LOG_MAX] 
__initdata;
 
 void __init setup_log_buf(int early)
 {
+   struct printk_info *new_infos;
unsigned int new_descs_count;
struct prb_desc *new_descs;
struct printk_info info;
struct printk_record r;
size_t new_descs_size;
+   size_t new_infos_size;
unsigned long flags;
char *new_dict_buf;
char *new_log_buf;
@@ -1142,8 +1144,7 @@ void __init setup_log_buf(int early)
if (unlikely(!new_dict_buf)) {
pr_err("log_buf_len: %lu dict bytes not available\n",
   new_log_buf_len);
-   memblock_free(__pa(new_log_buf), new_log_buf_len);
-   return;
+   goto err_free_log_buf;
}
 
new_descs_size = new_descs_count * sizeof(struct prb_desc);
@@ -1151,9 +1152,15 @@ void __init setup_log_buf(int early)
if (unlikely(!new_descs)) {
pr_err("log_buf_len: %zu desc bytes not available\n",
   new_descs_size);
-   memblock_free(__pa(new_dict_buf), new_log_buf_len);
-   memblock_free(__pa(new_log_buf), new_log_buf_len);
-   return;
+   goto err_free_dict_buf;
+   }
+
+   new_infos_size = new_descs_count * sizeof(struct printk_info);
+   new_infos = memblock_alloc(new_infos_size, LOG_ALIGN);
+   if (unlikely(!new_infos)) {
+   pr_err("log_buf_len: %zu info bytes not available\n",
+  new_infos_size);
+   goto err_free_descs;
}
 
prb_rec_init_rd(&r, &info,
@@ -1163,7 +1170,8 @@ void __init setup_log_buf(int early)
prb_init(&printk_rb_dynamic,
 new_log_buf, ilog2(new_log_buf_len),
 new_dict_buf, ilog2(new_log_buf_len),
-new_descs, ilog2(new_descs_count));
+new_descs, ilog2(new_descs_count),
+new_infos);
 
logbuf_lock_irqsave(flags);
 
@@ -1192,6 +1200,14 @@ void __init setup_log_buf(int early)
pr_info("log_buf_len: %u bytes\n", log_buf_len);
pr_info("early log buf free: %u(%u%%)\n",
free, (free * 100) / __LOG_BUF_LEN);
+   return;
+
+err_free_descs:
+   memblock_free(__pa(new_descs), new_descs_size);
+err_free_dict_buf:
+   memblock_free(__pa(new_dict_buf), new_log_buf_len);
+err_free_log_buf:
+   memblock_free(__pa(new_log_buf), new_log_buf_len);
 }
 
 static bool __read_mostly ignore_loglevel;
diff --git a/kernel/printk/printk_ringbuffer.c 
b/kernel/printk/printk_ringbuffer.c
index f4e2e9890e0f..de4b10a98623 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -15,10 +15,10 @@
  * The printk_ringbuffer is made up of 3 internal ringbuffers:
  *
  *   desc_ring
- * A ring of descriptors. A descriptor contains all record meta data
- * (sequence number, timestamp, loglevel, etc.) as well as internal state
- * information about the record and logical positions specifying where in
- * the other ringbuffers the text and dictionary strings are located.
+ * A ring of descriptors and their meta data (such as sequence number,
+ * timestamp, loglevel, etc.) as well as internal state information about
+ * the record and logica

[PATCH printk v2 3/3] printk: remove dict ring

2020-09-18 Thread John Ogness
Since there is no code that will ever store anything into the dict
ring, remove it. If any future dictionary properties are to be
added, these should be added to the struct printk_info.

Signed-off-by: John Ogness 
---
 kernel/printk/printk.c|  46 +++--
 kernel/printk/printk_ringbuffer.c | 155 +++---
 kernel/printk/printk_ringbuffer.h |  63 +++-
 3 files changed, 64 insertions(+), 200 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 269f0abd1ddf..77660354a7c5 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -427,7 +427,6 @@ static u32 log_buf_len = __LOG_BUF_LEN;
  * Define the average message size. This only affects the number of
  * descriptors that will be available. Underestimating is better than
  * overestimating (too many available descriptors is better than not enough).
- * The dictionary buffer will be the same size as the text buffer.
  */
 #define PRB_AVGBITS 5  /* 32 character average length */
 
@@ -435,7 +434,7 @@ static u32 log_buf_len = __LOG_BUF_LEN;
 #error CONFIG_LOG_BUF_SHIFT value too small.
 #endif
 _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
-PRB_AVGBITS, PRB_AVGBITS, &__log_buf[0]);
+PRB_AVGBITS, &__log_buf[0]);
 
 static struct printk_ringbuffer printk_rb_dynamic;
 
@@ -502,12 +501,12 @@ static int log_store(u32 caller_id, int facility, int 
level,
struct printk_record r;
u16 trunc_msg_len = 0;
 
-   prb_rec_init_wr(&r, text_len, 0);
+   prb_rec_init_wr(&r, text_len);
 
if (!prb_reserve(&e, prb, &r)) {
/* truncate the message if it is too long for empty buffer */
truncate_msg(&text_len, &trunc_msg_len);
-   prb_rec_init_wr(&r, text_len + trunc_msg_len, 0);
+   prb_rec_init_wr(&r, text_len + trunc_msg_len);
/* survive when the log buffer is too small for trunc_msg */
if (!prb_reserve(&e, prb, &r))
return 0;
@@ -897,8 +896,7 @@ static int devkmsg_open(struct inode *inode, struct file 
*file)
mutex_init(&user->lock);
 
prb_rec_init_rd(&user->record, &user->info,
-   &user->text_buf[0], sizeof(user->text_buf),
-   NULL, 0);
+   &user->text_buf[0], sizeof(user->text_buf));
 
logbuf_lock_irq();
user->seq = prb_first_valid_seq(prb);
@@ -956,7 +954,6 @@ void log_buf_vmcoreinfo_setup(void)
VMCOREINFO_STRUCT_SIZE(printk_ringbuffer);
VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring);
VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring);
-   VMCOREINFO_OFFSET(printk_ringbuffer, dict_data_ring);
VMCOREINFO_OFFSET(printk_ringbuffer, fail);
 
VMCOREINFO_STRUCT_SIZE(prb_desc_ring);
@@ -969,7 +966,6 @@ void log_buf_vmcoreinfo_setup(void)
VMCOREINFO_STRUCT_SIZE(prb_desc);
VMCOREINFO_OFFSET(prb_desc, state_var);
VMCOREINFO_OFFSET(prb_desc, text_blk_lpos);
-   VMCOREINFO_OFFSET(prb_desc, dict_blk_lpos);
 
VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos);
VMCOREINFO_OFFSET(prb_data_blk_lpos, begin);
@@ -979,7 +975,6 @@ void log_buf_vmcoreinfo_setup(void)
VMCOREINFO_OFFSET(printk_info, seq);
VMCOREINFO_OFFSET(printk_info, ts_nsec);
VMCOREINFO_OFFSET(printk_info, text_len);
-   VMCOREINFO_OFFSET(printk_info, dict_len);
VMCOREINFO_OFFSET(printk_info, caller_id);
VMCOREINFO_OFFSET(printk_info, dev_info);
 
@@ -1080,7 +1075,7 @@ static unsigned int __init add_to_rb(struct 
printk_ringbuffer *rb,
struct prb_reserved_entry e;
struct printk_record dest_r;
 
-   prb_rec_init_wr(&dest_r, r->info->text_len, 0);
+   prb_rec_init_wr(&dest_r, r->info->text_len);
 
if (!prb_reserve(&e, rb, &dest_r))
return 0;
@@ -,7 +1106,6 @@ void __init setup_log_buf(int early)
size_t new_descs_size;
size_t new_infos_size;
unsigned long flags;
-   char *new_dict_buf;
char *new_log_buf;
unsigned int free;
u64 seq;
@@ -1146,19 +1140,12 @@ void __init setup_log_buf(int early)
return;
}
 
-   new_dict_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
-   if (unlikely(!new_dict_buf)) {
-   pr_err("log_buf_len: %lu dict bytes not available\n",
-  new_log_buf_len);
-   goto err_free_log_buf;
-   }
-
new_descs_size = new_descs_count * sizeof(struct prb_desc);
new_descs = memblock_alloc(new_descs_size, LOG_ALIGN);
if (unlikely(!new_descs)) {
pr_err("log_buf_len: %zu desc bytes not available\n",
   new_descs_size);
-   goto err_free_dict_buf;
+   goto err_free_log_buf;
}
 
new_infos_size = new_descs_count * sizeof(struct printk_info);
@@ -1169,13 +1156,10 

[PATCH printk v2 2/3] printk: move dictionary keys to dev_printk_info

2020-09-18 Thread John Ogness
Dictionaries are only used for SUBSYSTEM and DEVICE properties. The
current implementation stores the property names each time they are
used. This requires more space than otherwise necessary. Also,
because the dictionary entries are currently considered optional,
it cannot be relied upon that they are always available, even if the
writer wanted to store them. These issues will increase should new
dictionary properties be introduced.

Rather than storing the subsystem and device properties in the
dict ring, introduce a struct dev_printk_info with separate fields
to store only the property values. Embed this struct within the
struct printk_info to provide guaranteed availability.

Signed-off-by: John Ogness 
---
 Documentation/admin-guide/kdump/gdbmacros.txt |  73 
 drivers/base/core.c   |  46 ++---
 include/linux/dev_printk.h|   8 +
 include/linux/printk.h|   6 +-
 kernel/printk/internal.h  |   4 +-
 kernel/printk/printk.c| 165 +-
 kernel/printk/printk_ringbuffer.h |   3 +
 kernel/printk/printk_safe.c   |   2 +-
 scripts/gdb/linux/dmesg.py|  16 +-
 9 files changed, 163 insertions(+), 160 deletions(-)

diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt 
b/Documentation/admin-guide/kdump/gdbmacros.txt
index 94fabb165abf..82aecdcae8a6 100644
--- a/Documentation/admin-guide/kdump/gdbmacros.txt
+++ b/Documentation/admin-guide/kdump/gdbmacros.txt
@@ -172,13 +172,13 @@ end
 
 define dump_record
set var $desc = $arg0
-   if ($argc > 1)
-   set var $prev_flags = $arg1
+   set var $info = $arg1
+   if ($argc > 2)
+   set var $prev_flags = $arg2
else
set var $prev_flags = 0
end
 
-   set var $info = &$desc->info
set var $prefix = 1
set var $newline = 1
 
@@ -237,44 +237,36 @@ define dump_record
 
# handle dictionary data
 
-   set var $begin = $desc->dict_blk_lpos.begin % (1U << 
prb->dict_data_ring.size_bits)
-   set var $next = $desc->dict_blk_lpos.next % (1U << 
prb->dict_data_ring.size_bits)
-
-   # handle data-less record
-   if ($begin & 1)
-   set var $dict_len = 0
-   set var $dict = ""
-   else
-   # handle wrapping data block
-   if ($begin > $next)
-   set var $begin = 0
-   end
-
-   # skip over descriptor id
-   set var $begin = $begin + sizeof(long)
-
-   # handle truncated message
-   if ($next - $begin < $info->dict_len)
-   set var $dict_len = $next - $begin
-   else
-   set var $dict_len = $info->dict_len
+   set var $dict = &$info->dev_info.subsystem[0]
+   set var $dict_len = sizeof($info->dev_info.subsystem)
+   if ($dict[0] != '\0')
+   printf " SUBSYSTEM="
+   set var $idx = 0
+   while ($idx < $dict_len)
+   set var $c = $dict[$idx]
+   if ($c == '\0')
+   loop_break
+   else
+   if ($c < ' ' || $c >= 127 || $c == '\\')
+   printf "\\x%02x", $c
+   else
+   printf "%c", $c
+   end
+   end
+   set var $idx = $idx + 1
end
-
-   set var $dict = &prb->dict_data_ring.data[$begin]
+   printf "\n"
end
 
-   if ($dict_len > 0)
+   set var $dict = &$info->dev_info.device[0]
+   set var $dict_len = sizeof($info->dev_info.device)
+   if ($dict[0] != '\0')
+   printf " DEVICE="
set var $idx = 0
-   set var $line = 1
while ($idx < $dict_len)
-   if ($line)
-   printf " "
-   set var $line = 0
-   end
set var $c = $dict[$idx]
if ($c == '\0')
-   printf "\n"
-   set var $line = 1
+   loop_break
else
if ($c < ' ' || $c >= 127 || $c == '\\')
printf "\\x%02x", $c
@@ -288,10 +280,10 @@ define dump_record
end
 end
 document dump_record
-   Dump a single record. The first parameter is the descriptor
-   sequence number, the second is optional and specifies the
-   previous record's flags, used for properly formatting
-   continued lines.
+   Dump a single record. The first parameter is the descriptor,
+   the second parameter is the in

[PATCH printk v2 0/3] printk: move dictionaries to meta data

2020-09-18 Thread John Ogness
Hello,

Here is v2 for a series to move all existing dictionary
properties (SUBSYSTEM and DEVICE) into the meta data of a
record, thus eliminating the need for the dict ring. This
change affects how the dictionaries are stored, but does not
affect how they are presented to userspace. (v1 is here [0]).

The main purpose of the change is to address concerns [1]
about the reliability of dictionary properties as well as
allowing to efficiently expand the type and amount of
meta data available [2].

This series is based heavily on the proof of concept [3] from
Petr Mladek. (Petr, feel free to add Co-developed-by tags.)

The series is based on the printk-rework branch of the printk
git tree:

f5f022e53b87 ("printk: reimplement log_cont using record extension")

The list of changes since v1:

drivers/base/core.c
===

- set_dev_info(): use strscpy() instead of snprintf() (thank
  you Rasmus Villemoes)

kernel/printk/printk.c
==

- setup_log_buf(): fix cleanup in error handling

- log_buf_vmcoreinfo_setup(): add VMCOREINFO for
  struct dev_printk_info array sizes so that crash tools
  do not need to rely on property value termination

John Ogness

[0] https://lkml.kernel.org/r/20200917131644.25838-1-john.ogn...@linutronix.de
[1] https://lkml.kernel.org/r/20200904151336.GC20558@alley
[2] https://lkml.kernel.org/r/008801d684f9$43e1c140$cba543c0$@samsung.com
[3] https://lkml.kernel.org/r/20200911095035.GI3864@alley

John Ogness (3):
  printk: move printk_info into separate array
  printk: move dictionary keys to dev_printk_info
  printk: remove dict ring

 Documentation/admin-guide/kdump/gdbmacros.txt |  73 ++---
 drivers/base/core.c   |  46 +--
 include/linux/dev_printk.h|   8 +
 include/linux/printk.h|   6 +-
 kernel/printk/internal.h  |   4 +-
 kernel/printk/printk.c| 221 ++---
 kernel/printk/printk_ringbuffer.c | 292 --
 kernel/printk/printk_ringbuffer.h |  95 ++
 kernel/printk/printk_safe.c   |   2 +-
 scripts/gdb/linux/dmesg.py|  16 +-
 10 files changed, 346 insertions(+), 417 deletions(-)

-- 
2.20.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH printk 3/3] printk: remove dict ring

2020-09-18 Thread Petr Mladek
On Thu 2020-09-17 15:22:44, John Ogness wrote:
> Since there is no code that will ever store anything into the dict
> ring, remove it. If any future dictionary properties are to be
> added, these should be added to the struct printk_info.
> 
> Signed-off-by: John Ogness 

I like the result. It simplified the code and removed some twists.

Reviewed-by: Petr Mladek 

Best Regards,
Petr

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH printk 2/3] printk: move dictionary keys to dev_printk_info

2020-09-18 Thread Petr Mladek
On Fri 2020-09-18 14:32:41, Rasmus Villemoes wrote:
> On 18/09/2020 14.13, Petr Mladek wrote:
> > On Fri 2020-09-18 08:16:37, Rasmus Villemoes wrote:
> >> On 17/09/2020 15.16, John Ogness wrote:
> >>
> >>>   if (dev->class)
> >>>   subsys = dev->class->name;
> >>>   else if (dev->bus)
> >>>   subsys = dev->bus->name;
> >>>   else
> >>> - return 0;
> >>> + return;
> >>>  
> >>> - pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys);
> >>> - if (pos >= hdrlen)
> >>> - goto overflow;
> >>> + snprintf(dev_info->subsystem, sizeof(dev_info->subsystem), subsys);
> >>
> >> It's unlikely that subsys would contain a %, but this will be yet
> >> another place to spend brain cycles ignoring if doing static analysis.
> >> So can we not do this. Either of strXcpy() for X=s,l will do the same
> >> thing, and likely faster.
> > 
> > Good point! Better be on the safe size in a generic printk() API.
> > 
> > Well, I am afraid that this would be only small drop in a huge lake.
> > class->name and bus->name seems to be passed to %s in so many
> > *print*() calls all over the kernel code.
> 
> So what? printf("%s", some_string_that_might_contain_percent_chars) is
> not a problem.

Grr, shame on me. I have completely messed this. The combination of
Friday afternoon and noisy kids did not help me much to get it right.

> printf(some_string_that_might_contain_percent_chars) is.

I fully agree that passing unknown string as "fmt" is dangerous and
must be used carefully. It is not needed here.

> And yes, one could do
> 
>   snprintf(dev_info->subsystem, sizeof(dev_info->subsystem), "%s", subsys);
>
> but one might as well avoid the snprintf overhead and use one of the
> strX functions that have the exact same semantics (copy as much as
> there's room for, ensure nul-termination).

Yes, we should use either snprinf() with %s or strXcpy().

Best Regards,
Petr

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH -next] fs/proc/vmcore: Fix 'sizez' kernel-doc warning in vmcore.c

2020-09-18 Thread Wang Hai
Fixes the following W=1 kernel build warning(s):

fs/proc/vmcore.c:458: warning: Excess function parameter 'sizez' description in 
'vmcore_alloc_buf'

Rename sizez to size.

Signed-off-by: Wang Hai 
---
 fs/proc/vmcore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index c3a345c28a93..bfb0aa4d63e4 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -446,7 +446,7 @@ static const struct vm_operations_struct vmcore_mmap_ops = {
 
 /**
  * vmcore_alloc_buf - allocate buffer in vmalloc memory
- * @sizez: size of buffer
+ * @size: size of buffer
  *
  * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap
  * the buffer to user-space by means of remap_vmalloc_range().
-- 
2.17.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 3/4] mm: remove compat_sys_move_pages

2020-09-18 Thread Arnd Bergmann
The compat move_pages() implementation uses compat_alloc_user_space()
for converting the pointer array. Moving the compat handling into
the function itself is a bit simpler and lets us avoid the
compat_alloc_user_space() call.

Signed-off-by: Arnd Bergmann 
---
 arch/arm64/include/asm/unistd32.h |  2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl |  2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl |  2 +-
 arch/parisc/kernel/syscalls/syscall.tbl   |  2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl  |  2 +-
 arch/s390/kernel/syscalls/syscall.tbl |  2 +-
 arch/sparc/kernel/syscalls/syscall.tbl|  2 +-
 arch/x86/entry/syscalls/syscall_32.tbl|  2 +-
 arch/x86/entry/syscalls/syscall_64.tbl|  2 +-
 include/linux/compat.h|  5 ---
 include/uapi/asm-generic/unistd.h |  2 +-
 kernel/sys_ni.c   |  1 -
 mm/migrate.c  | 45 +++
 13 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index b6517df74037..af793775ba98 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -699,7 +699,7 @@ __SYSCALL(__NR_tee, sys_tee)
 #define __NR_vmsplice 343
 __SYSCALL(__NR_vmsplice, compat_sys_vmsplice)
 #define __NR_move_pages 344
-__SYSCALL(__NR_move_pages, compat_sys_move_pages)
+__SYSCALL(__NR_move_pages, sys_move_pages)
 #define __NR_getcpu 345
 __SYSCALL(__NR_getcpu, sys_getcpu)
 #define __NR_epoll_pwait 346
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl 
b/arch/mips/kernel/syscalls/syscall_n32.tbl
index ad157aab4c09..7fa1ca45e44c 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -279,7 +279,7 @@
 268n32 sync_file_range sys_sync_file_range
 269n32 tee sys_tee
 270n32 vmsplicecompat_sys_vmsplice
-271n32 move_pages  compat_sys_move_pages
+271n32 move_pages  sys_move_pages
 272n32 set_robust_list compat_sys_set_robust_list
 273n32 get_robust_list compat_sys_get_robust_list
 274n32 kexec_load  sys_kexec_load
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl 
b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 57baf6c8008f..194c7fbeedf7 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -319,7 +319,7 @@
 305o32 sync_file_range sys_sync_file_range 
sys32_sync_file_range
 306o32 tee sys_tee
 307o32 vmsplicesys_vmsplice
compat_sys_vmsplice
-308o32 move_pages  sys_move_pages  
compat_sys_move_pages
+308o32 move_pages  sys_move_pages
 309o32 set_robust_list sys_set_robust_list 
compat_sys_set_robust_list
 310o32 get_robust_list sys_get_robust_list 
compat_sys_get_robust_list
 311o32 kexec_load  sys_kexec_load
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
b/arch/parisc/kernel/syscalls/syscall.tbl
index 778bf166d7bd..5c17edaffe70 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -331,7 +331,7 @@
 29264  sync_file_range sys_sync_file_range
 293common  tee sys_tee
 294common  vmsplicesys_vmsplice
compat_sys_vmsplice
-295common  move_pages  sys_move_pages  
compat_sys_move_pages
+295common  move_pages  sys_move_pages
 296common  getcpu  sys_getcpu
 297common  epoll_pwait sys_epoll_pwait 
compat_sys_epoll_pwait
 298common  statfs64sys_statfs64
compat_sys_statfs64
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
b/arch/powerpc/kernel/syscalls/syscall.tbl
index f128ba8b9a71..04fb42d7b377 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -389,7 +389,7 @@
 298common  faccessat   sys_faccessat
 299common  get_robust_list sys_get_robust_list 
compat_sys_get_robust_list
 300common  set_robust_list sys_set_robust_list 
compat_sys_set_robust_list
-301common  move_pages  sys_move_pages  
compat_sys_move_pages
+301common  move_pages  sys_move_pages
 302common  getcpu  sys_getcpu
 303nospu   epoll_pwait sys_epoll_pwait 
compat_sys_epoll_pwait
 304

[PATCH 4/4] mm: remove compat numa syscalls

2020-09-18 Thread Arnd Bergmann
The compat implementations for mbind, get_mempolicy, set_mempolicy
and migrate_pages are just there to handle the subtly different
layout of bitmaps on 32-bit hosts.

The compat implementation however lacks some of the checks that
are present in the native one, in particular for checking that
the extra bits are all zero when user space has a larger mask
size than the kernel. Worse, those extra bits do not get cleared
when copying in or out of the kernel, which can lead to incorrect
data as well.

Unify the implementation to handle the compat bitmap layout directly
in the get_nodes() and copy_nodes_to_user() helpers.  Splitting out
the get_bitmap() helper from get_nodes() also helps readability of the
native case.

On x86, two additional problems are addressed by this: compat tasks can
pass a bitmap at the end of a mapping, causing a fault when reading
across the page boundary for a 64-bit word. x32 tasks might also run
into problems with get_mempolicy corrupting data when an odd number of
32-bit words gets passed.

On parisc the migrate_pages() system call apparently had the wrong
calling convention, as big-endian architectures expect the words
inside of a bitmap to be swapped. This is not a problem though
since parisc has no NUMA support.

Signed-off-by: Arnd Bergmann 
---
 arch/arm64/include/asm/unistd32.h |   8 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl |   8 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl |   8 +-
 arch/parisc/kernel/syscalls/syscall.tbl   |   6 +-
 arch/powerpc/kernel/syscalls/syscall.tbl  |   8 +-
 arch/s390/kernel/syscalls/syscall.tbl |   8 +-
 arch/sparc/kernel/syscalls/syscall.tbl|   8 +-
 arch/x86/entry/syscalls/syscall_32.tbl|   2 +-
 include/linux/compat.h|  15 --
 include/uapi/asm-generic/unistd.h |   8 +-
 kernel/kexec.c|   6 +-
 kernel/sys_ni.c   |   4 -
 mm/mempolicy.c| 193 +-
 13 files changed, 79 insertions(+), 203 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index af793775ba98..31479f7120a0 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -649,11 +649,11 @@ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
 #define __NR_inotify_rm_watch 318
 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
 #define __NR_mbind 319
-__SYSCALL(__NR_mbind, compat_sys_mbind)
+__SYSCALL(__NR_mbind, sys_mbind)
 #define __NR_get_mempolicy 320
-__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy)
+__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
 #define __NR_set_mempolicy 321
-__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy)
+__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
 #define __NR_openat 322
 __SYSCALL(__NR_openat, compat_sys_openat)
 #define __NR_mkdirat 323
@@ -811,7 +811,7 @@ __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_io_pgetevents 399
 __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
 #define __NR_migrate_pages 400
-__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages)
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
 #define __NR_kexec_file_load 401
 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
 /* 402 is unused */
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl 
b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 7fa1ca45e44c..15fda882d07e 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -239,9 +239,9 @@
 228n32 clock_nanosleep sys_clock_nanosleep_time32
 229n32 tgkill  sys_tgkill
 230n32 utimes  sys_utimes_time32
-231n32 mbind   compat_sys_mbind
-232n32 get_mempolicy   compat_sys_get_mempolicy
-233n32 set_mempolicy   compat_sys_set_mempolicy
+231n32 mbind   sys_mbind
+232n32 get_mempolicy   sys_get_mempolicy
+233n32 set_mempolicy   sys_set_mempolicy
 234n32 mq_open compat_sys_mq_open
 235n32 mq_unlink   sys_mq_unlink
 236n32 mq_timedsendsys_mq_timedsend_time32
@@ -258,7 +258,7 @@
 247n32 inotify_initsys_inotify_init
 248n32 inotify_add_watch   sys_inotify_add_watch
 249n32 inotify_rm_watchsys_inotify_rm_watch
-250n32 migrate_pages   compat_sys_migrate_pages
+250n32 migrate_pages   sys_migrate_pages
 251n32 openat  sys_openat
 252n32 mkdirat sys_mkdirat
 253n32 mknodat sys_mknodat
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl 
b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 194c7fbeedf7..6

[PATCH 2/4] kexec: remove compat_sys_kexec_load syscall

2020-09-18 Thread Arnd Bergmann
The compat version of sys_kexec_load() uses compat_alloc_user_space to
convert the user-provided arguments into the native format.

Move the conversion into the regular implementation with
an in_compat_syscall() check to simplify it and avoid the
compat_alloc_user_space() call.

Signed-off-by: Arnd Bergmann 
---
 arch/arm64/include/asm/unistd32.h |  2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl |  2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl |  2 +-
 arch/parisc/kernel/syscalls/syscall.tbl   |  2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl  |  2 +-
 arch/s390/kernel/syscalls/syscall.tbl |  2 +-
 arch/sparc/kernel/syscalls/syscall.tbl|  2 +-
 arch/x86/entry/syscalls/syscall_32.tbl|  2 +-
 arch/x86/entry/syscalls/syscall_64.tbl|  2 +-
 include/linux/compat.h|  6 --
 include/uapi/asm-generic/unistd.h |  2 +-
 kernel/kexec.c| 75 ++-
 12 files changed, 29 insertions(+), 72 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index 734860ac7cf9..b6517df74037 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -705,7 +705,7 @@ __SYSCALL(__NR_getcpu, sys_getcpu)
 #define __NR_epoll_pwait 346
 __SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait)
 #define __NR_kexec_load 347
-__SYSCALL(__NR_kexec_load, compat_sys_kexec_load)
+__SYSCALL(__NR_kexec_load, sys_kexec_load)
 #define __NR_utimensat 348
 __SYSCALL(__NR_utimensat, sys_utimensat_time32)
 #define __NR_signalfd 349
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl 
b/arch/mips/kernel/syscalls/syscall_n32.tbl
index f9df9edb67a4..ad157aab4c09 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -282,7 +282,7 @@
 271n32 move_pages  compat_sys_move_pages
 272n32 set_robust_list compat_sys_set_robust_list
 273n32 get_robust_list compat_sys_get_robust_list
-274n32 kexec_load  compat_sys_kexec_load
+274n32 kexec_load  sys_kexec_load
 275n32 getcpu  sys_getcpu
 276n32 epoll_pwait compat_sys_epoll_pwait
 277n32 ioprio_set  sys_ioprio_set
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl 
b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 195b43cf27c8..57baf6c8008f 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -322,7 +322,7 @@
 308o32 move_pages  sys_move_pages  
compat_sys_move_pages
 309o32 set_robust_list sys_set_robust_list 
compat_sys_set_robust_list
 310o32 get_robust_list sys_get_robust_list 
compat_sys_get_robust_list
-311o32 kexec_load  sys_kexec_load  
compat_sys_kexec_load
+311o32 kexec_load  sys_kexec_load
 312o32 getcpu  sys_getcpu
 313o32 epoll_pwait sys_epoll_pwait 
compat_sys_epoll_pwait
 314o32 ioprio_set  sys_ioprio_set
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
b/arch/parisc/kernel/syscalls/syscall.tbl
index def64d221cd4..778bf166d7bd 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -336,7 +336,7 @@
 297common  epoll_pwait sys_epoll_pwait 
compat_sys_epoll_pwait
 298common  statfs64sys_statfs64
compat_sys_statfs64
 299common  fstatfs64   sys_fstatfs64   
compat_sys_fstatfs64
-300common  kexec_load  sys_kexec_load  
compat_sys_kexec_load
+300common  kexec_load  sys_kexec_load
 30132  utimensat   sys_utimensat_time32
 30164  utimensat   sys_utimensat
 302common  signalfdsys_signalfd
compat_sys_signalfd
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
b/arch/powerpc/kernel/syscalls/syscall.tbl
index c2d737ff2e7b..f128ba8b9a71 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -350,7 +350,7 @@
 26564  mq_timedreceive sys_mq_timedreceive
 266nospu   mq_notify   sys_mq_notify   
compat_sys_mq_notify
 267nospu   mq_getsetattr   sys_mq_getsetattr   
compat_sys_mq_getsetattr
-268nospu   kexec_load  sys_kexec_load  
compat_sys_kexec_load
+268nospu   kexec_load  sys_kexec_load
 269nospu   add_key sys_add_key
 270nospu   

[PATCH 1/4] x86: add __X32_COND_SYSCALL() macro

2020-09-18 Thread Arnd Bergmann
sys_move_pages() is an optional syscall, and once we remove
the compat version of it in favor of the native one with an
in_compat_syscall() check, the x32 syscall table refers to
a __x32_sys_move_pages symbol that may not exist when the
syscall is disabled.

Change the COND_SYSCALL() definition on x86 to also include
the redirection for x32.

Signed-off-by: Arnd Bergmann 
---
 arch/x86/include/asm/syscall_wrapper.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/include/asm/syscall_wrapper.h 
b/arch/x86/include/asm/syscall_wrapper.h
index a84333adeef2..5eacd35a7f97 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -171,12 +171,16 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs 
*regs);
__SYS_STUBx(x32, compat_sys##name,  \
SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
 
+#define __X32_COND_SYSCALL(name)   \
+   __COND_SYSCALL(x32, sys_##name)
+
 #define __X32_COMPAT_COND_SYSCALL(name)
\
__COND_SYSCALL(x32, compat_sys_##name)
 
 #define __X32_COMPAT_SYS_NI(name)  \
__SYS_NI(x32, compat_sys_##name)
 #else /* CONFIG_X86_X32 */
+#define __X32_COND_SYSCALL(name)
 #define __X32_COMPAT_SYS_STUB0(name)
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)
 #define __X32_COMPAT_COND_SYSCALL(name)
@@ -253,6 +257,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs 
*regs);
static long __do_sys_##sname(const struct pt_regs *__unused)
 
 #define COND_SYSCALL(name) \
+   __X32_COND_SYSCALL(name)\
__X64_COND_SYSCALL(name)\
__IA32_COND_SYSCALL(name)
 
-- 
2.27.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 0/4] syscalls: remove compat_alloc_user_space callers

2020-09-18 Thread Arnd Bergmann
Going through compat_alloc_user_space() to convert indirect system call
arguments tends to add complexity compared to handling the native and
compat logic in the same code.

I have patches for all other uses of compat_alloc_user_space() as well,
and would expect that we can subsequently remove the interface itself.

  Arnd

Arnd Bergmann (4):
  x86: add __X32_COND_SYSCALL() macro
  kexec: remove compat_sys_kexec_load syscall
  mm: remove compat_sys_move_pages
  mm: remove compat numa syscalls

 arch/arm64/include/asm/unistd32.h |  12 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl |  12 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl |  12 +-
 arch/parisc/kernel/syscalls/syscall.tbl   |  10 +-
 arch/powerpc/kernel/syscalls/syscall.tbl  |  12 +-
 arch/s390/kernel/syscalls/syscall.tbl |  12 +-
 arch/sparc/kernel/syscalls/syscall.tbl|  12 +-
 arch/x86/entry/syscalls/syscall_32.tbl|   6 +-
 arch/x86/entry/syscalls/syscall_64.tbl|   4 +-
 arch/x86/include/asm/syscall_wrapper.h|   5 +
 include/linux/compat.h|  26 ---
 include/uapi/asm-generic/unistd.h |  12 +-
 kernel/kexec.c|  77 +++--
 kernel/sys_ni.c   |   5 -
 mm/mempolicy.c| 193 +-
 mm/migrate.c  |  45 +++--
 16 files changed, 143 insertions(+), 312 deletions(-)

-- 
2.27.0


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH printk 2/3] printk: move dictionary keys to dev_printk_info

2020-09-18 Thread Rasmus Villemoes
On 18/09/2020 14.13, Petr Mladek wrote:
> On Fri 2020-09-18 08:16:37, Rasmus Villemoes wrote:
>> On 17/09/2020 15.16, John Ogness wrote:
>>
>>> if (dev->class)
>>> subsys = dev->class->name;
>>> else if (dev->bus)
>>> subsys = dev->bus->name;
>>> else
>>> -   return 0;
>>> +   return;
>>>  
>>> -   pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys);
>>> -   if (pos >= hdrlen)
>>> -   goto overflow;
>>> +   snprintf(dev_info->subsystem, sizeof(dev_info->subsystem), subsys);
>>
>> It's unlikely that subsys would contain a %, but this will be yet
>> another place to spend brain cycles ignoring if doing static analysis.
>> So can we not do this. Either of strXcpy() for X=s,l will do the same
>> thing, and likely faster.
> 
> Good point! Better be on the safe size in a generic printk() API.
> 
> Well, I am afraid that this would be only small drop in a huge lake.
> class->name and bus->name seems to be passed to %s in so many
> *print*() calls all over the kernel code.

So what? printf("%s", some_string_that_might_contain_percent_chars) is
not a problem.

printf(some_string_that_might_contain_percent_chars) is.

And yes, one could do

  snprintf(dev_info->subsystem, sizeof(dev_info->subsystem), "%s", subsys);

but one might as well avoid the snprintf overhead and use one of the
strX functions that have the exact same semantics (copy as much as
there's room for, ensure nul-termination).

Rasmus

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH printk 2/3] printk: move dictionary keys to dev_printk_info

2020-09-18 Thread Petr Mladek
On Fri 2020-09-18 08:16:37, Rasmus Villemoes wrote:
> On 17/09/2020 15.16, John Ogness wrote:
> 
> > if (dev->class)
> > subsys = dev->class->name;
> > else if (dev->bus)
> > subsys = dev->bus->name;
> > else
> > -   return 0;
> > +   return;
> >  
> > -   pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys);
> > -   if (pos >= hdrlen)
> > -   goto overflow;
> > +   snprintf(dev_info->subsystem, sizeof(dev_info->subsystem), subsys);
> 
> It's unlikely that subsys would contain a %, but this will be yet
> another place to spend brain cycles ignoring if doing static analysis.
> So can we not do this. Either of strXcpy() for X=s,l will do the same
> thing, and likely faster.

Good point! Better be on the safe size in a generic printk() API.

Well, I am afraid that this would be only small drop in a huge lake.
class->name and bus->name seems to be passed to %s in so many
*print*() calls all over the kernel code.

IMHO, this is not the right place to prevent the problem. Dangerous
names must be prevented when a new bus, class, device is added.

Best Rergards,
Petr

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 1/1] makedumpfile: make use of 'uts_namespace.name' offset in VMCOREINFO

2020-09-18 Thread Alexander Egorenkov
The offset of the field 'init_uts_ns.name' has changed
since linux-next commit 9a56493f6942c0e2df1579986128721da96e00d8.
Make use of the offset 'uts_namespace.name' if available in VMCOREINFO.

Signed-off-by: Alexander Egorenkov 
---
 makedumpfile.c | 17 +++--
 makedumpfile.h |  6 ++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/makedumpfile.c b/makedumpfile.c
index 4c4251e..5b8b90c 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -1159,7 +1159,10 @@ check_release(void)
if (SYMBOL(system_utsname) != NOT_FOUND_SYMBOL) {
utsname = SYMBOL(system_utsname);
} else if (SYMBOL(init_uts_ns) != NOT_FOUND_SYMBOL) {
-   utsname = SYMBOL(init_uts_ns) + sizeof(int);
+   if (OFFSET(uts_namespace.name) != NOT_FOUND_STRUCTURE)
+   utsname = SYMBOL(init_uts_ns) + 
OFFSET(uts_namespace.name);
+   else
+   utsname = SYMBOL(init_uts_ns) + sizeof(int);
} else {
ERRMSG("Can't get the symbol of system_utsname.\n");
return FALSE;
@@ -2008,6 +2011,11 @@ get_structure_info(void)
SIZE_INIT(cpu_spec, "cpu_spec");
OFFSET_INIT(cpu_spec.mmu_features, "cpu_spec", "mmu_features");
 
+   /*
+* Get offsets of the uts_namespace's members.
+*/
+   OFFSET_INIT(uts_namespace.name, "uts_namespace", "name");
+
return TRUE;
 }
 
@@ -2077,7 +2085,10 @@ get_str_osrelease_from_vmlinux(void)
if (SYMBOL(system_utsname) != NOT_FOUND_SYMBOL) {
utsname = SYMBOL(system_utsname);
} else if (SYMBOL(init_uts_ns) != NOT_FOUND_SYMBOL) {
-   utsname = SYMBOL(init_uts_ns) + sizeof(int);
+   if (OFFSET(uts_namespace.name) != NOT_FOUND_STRUCTURE)
+   utsname = SYMBOL(init_uts_ns) + 
OFFSET(uts_namespace.name);
+   else
+   utsname = SYMBOL(init_uts_ns) + sizeof(int);
} else {
ERRMSG("Can't get the symbol of system_utsname.\n");
return FALSE;
@@ -2284,6 +2295,7 @@ write_vmcoreinfo_data(void)
WRITE_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list);
WRITE_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift);
WRITE_MEMBER_OFFSET("cpu_spec.mmu_features", cpu_spec.mmu_features);
+   WRITE_MEMBER_OFFSET("uts_namespace.name", uts_namespace.name);
 
if (SYMBOL(node_data) != NOT_FOUND_SYMBOL)
WRITE_ARRAY_LENGTH("node_data", node_data);
@@ -2682,6 +2694,7 @@ read_vmcoreinfo(void)
READ_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list);
READ_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift);
READ_MEMBER_OFFSET("cpu_spec.mmu_features", cpu_spec.mmu_features);
+   READ_MEMBER_OFFSET("uts_namespace.name", uts_namespace.name);
 
READ_STRUCTURE_SIZE("printk_log", printk_log);
if (SIZE(printk_log) != NOT_FOUND_STRUCTURE) {
diff --git a/makedumpfile.h b/makedumpfile.h
index 03fb4ce..9088f1f 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -1719,6 +1719,8 @@ struct size_table {
longcpu_spec;
 
longpageflags;
+
+   longuts_namespace;
 };
 
 struct offset_table {
@@ -1880,6 +1882,10 @@ struct offset_table {
struct cpu_spec_s {
longmmu_features;
} cpu_spec;
+
+   struct uts_namespace_s {
+   longname;
+   } uts_namespace;
 };
 
 /*
-- 
2.26.2


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH printk 2/3] printk: move dictionary keys to dev_printk_info

2020-09-18 Thread Petr Mladek
On Thu 2020-09-17 15:22:43, John Ogness wrote:
> Dictionaries are only used for SUBSYSTEM and DEVICE properties. The
> current implementation stores the property names each time they are
> used. This requires more space than otherwise necessary. Also,
> because the dictionary entries are currently considered optional,
> it cannot be relied upon that they are always available, even if the
> writer wanted to store them. These issues will increase should new
> dictionary properties be introduced.
> 
> Rather than storing the subsystem and device properties in the
> dict ring, introduce a struct dev_printk_info with separate fields
> to store only the property values. Embed this struct within the
> struct printk_info to provide guaranteed availability.
> 
> Signed-off-by: John Ogness 

Reviewed-by: Petr Mladek 

Best Regards,
Petr

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH printk 1/3] printk: move printk_info into separate array

2020-09-18 Thread John Ogness
On 2020-09-18, Petr Mladek  wrote:
>> --- a/kernel/printk/printk.c
>> +++ b/kernel/printk/printk.c
>> @@ -1097,6 +1097,7 @@ static char setup_dict_buf[CONSOLE_EXT_LOG_MAX] 
>> __initdata;
>>  
>>  void __init setup_log_buf(int early)
>>  {
>> +struct printk_info *new_infos;
>>  unsigned int new_descs_count;
>>  struct prb_desc *new_descs;
>>  struct printk_info info;
>> @@ -1156,6 +1157,17 @@ void __init setup_log_buf(int early)
>>  return;
>>  }
>>  
>> +new_descs_size = new_descs_count * sizeof(struct printk_info);
>
> Must be stored into new variable, e.g.  new_infos_size.=

Ack.

>> +new_infos = memblock_alloc(new_descs_size, LOG_ALIGN);
>> +if (unlikely(!new_infos)) {
>> +pr_err("log_buf_len: %zu info bytes not available\n",
>> +   new_descs_size);
>> +memblock_free(__pa(new_descs), new_log_buf_len);
>> +memblock_free(__pa(new_dict_buf), new_log_buf_len);
>
> The above two calls have wrong size.
>
> The same problem is there also in the error path when new_descs
> allocation fail. It might be better to handle this using some
> goto err_* tagrets.
>
> Please, fix the old problem in a separate patch.

The "old problem" didn't exist. The problem is introduced with this
series. I will fix it with appropriate goto err_* targets for v2.

>> --- a/kernel/printk/printk_ringbuffer.c
>> +++ b/kernel/printk/printk_ringbuffer.c
>> @@ -1726,12 +1762,12 @@ static bool copy_data(struct prb_data_ring 
>> *data_ring,
>>  /*
>>   * Actual cannot be less than expected. It can be more than expected
>>   * because of the trailing alignment padding.
>> + *
>> + * Note that invalid @len values can occur because the caller loads
>> + * the value during an allowed data race.
>
> I hope that this will not bite us in the future. The fact is that
> copying the entire struct printk_info in get_desc() is ugly and
> copy_data() has to be careful anyway.

It isn't an issue because the state is verified again at the end of
prb_read(). I added the comment because if all you are looking at is
copy_data(), you may not know that @len was read on a data-race. Whereas
inside of prb_read(), it is obvious that the memcpy() is a data-race.

John Ogness

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] arm64: Add purgatory printing

2020-09-18 Thread Bhupesh SHARMA
Hi Matthias,

On Fri, Sep 18, 2020 at 2:01 PM Matthias Brugger  wrote:
>
>
>
> On 18/09/2020 07:16, Bhupesh SHARMA wrote:
> > Hi Matthias,
> >
> > Thanks for the patch. Some nitpicks inline:
> >
> > On Fri, Sep 18, 2020 at 1:09 AM  wrote:
> >>
> >> From: Matthias Brugger 
> >>
> >> Add option to allow purgatory printing on arm64 hardware
> >> by passing the console name which should be used.
> >> Based on a patch by Geoff Levand.
> >>
> >> Cc: Geoff Levand 
> >> Signed-off-by: Matthias Brugger 
> >> ---
> >>   kexec/arch/arm64/include/arch/options.h |  6 ++-
> >>   kexec/arch/arm64/kexec-arm64.c  | 61 +
> >>   purgatory/arch/arm64/purgatory-arm64.c  | 17 ++-
> >>   3 files changed, 82 insertions(+), 2 deletions(-)
> >
> > Probably we also need to update the man page 'kexec/kexec.8' to add
> > documentation about the newly introduced argument.
> >
>
> I checked the documentation and under "ARCHITECTURE OPTIONS" and it only
> documents a subset of the x86(_64) architecture specific commands. So I think
> there is more work to do to get the documentation right.
>
> But after having a look, I think we might want to use "--serial" as option
> instead of "--console" to be in sync with x86. Although many architectures
> reinvent the options, if we can get it as near to x86 as possible, I think 
> that
> would be a good thing. I'll do that in v2.

That's a good suggestion. I think the '--serial' option is a better
one as compared to '--console'.

> >> diff --git a/kexec/arch/arm64/include/arch/options.h 
> >> b/kexec/arch/arm64/include/arch/options.h
> >> index a17d933..be7d169 100644
> >> --- a/kexec/arch/arm64/include/arch/options.h
> >> +++ b/kexec/arch/arm64/include/arch/options.h
> >> @@ -5,7 +5,8 @@
> >>   #define OPT_DTB((OPT_MAX)+1)
> >>   #define OPT_INITRD ((OPT_MAX)+2)
> >>   #define OPT_REUSE_CMDLINE  ((OPT_MAX)+3)
> >> -#define OPT_ARCH_MAX   ((OPT_MAX)+4)
> >> +#define OPT_CONSOLE((OPT_MAX)+4)
> >> +#define OPT_ARCH_MAX   ((OPT_MAX)+5)
> >>
> >>   #define KEXEC_ARCH_OPTIONS \
> >>  KEXEC_OPTIONS \
> >> @@ -13,6 +14,7 @@
> >>  { "command-line",  1, NULL, OPT_APPEND }, \
> >>  { "dtb",   1, NULL, OPT_DTB }, \
> >>  { "initrd",1, NULL, OPT_INITRD }, \
> >> +   { "console",   1, NULL, OPT_CONSOLE }, \
> >>  { "ramdisk",   1, NULL, OPT_INITRD }, \
> >>  { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \
> >>
> >> @@ -25,6 +27,7 @@ static const char arm64_opts_usage[] __attribute__ 
> >> ((unused)) =
> >>   " --command-line=STRING Set the kernel command line to STRING.\n"
> >>   " --dtb=FILEUse FILE as the device tree blob.\n"
> >>   " --initrd=FILE Use FILE as the kernel initial ramdisk.\n"
> >> +" --console=STRING  Console used for purgatory printing.\n"
> >>   " --ramdisk=FILEUse FILE as the kernel initial ramdisk.\n"
> >>   " --reuse-cmdline   Use kernel command line from running 
> >> system.\n";
> >
> > Just a thought... sometimes the console string is also available as a
> > part of the '--reuse-cmdline' command line argument passed to the
> > kdump kernel. Can we also try to extract the --console string from the
> > cmdline provided to the primary kernel itself?
> >
>
> Well the problem is, that there can be more then one consoles present, so 
> which
> one would be the correct one?
> I see this more like a debug feature which you use knowing which console you 
> are
> looking for the debug messages. In the end it only helps you to see if kdump
> failed in the production system kernel or in the crash kernel.

Indeed, I think we need to add some more comments to explain this
option and distinguish it better from the serial port(s) mentioned in
the '--reuse-cmdline' option.

> >> @@ -32,6 +35,7 @@ struct arm64_opts {
> >>  const char *command_line;
> >>  const char *dtb;
> >>  const char *initrd;
> >> +   const char *console;
> >>   };
> >>
> >>   extern struct arm64_opts arm64_opts;
> >> diff --git a/kexec/arch/arm64/kexec-arm64.c 
> >> b/kexec/arch/arm64/kexec-arm64.c
> >> index 45ebc54..44c9e6e 100644
> >> --- a/kexec/arch/arm64/kexec-arm64.c
> >> +++ b/kexec/arch/arm64/kexec-arm64.c
> >> @@ -165,6 +165,8 @@ int arch_process_options(int argc, char **argv)
> >>  break;
> >>  case OPT_KEXEC_FILE_SYSCALL:
> >>  do_kexec_file_syscall = 1;
> >> +   case OPT_CONSOLE:
> >> +   arm64_opts.console = optarg;
> >>  break;
> >>  default:
> >>  break; /* Ignore core and unknown options. */
> >> @@ -180,12 +182,62 @@ int arch_process_options(int argc, char **argv)
> >>  dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__,
> >>  (do_kexec_file_syscall && arm64_opts.dtb ? 

Re: [PATCH printk 1/3] printk: move printk_info into separate array

2020-09-18 Thread Petr Mladek
On Thu 2020-09-17 15:22:42, John Ogness wrote:
> The majority of the size of a descriptor is taken up by meta data,
> which is often not of interest to the ringbuffer (for example,
> when performing state checks). Since descriptors are often
> temporarily stored on the stack, keeping their size minimal will
> help reduce stack pressure.
> 
> Rather than embedding the printk_info into the descriptor, create
> a separate printk_info array. The index of a descriptor in the
> descriptor array corresponds to the printk_info with the same
> index in the printk_info array. The rules for validity of a
> printk_info match the existing rules for the data blocks: the
> descriptor must be in a consistent state.

I like this approach.

IMHO, it better separates dict_ring- and printk-specific metadata.

The three printk-specific values (seq, caller_id, len) are still
accessed by the ring buffer code. It is not ideal. But it helps
to hide tricky operations in the ring buffer API, keep the code
more safe and sane. These exceptions are actually better
visible now.

> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -1097,6 +1097,7 @@ static char setup_dict_buf[CONSOLE_EXT_LOG_MAX] 
> __initdata;
>  
>  void __init setup_log_buf(int early)
>  {
> + struct printk_info *new_infos;
>   unsigned int new_descs_count;
>   struct prb_desc *new_descs;
>   struct printk_info info;
> @@ -1156,6 +1157,17 @@ void __init setup_log_buf(int early)
>   return;
>   }
>  
> + new_descs_size = new_descs_count * sizeof(struct printk_info);

Must be stored into new variable, e.g.  new_infos_size.=

> + new_infos = memblock_alloc(new_descs_size, LOG_ALIGN);
> + if (unlikely(!new_infos)) {
> + pr_err("log_buf_len: %zu info bytes not available\n",
> +new_descs_size);
> + memblock_free(__pa(new_descs), new_log_buf_len);
> + memblock_free(__pa(new_dict_buf), new_log_buf_len);

The above two calls have wrong size.

The same problem is there also in the error path when new_descs
allocation fail. It might be better to handle this using some
goto err_* tagrets.

Please, fix the old problem in a separate patch.

> + memblock_free(__pa(new_log_buf), new_log_buf_len);
> + return;
> + }
> +
>   prb_rec_init_rd(&r, &info,
>   &setup_text_buf[0], sizeof(setup_text_buf),
>   &setup_dict_buf[0], sizeof(setup_dict_buf));


> --- a/kernel/printk/printk_ringbuffer.c
> +++ b/kernel/printk/printk_ringbuffer.c
> @@ -1726,12 +1762,12 @@ static bool copy_data(struct prb_data_ring *data_ring,
>   /*
>* Actual cannot be less than expected. It can be more than expected
>* because of the trailing alignment padding.
> +  *
> +  * Note that invalid @len values can occur because the caller loads
> +  * the value during an allowed data race.

I hope that this will not bite us in the future. The fact is that
copying the entire struct printk_info in get_desc() is ugly and
copy_data() has to be careful anyway.

>*/
> - if (WARN_ON_ONCE(data_size < (unsigned int)len)) {
> - pr_warn_once("wrong data size (%u, expecting >=%hu) for data: 
> %.*s\n",
> -  data_size, len, data_size, data);
> + if (data_size < (unsigned int)len)


>   return false;
> - }
>  
>   /* Caller interested in the line count? */
>   if (line_count)

Otherwise, I like this patch.

Best Regards,
Petr

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v12 3/9] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel[_low]()

2020-09-18 Thread chenzhou
Hi Catalin,


On 2020/9/18 16:59, chenzhou wrote:
> Hi Baoquan,
>
> On 2020/9/18 15:25, Baoquan He wrote:
>> Hi,
>>
>> On 09/07/20 at 09:47pm, Chen Zhou wrote:
>>> To make the functions reserve_crashkernel[_low]() as generic,
>>> replace some hard-coded numbers with macro CRASH_ADDR_LOW_MAX.
>>>
>>> Signed-off-by: Chen Zhou 
>>> ---
>>>  arch/x86/kernel/setup.c | 11 ++-
>>>  1 file changed, 6 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>>> index d7fd90c52dae..71a6a6e7ca5b 100644
>>> --- a/arch/x86/kernel/setup.c
>>> +++ b/arch/x86/kernel/setup.c
>>> @@ -430,7 +430,7 @@ static int __init reserve_crashkernel_low(void)
>>> unsigned long total_low_mem;
>>> int ret;
>>>  
>>> -   total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
>>> +   total_low_mem = memblock_mem_size(CRASH_ADDR_LOW_MAX >> PAGE_SHIFT);
>> Just note that the replacement has been done in another patch from Mike
>> Rapoport, partially. He seems to have done reserve_crashkernel_low()
>> part, there's one left in reserve_crashkernel(), you might want to check
>> that. 
>>
>> Mike's patch which is from a patchset has been merged into Andrew's next
>> tree.
>>
>> commit 6e50f7672ffa362e9bd4bc0c0d2524ed872828c5
>> Author: Mike Rapoport 
>> Date:   Wed Aug 26 15:22:32 2020 +1000
>>
>> x86/setup: simplify reserve_crashkernel()
As Baoquan said, some functions have been changed in the next tree,
if i need to rebase on top of the next tree.

Thanks,
Chen Zhou
> Yeah, the function reserve_crashkernel() has been changed in the next tree.
> Thanks for your review and reminder.
>
> Thanks,
> Chen Zhou
>>>  
>>> /* crashkernel=Y,low */
>>> ret = parse_crashkernel_low(boot_command_line, total_low_mem, 
>>> &low_size, &base);
>>> @@ -451,7 +451,7 @@ static int __init reserve_crashkernel_low(void)
>>> return 0;
>>> }
>>>  
>>> -   low_base = memblock_find_in_range(CRASH_ALIGN, 1ULL << 32, low_size, 
>>> CRASH_ALIGN);
>>> +   low_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, 
>>> low_size, CRASH_ALIGN);
>>> if (!low_base) {
>>> pr_err("Cannot reserve %ldMB crashkernel low memory, please try 
>>> smaller size.\n",
>>>(unsigned long)(low_size >> 20));
>>> @@ -504,8 +504,9 @@ static void __init reserve_crashkernel(void)
>>> if (!crash_base) {
>>> /*
>>>  * Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
>>> -* crashkernel=x,high reserves memory over 4G, also allocates
>>> -* 256M extra low memory for DMA buffers and swiotlb.
>>> +* crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX,
>>> +* also allocates 256M extra low memory for DMA buffers
>>> +* and swiotlb.
>>>  * But the extra memory is not required for all machines.
>>>  * So try low memory first and fall back to high memory
>>>  * unless "crashkernel=size[KMG],high" is specified.
>>> @@ -539,7 +540,7 @@ static void __init reserve_crashkernel(void)
>>> return;
>>> }
>>>  
>>> -   if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
>>> +   if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) {
>>> memblock_free(crash_base, crash_size);
>>> return;
>>> }
>>> -- 
>>> 2.20.1
>>>
>> .
>>


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v12 3/9] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel[_low]()

2020-09-18 Thread chenzhou
Hi Baoquan,

On 2020/9/18 15:25, Baoquan He wrote:
> Hi,
>
> On 09/07/20 at 09:47pm, Chen Zhou wrote:
>> To make the functions reserve_crashkernel[_low]() as generic,
>> replace some hard-coded numbers with macro CRASH_ADDR_LOW_MAX.
>>
>> Signed-off-by: Chen Zhou 
>> ---
>>  arch/x86/kernel/setup.c | 11 ++-
>>  1 file changed, 6 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>> index d7fd90c52dae..71a6a6e7ca5b 100644
>> --- a/arch/x86/kernel/setup.c
>> +++ b/arch/x86/kernel/setup.c
>> @@ -430,7 +430,7 @@ static int __init reserve_crashkernel_low(void)
>>  unsigned long total_low_mem;
>>  int ret;
>>  
>> -total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
>> +total_low_mem = memblock_mem_size(CRASH_ADDR_LOW_MAX >> PAGE_SHIFT);
> Just note that the replacement has been done in another patch from Mike
> Rapoport, partially. He seems to have done reserve_crashkernel_low()
> part, there's one left in reserve_crashkernel(), you might want to check
> that. 
>
> Mike's patch which is from a patchset has been merged into Andrew's next
> tree.
>
> commit 6e50f7672ffa362e9bd4bc0c0d2524ed872828c5
> Author: Mike Rapoport 
> Date:   Wed Aug 26 15:22:32 2020 +1000
>
> x86/setup: simplify reserve_crashkernel()
Yeah, the function reserve_crashkernel() has been changed in the next tree.
Thanks for your review and reminder.

Thanks,
Chen Zhou
>
>>  
>>  /* crashkernel=Y,low */
>>  ret = parse_crashkernel_low(boot_command_line, total_low_mem, 
>> &low_size, &base);
>> @@ -451,7 +451,7 @@ static int __init reserve_crashkernel_low(void)
>>  return 0;
>>  }
>>  
>> -low_base = memblock_find_in_range(CRASH_ALIGN, 1ULL << 32, low_size, 
>> CRASH_ALIGN);
>> +low_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, 
>> low_size, CRASH_ALIGN);
>>  if (!low_base) {
>>  pr_err("Cannot reserve %ldMB crashkernel low memory, please try 
>> smaller size.\n",
>> (unsigned long)(low_size >> 20));
>> @@ -504,8 +504,9 @@ static void __init reserve_crashkernel(void)
>>  if (!crash_base) {
>>  /*
>>   * Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
>> - * crashkernel=x,high reserves memory over 4G, also allocates
>> - * 256M extra low memory for DMA buffers and swiotlb.
>> + * crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX,
>> + * also allocates 256M extra low memory for DMA buffers
>> + * and swiotlb.
>>   * But the extra memory is not required for all machines.
>>   * So try low memory first and fall back to high memory
>>   * unless "crashkernel=size[KMG],high" is specified.
>> @@ -539,7 +540,7 @@ static void __init reserve_crashkernel(void)
>>  return;
>>  }
>>  
>> -if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
>> +if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) {
>>  memblock_free(crash_base, crash_size);
>>  return;
>>  }
>> -- 
>> 2.20.1
>>
> .
>


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] arm64: Add purgatory printing

2020-09-18 Thread Matthias Brugger




On 18/09/2020 07:16, Bhupesh SHARMA wrote:

Hi Matthias,

Thanks for the patch. Some nitpicks inline:

On Fri, Sep 18, 2020 at 1:09 AM  wrote:


From: Matthias Brugger 

Add option to allow purgatory printing on arm64 hardware
by passing the console name which should be used.
Based on a patch by Geoff Levand.

Cc: Geoff Levand 
Signed-off-by: Matthias Brugger 
---
  kexec/arch/arm64/include/arch/options.h |  6 ++-
  kexec/arch/arm64/kexec-arm64.c  | 61 +
  purgatory/arch/arm64/purgatory-arm64.c  | 17 ++-
  3 files changed, 82 insertions(+), 2 deletions(-)


Probably we also need to update the man page 'kexec/kexec.8' to add
documentation about the newly introduced argument.



I checked the documentation and under "ARCHITECTURE OPTIONS" and it only 
documents a subset of the x86(_64) architecture specific commands. So I think 
there is more work to do to get the documentation right.


But after having a look, I think we might want to use "--serial" as option 
instead of "--console" to be in sync with x86. Although many architectures 
reinvent the options, if we can get it as near to x86 as possible, I think that 
would be a good thing. I'll do that in v2.



diff --git a/kexec/arch/arm64/include/arch/options.h 
b/kexec/arch/arm64/include/arch/options.h
index a17d933..be7d169 100644
--- a/kexec/arch/arm64/include/arch/options.h
+++ b/kexec/arch/arm64/include/arch/options.h
@@ -5,7 +5,8 @@
  #define OPT_DTB((OPT_MAX)+1)
  #define OPT_INITRD ((OPT_MAX)+2)
  #define OPT_REUSE_CMDLINE  ((OPT_MAX)+3)
-#define OPT_ARCH_MAX   ((OPT_MAX)+4)
+#define OPT_CONSOLE((OPT_MAX)+4)
+#define OPT_ARCH_MAX   ((OPT_MAX)+5)

  #define KEXEC_ARCH_OPTIONS \
 KEXEC_OPTIONS \
@@ -13,6 +14,7 @@
 { "command-line",  1, NULL, OPT_APPEND }, \
 { "dtb",   1, NULL, OPT_DTB }, \
 { "initrd",1, NULL, OPT_INITRD }, \
+   { "console",   1, NULL, OPT_CONSOLE }, \
 { "ramdisk",   1, NULL, OPT_INITRD }, \
 { "reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE }, \

@@ -25,6 +27,7 @@ static const char arm64_opts_usage[] __attribute__ ((unused)) 
=
  " --command-line=STRING Set the kernel command line to STRING.\n"
  " --dtb=FILEUse FILE as the device tree blob.\n"
  " --initrd=FILE Use FILE as the kernel initial ramdisk.\n"
+" --console=STRING  Console used for purgatory printing.\n"
  " --ramdisk=FILEUse FILE as the kernel initial ramdisk.\n"
  " --reuse-cmdline   Use kernel command line from running system.\n";


Just a thought... sometimes the console string is also available as a
part of the '--reuse-cmdline' command line argument passed to the
kdump kernel. Can we also try to extract the --console string from the
cmdline provided to the primary kernel itself?



Well the problem is, that there can be more then one consoles present, so which 
one would be the correct one?
I see this more like a debug feature which you use knowing which console you are 
looking for the debug messages. In the end it only helps you to see if kdump 
failed in the production system kernel or in the crash kernel.



@@ -32,6 +35,7 @@ struct arm64_opts {
 const char *command_line;
 const char *dtb;
 const char *initrd;
+   const char *console;
  };

  extern struct arm64_opts arm64_opts;
diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c
index 45ebc54..44c9e6e 100644
--- a/kexec/arch/arm64/kexec-arm64.c
+++ b/kexec/arch/arm64/kexec-arm64.c
@@ -165,6 +165,8 @@ int arch_process_options(int argc, char **argv)
 break;
 case OPT_KEXEC_FILE_SYSCALL:
 do_kexec_file_syscall = 1;
+   case OPT_CONSOLE:
+   arm64_opts.console = optarg;
 break;
 default:
 break; /* Ignore core and unknown options. */
@@ -180,12 +182,62 @@ int arch_process_options(int argc, char **argv)
 dbgprintf("%s:%d: dtb: %s\n", __func__, __LINE__,
 (do_kexec_file_syscall && arm64_opts.dtb ? "(ignored)" :
 arm64_opts.dtb));
+   dbgprintf("%s:%d: console: %s\n", __func__, __LINE__,
+   arm64_opts.console);
+
 if (do_kexec_file_syscall)
 arm64_opts.dtb = NULL;

 return 0;
  }

+/**
+ * find_purgatory_sink - Find a sink for purgatory output.
+ */
+
+static uint64_t find_purgatory_sink(const char *console)
+{
+   int fd, ret;
+   char folder[255], device[255], mem[255];
+   struct stat sb;
+   char buffer[18];


Just trying to understand the reasoning behind keeping the buffer 18
chars long. Can the bytes read from the console exceed the array size
here (may be a boundary check is required here to avoid overflows)?



The buffer

[PATCH] docs: admin-guide: update kdump documentation due to change of crash URL

2020-09-18 Thread Lianbo Jiang
Since crash utility has moved to github, the original URL is no longer
available. Let's update it accordingly.

Suggested-by: Dave Young 
Signed-off-by: Lianbo Jiang 
---
 Documentation/admin-guide/kdump/kdump.rst | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kdump/kdump.rst 
b/Documentation/admin-guide/kdump/kdump.rst
index 2da65fef2a1c..75a9dd98e76e 100644
--- a/Documentation/admin-guide/kdump/kdump.rst
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -509,9 +509,12 @@ ELF32-format headers using the --elf32-core-headers kernel 
option on the
 dump kernel.
 
 You can also use the Crash utility to analyze dump files in Kdump
-format. Crash is available on Dave Anderson's site at the following URL:
+format. Crash is available at the following URL:
 
-   http://people.redhat.com/~anderson/
+   https://github.com/crash-utility/crash
+
+Crash document can be found at:
+   https://crash-utility.github.io/
 
 Trigger Kdump on WARN()
 ===
-- 
2.17.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 1/1] kdump: append uts_namespace.name offset to VMCOREINFO

2020-09-18 Thread Alexander Egorenkov
The offset of the field 'init_uts_ns.name' has changed
since commit 9a56493f6942c0e2df1579986128721da96e00d8.
Make the offset of the field 'uts_namespace.name' available
in VMCOREINFO because tools like 'crash-utility' and
'makedumpfile' must be able to read it from crash dumps.

Signed-off-by: Alexander Egorenkov 
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 106e4500fd53..173fdc261882 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -447,6 +447,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_PAGESIZE(PAGE_SIZE);
 
VMCOREINFO_SYMBOL(init_uts_ns);
+   VMCOREINFO_OFFSET(uts_namespace, name);
VMCOREINFO_SYMBOL(node_online_map);
 #ifdef CONFIG_MMU
VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
-- 
2.26.2


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v12 3/9] x86: kdump: use macro CRASH_ADDR_LOW_MAX in functions reserve_crashkernel[_low]()

2020-09-18 Thread Baoquan He
Hi,

On 09/07/20 at 09:47pm, Chen Zhou wrote:
> To make the functions reserve_crashkernel[_low]() as generic,
> replace some hard-coded numbers with macro CRASH_ADDR_LOW_MAX.
> 
> Signed-off-by: Chen Zhou 
> ---
>  arch/x86/kernel/setup.c | 11 ++-
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index d7fd90c52dae..71a6a6e7ca5b 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -430,7 +430,7 @@ static int __init reserve_crashkernel_low(void)
>   unsigned long total_low_mem;
>   int ret;
>  
> - total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
> + total_low_mem = memblock_mem_size(CRASH_ADDR_LOW_MAX >> PAGE_SHIFT);

Just note that the replacement has been done in another patch from Mike
Rapoport, partially. He seems to have done reserve_crashkernel_low()
part, there's one left in reserve_crashkernel(), you might want to check
that. 

Mike's patch which is from a patchset has been merged into Andrew's next
tree.

commit 6e50f7672ffa362e9bd4bc0c0d2524ed872828c5
Author: Mike Rapoport 
Date:   Wed Aug 26 15:22:32 2020 +1000

x86/setup: simplify reserve_crashkernel()

>  
>   /* crashkernel=Y,low */
>   ret = parse_crashkernel_low(boot_command_line, total_low_mem, 
> &low_size, &base);
> @@ -451,7 +451,7 @@ static int __init reserve_crashkernel_low(void)
>   return 0;
>   }
>  
> - low_base = memblock_find_in_range(CRASH_ALIGN, 1ULL << 32, low_size, 
> CRASH_ALIGN);
> + low_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, 
> low_size, CRASH_ALIGN);
>   if (!low_base) {
>   pr_err("Cannot reserve %ldMB crashkernel low memory, please try 
> smaller size.\n",
>  (unsigned long)(low_size >> 20));
> @@ -504,8 +504,9 @@ static void __init reserve_crashkernel(void)
>   if (!crash_base) {
>   /*
>* Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
> -  * crashkernel=x,high reserves memory over 4G, also allocates
> -  * 256M extra low memory for DMA buffers and swiotlb.
> +  * crashkernel=x,high reserves memory over CRASH_ADDR_LOW_MAX,
> +  * also allocates 256M extra low memory for DMA buffers
> +  * and swiotlb.
>* But the extra memory is not required for all machines.
>* So try low memory first and fall back to high memory
>* unless "crashkernel=size[KMG],high" is specified.
> @@ -539,7 +540,7 @@ static void __init reserve_crashkernel(void)
>   return;
>   }
>  
> - if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
> + if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) {
>   memblock_free(crash_base, crash_size);
>   return;
>   }
> -- 
> 2.20.1
> 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] arm64: Add purgatory printing

2020-09-18 Thread Matthias Brugger

Hi Geoff,

On 18/09/2020 04:04, Geoff Levand wrote:

Hi Matthias,

On 9/17/20 12:38 PM, matthias@kernel.org wrote:
Add option to allow purgatory printing on arm64 hardware

by passing the console name which should be used.



+static uint64_t find_purgatory_sink(const char *console)
+{
+   int fd, ret;
+   char folder[255], device[255], mem[255];
+   struct stat sb;
+   char buffer[18];
+   uint64_t iomem = 0x0;
+
+   if (!console)
+   return 0;
+
+   sprintf(device, "/sys/class/tty/%s", console);
+   if (!stat(folder, &sb) == 0 && S_ISDIR(sb.st_mode)) {
+   fprintf(stderr, "kexec: %s: No valid console found for %s\n",
+   __func__, device);
+   return 0;
+   }
+
+   sprintf(mem, "%s%s", device, "/iomem_base");
+   printf("console memory read from %s\n", mem);
+
+   fd = open(mem, O_RDONLY);
+   if (fd < 0) {
+   fprintf(stderr, "kexec: %s: No able to open %s\n",
+   __func__, mem);
+   return 0;
+   }
+
+   memset(buffer, '\0', sizeof(char) * 18);


I think I'd like to just see 'memset(buffer, 0, sizeof(buffer));'.


+   ret = read(fd, buffer, 18);


And 'ret = read(fd, buffer, sizeof(buffer));'.



You are correct, I'll add it to v2.

Thanks for the quick review!

Matthias


+   if (ret < 0) {
+   fprintf(stderr, "kexec: %s: not able to read fd\n", __func__);
+   close(fd);
+   return 0;
+   }
+
+   sscanf(buffer, "%lx", &iomem);
+   printf("console memory is at %#lx\n", iomem);
+
+   close(fd);
+   return iomem;
+}
+


Otherwise, looks OK.

-Geoff



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec