[PATCH v2 2/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
When CONFIG_SMP=y, timebase synchronization is required when the second kernel is started. arch/powerpc/kernel/smp.c: int __cpu_up(unsigned int cpu, struct task_struct *tidle) { ... if (smp_ops->give_timebase) smp_ops->give_timebase(); ... } void start_secondary(void *unused) { ... if (smp_ops->take_timebase) smp_ops->take_timebase(); ... } When CONFIG_HOTPLUG_CPU=n and CONFIG_KEXEC_CORE=n, smp_85xx_ops.give_timebase is NULL, smp_85xx_ops.take_timebase is NULL, As a result, the timebase is not synchronized. Timebase synchronization does not depend on CONFIG_HOTPLUG_CPU. Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations") Cc: sta...@vger.kernel.org #v4.6 Signed-off-by: Xiaoming Ni --- arch/powerpc/platforms/85xx/Makefile | 4 +++- arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 4 arch/powerpc/platforms/85xx/smp.c| 12 ++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 60e4e97a929d..260fbad7967b 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -3,7 +3,9 @@ # Makefile for the PowerPC 85xx linux kernel. # obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_FSL_PMC)+= mpc85xx_pm_ops.o +ifneq ($(CONFIG_FSL_CORENET_RCPM),y) +obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o +endif obj-y += common.o diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index ffa8a7a6a2db..4a8af80011a6 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -17,6 +17,7 @@ static struct ccsr_guts __iomem *guts; +#ifdef CONFIG_FSL_PMC static void mpc85xx_irq_mask(int cpu) { @@ -49,6 +50,7 @@ static void mpc85xx_cpu_up_prepare(int cpu) { } +#endif static void mpc85xx_freeze_time_base(bool freeze) { @@ -76,10 +78,12 @@ static const struct of_device_id mpc85xx_smp_guts_ids[] = { static const struct fsl_pm_ops mpc85xx_pm_ops = { .freeze_time_base = mpc85xx_freeze_time_base, +#ifdef CONFIG_FSL_PMC .irq_mask = mpc85xx_irq_mask, .irq_unmask = mpc85xx_irq_unmask, .cpu_die = mpc85xx_cpu_die, .cpu_up_prepare = mpc85xx_cpu_up_prepare, +#endif }; int __init mpc85xx_setup_pmc(void) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index c6df294054fe..83f4a6389a28 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -40,7 +40,6 @@ struct epapr_spin_table { u32 pir; }; -#ifdef CONFIG_HOTPLUG_CPU static u64 timebase; static int tb_req; static int tb_valid; @@ -112,6 +111,7 @@ static void mpc85xx_take_timebase(void) local_irq_restore(flags); } +#ifdef CONFIG_HOTPLUG_CPU static void smp_85xx_cpu_offline_self(void) { unsigned int cpu = smp_processor_id(); @@ -495,21 +495,21 @@ void __init mpc85xx_smp_init(void) smp_85xx_ops.probe = NULL; } -#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_FSL_CORENET_RCPM + /* Assign a value to qoriq_pm_ops on PPC_E500MC */ fsl_rcpm_init(); -#endif - -#ifdef CONFIG_FSL_PMC +#else + /* Assign a value to qoriq_pm_ops on !PPC_E500MC */ mpc85xx_setup_pmc(); #endif if (qoriq_pm_ops) { smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; +#ifdef CONFIG_HOTPLUG_CPU smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self; smp_85xx_ops.cpu_die = qoriq_cpu_kill; - } #endif + } smp_ops = _85xx_ops; #ifdef CONFIG_KEXEC_CORE -- 2.27.0
[PATCH v2 1/2] powerpc:85xx:Fix oops when mpc85xx_smp_guts_ids node cannot be found
When the field described in mpc85xx_smp_guts_ids[] is not configured in dtb, the mpc85xx_setup_pmc() does not assign a value to the "guts" variable. As a result, the oops is triggered when mpc85xx_freeze_time_base() is executed. Fixes:56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations") Cc: sta...@vger.kernel.org #v4.6 Signed-off-by: Xiaoming Ni --- arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index 7c0133f558d0..ffa8a7a6a2db 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -94,9 +94,8 @@ int __init mpc85xx_setup_pmc(void) pr_err("Could not map guts node address\n"); return -ENOMEM; } + qoriq_pm_ops = _pm_ops; } - qoriq_pm_ops = _pm_ops; - return 0; } -- 2.27.0
[PATCH v2 0/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
When CONFIG_SMP=y, timebase synchronization is required for mpc8572 when the second kernel is started arch/powerpc/kernel/smp.c: int __cpu_up(unsigned int cpu, struct task_struct *tidle) { ... if (smp_ops->give_timebase) smp_ops->give_timebase(); ... } void start_secondary(void *unused) { ... if (smp_ops->take_timebase) smp_ops->take_timebase(); ... } When CONFIG_HOTPLUG_CPU=n and CONFIG_KEXEC_CORE=n, smp_85xx_ops.give_timebase is NULL, smp_85xx_ops.take_timebase is NULL, As a result, the timebase is not synchronized. test code: for i in $(seq 1 3); do taskset 1 date; taskset 2 date; sleep 1; echo;done log: Sat Sep 25 18:50:00 CST 2021 Sat Sep 25 19:07:47 CST 2021 Sat Sep 25 18:50:01 CST 2021 Sat Sep 25 19:07:48 CST 2021 Sat Sep 25 18:50:02 CST 2021 Sat Sep 25 19:07:49 CST 2021 Code snippet about give_timebase and take_timebase assignments: arch/powerpc/platforms/85xx/smp.c: #ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_FSL_CORENET_RCPM fsl_rcpm_init(); #endif #ifdef CONFIG_FSL_PMC mpc85xx_setup_pmc(); #endif if (qoriq_pm_ops) { smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; config dependency: FSL_CORENET_RCPM depends on the PPC_E500MC. FSL_PMC depends on SUSPEND. SUSPEND depends on ARCH_SUSPEND_POSSIBLE. ARCH_SUSPEND_POSSIBLE depends on !PPC_E500MC. CONFIG_HOTPLUG_CPU and CONFIG_FSL_PMC require the timebase function, but the timebase should not depend on CONFIG_HOTPLUG_CPU and CONFIG_FSL_PMC. Therefore, adjust the macro control range. Ensure that the corresponding timebase hook function is not empty when the dtsi node is configured. - changes in v2: 1. add new patch: "powerpc:85xx:Fix oops when mpc85xx_smp_guts_ids node cannot be found" 2. Using !CONFIG_FSL_CORENET_RCPM to manage the timebase code of !PPC_E500MC v1: https://lore.kernel.org/lkml/20210926025144.55674-1-nixiaom...@huawei.com -- Xiaoming Ni (2): powerpc:85xx:Fix oops when mpc85xx_smp_guts_ids node cannot be found powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n arch/powerpc/platforms/85xx/Makefile | 4 +++- arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 7 +-- arch/powerpc/platforms/85xx/smp.c| 12 ++-- 3 files changed, 14 insertions(+), 9 deletions(-) -- 2.27.0
Re: [PATCH] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
On 2021/9/26 10:51, Xiaoming Ni wrote: When CONFIG_SMP=y, timebase synchronization is required when the second kernel is started. arch/powerpc/kernel/smp.c: int __cpu_up(unsigned int cpu, struct task_struct *tidle) { ... if (smp_ops->give_timebase) smp_ops->give_timebase(); ... } void start_secondary(void *unused) { ... if (smp_ops->take_timebase) smp_ops->take_timebase(); ... } When CONFIG_HOTPLUG_CPU=n and CONFIG_KEXEC_CORE=n, smp_85xx_ops.give_timebase is NULL, smp_85xx_ops.take_timebase is NULL, As a result, the timebase is not synchronized. Timebase synchronization does not depend on CONFIG_HOTPLUG_CPU. Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations") Cc: sta...@vger.kernel.org #v4.6 Signed-off-by: Xiaoming Ni --- arch/powerpc/platforms/85xx/Makefile | 2 +- arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 4 arch/powerpc/platforms/85xx/smp.c| 9 - 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 60e4e97a929d..71ce1f6b6966 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -3,7 +3,7 @@ # Makefile for the PowerPC 85xx linux kernel. # obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_FSL_PMC)+= mpc85xx_pm_ops.o +obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o obj-y += common.o diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index 7c0133f558d0..a5656b3e9701 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -17,6 +17,7 @@ static struct ccsr_guts __iomem *guts; +#ifdef CONFIG_FSL_PMC static void mpc85xx_irq_mask(int cpu) { @@ -49,6 +50,7 @@ static void mpc85xx_cpu_up_prepare(int cpu) { } +#endif static void mpc85xx_freeze_time_base(bool freeze) { @@ -76,10 +78,12 @@ static const struct of_device_id mpc85xx_smp_guts_ids[] = { static const struct fsl_pm_ops mpc85xx_pm_ops = { .freeze_time_base = mpc85xx_freeze_time_base, +#ifdef CONFIG_FSL_PMC .irq_mask = mpc85xx_irq_mask, .irq_unmask = mpc85xx_irq_unmask, .cpu_die = mpc85xx_cpu_die, .cpu_up_prepare = mpc85xx_cpu_up_prepare, +#endif }; int __init mpc85xx_setup_pmc(void) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index c6df294054fe..349298cd9671 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -40,7 +40,6 @@ struct epapr_spin_table { u32 pir; }; -#ifdef CONFIG_HOTPLUG_CPU static u64 timebase; static int tb_req; static int tb_valid; @@ -112,6 +111,7 @@ static void mpc85xx_take_timebase(void) local_irq_restore(flags); } +#ifdef CONFIG_HOTPLUG_CPU static void smp_85xx_cpu_offline_self(void) { unsigned int cpu = smp_processor_id(); @@ -499,17 +499,16 @@ void __init mpc85xx_smp_init(void) #ifdef CONFIG_FSL_CORENET_RCPM fsl_rcpm_init(); #endif - -#ifdef CONFIG_FSL_PMC - mpc85xx_setup_pmc(); #endif + mpc85xx_setup_pmc(); if (qoriq_pm_ops) { smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; +#ifdef CONFIG_HOTPLUG_CPU smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self; smp_85xx_ops.cpu_die = qoriq_cpu_kill; - } #endif + } smp_ops = _85xx_ops; #ifdef CONFIG_KEXEC_CORE I found inconsistent time values on different CPUs on my mpc8572 and used this patch to fix it. But today I found out in ppc64 testing that this patch causes the system to trigger oops in the function mpc85xx_freeze_time_base(): the variable "guts" is a null pointer. I'm sorry to bother you. I'll fix it and resend v2 later, Thanks Xiaoming Ni
[PATCH] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
When CONFIG_SMP=y, timebase synchronization is required when the second kernel is started. arch/powerpc/kernel/smp.c: int __cpu_up(unsigned int cpu, struct task_struct *tidle) { ... if (smp_ops->give_timebase) smp_ops->give_timebase(); ... } void start_secondary(void *unused) { ... if (smp_ops->take_timebase) smp_ops->take_timebase(); ... } When CONFIG_HOTPLUG_CPU=n and CONFIG_KEXEC_CORE=n, smp_85xx_ops.give_timebase is NULL, smp_85xx_ops.take_timebase is NULL, As a result, the timebase is not synchronized. Timebase synchronization does not depend on CONFIG_HOTPLUG_CPU. Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations") Cc: sta...@vger.kernel.org #v4.6 Signed-off-by: Xiaoming Ni --- arch/powerpc/platforms/85xx/Makefile | 2 +- arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 4 arch/powerpc/platforms/85xx/smp.c| 9 - 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 60e4e97a929d..71ce1f6b6966 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -3,7 +3,7 @@ # Makefile for the PowerPC 85xx linux kernel. # obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_FSL_PMC)+= mpc85xx_pm_ops.o +obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o obj-y += common.o diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index 7c0133f558d0..a5656b3e9701 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -17,6 +17,7 @@ static struct ccsr_guts __iomem *guts; +#ifdef CONFIG_FSL_PMC static void mpc85xx_irq_mask(int cpu) { @@ -49,6 +50,7 @@ static void mpc85xx_cpu_up_prepare(int cpu) { } +#endif static void mpc85xx_freeze_time_base(bool freeze) { @@ -76,10 +78,12 @@ static const struct of_device_id mpc85xx_smp_guts_ids[] = { static const struct fsl_pm_ops mpc85xx_pm_ops = { .freeze_time_base = mpc85xx_freeze_time_base, +#ifdef CONFIG_FSL_PMC .irq_mask = mpc85xx_irq_mask, .irq_unmask = mpc85xx_irq_unmask, .cpu_die = mpc85xx_cpu_die, .cpu_up_prepare = mpc85xx_cpu_up_prepare, +#endif }; int __init mpc85xx_setup_pmc(void) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index c6df294054fe..349298cd9671 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -40,7 +40,6 @@ struct epapr_spin_table { u32 pir; }; -#ifdef CONFIG_HOTPLUG_CPU static u64 timebase; static int tb_req; static int tb_valid; @@ -112,6 +111,7 @@ static void mpc85xx_take_timebase(void) local_irq_restore(flags); } +#ifdef CONFIG_HOTPLUG_CPU static void smp_85xx_cpu_offline_self(void) { unsigned int cpu = smp_processor_id(); @@ -499,17 +499,16 @@ void __init mpc85xx_smp_init(void) #ifdef CONFIG_FSL_CORENET_RCPM fsl_rcpm_init(); #endif - -#ifdef CONFIG_FSL_PMC - mpc85xx_setup_pmc(); #endif + mpc85xx_setup_pmc(); if (qoriq_pm_ops) { smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; +#ifdef CONFIG_HOTPLUG_CPU smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self; smp_85xx_ops.cpu_die = qoriq_cpu_kill; - } #endif + } smp_ops = _85xx_ops; #ifdef CONFIG_KEXEC_CORE -- 2.27.0
Re: [PATCH] powerpc:Don't print raw EIP/LR hex values in dump_stack() and show_regs()
On 2020/12/22 1:12, Segher Boessenkool wrote: On Mon, Dec 21, 2020 at 04:42:23PM +, David Laight wrote: From: Segher Boessenkool Sent: 21 December 2020 16:32 On Mon, Dec 21, 2020 at 04:17:21PM +0100, Christophe Leroy wrote: Le 21/12/2020 à 04:27, Xiaoming Ni a écrit : Since the commit 2b0e86cc5de6 ("powerpc/fsl_booke/32: implement KASLR infrastructure"), the powerpc system is ready to support KASLR. To reduces the risk of invalidating address randomization, don't print the EIP/LR hex values in dump_stack() and show_regs(). I think your change is not enough to hide EIP address, see below a dump with you patch, you get "Faulting instruction address: 0xc03a0c14" As far as I can see the patch does nothing to the GPR printout. Often GPRs contain code addresses. As one example, the LR is moved via a GPR (often GPR0, but not always) for storing on the stack. So this needs more work. If the dump_stack() is from an oops you need the real EIP value on order to stand any chance of making headway. Or at least the function name + offset, yes. When the system is healthy, only symbols and offsets are printed, Output address and symbol + offset when the system is dying Does this meet both debugging and security requirements? For example: +static void __show_regs_ip_lr(const char *flag, unsigned long addr) +{ + if (system_going_down()) { /* panic oops reboot */ + pr_cont("%s["REG"] %pS", flag, addr, (void *)addr); + } else { + pr_cont("%s%pS", flag, (void *)addr); + } +} + static void __show_regs(struct pt_regs *regs) { int i, trap; - printk("NIP: "REG" LR: "REG" CTR: "REG"\n", - regs->nip, regs->link, regs->ctr); + __show_regs_ip_lr("NIP: ", regs->nip); + __show_regs_ip_lr(" LR: ", regs->link); + pr_cont(" CTR: "REG"\n", regs->ctr); printk("REGS: %px TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); Otherwise you might just as well just print 'borked - tough luck'. Yes. ASLR is a house of cards. But that isn't constructive wrt this patch :-) Segher . Thanks Xiaoming Ni
[PATCH] powerpc:Don't print raw EIP/LR hex values in dump_stack() and show_regs()
Since the commit 2b0e86cc5de6 ("powerpc/fsl_booke/32: implement KASLR infrastructure"), the powerpc system is ready to support KASLR. To reduces the risk of invalidating address randomization, don't print the EIP/LR hex values in dump_stack() and show_regs(). This patch follows x86 and arm64's lead: commit a25ffd3a6302a6 ("arm64: traps: Don't print stack or raw PC/LR values in backtraces") commit bb5e5ce545f203 ("x86/dumpstack: Remove kernel text addresses from stack dump") Signed-off-by: Xiaoming Ni --- arch/powerpc/kernel/process.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a66f435dabbf..913cf1ea702e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1455,8 +1455,8 @@ static void __show_regs(struct pt_regs *regs) { int i, trap; - printk("NIP: "REG" LR: "REG" CTR: "REG"\n", - regs->nip, regs->link, regs->ctr); + printk("NIP: %pS LR: %pS CTR: "REG"\n", + (void *)regs->nip, (void *)regs->link, regs->ctr); printk("REGS: %px TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); @@ -1493,8 +1493,8 @@ static void __show_regs(struct pt_regs *regs) * above info out without failing */ if (IS_ENABLED(CONFIG_KALLSYMS)) { - printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); - printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); + printk("NIP %pS\n", (void *)regs->nip); + printk("LR %pS\n", (void *)regs->link); } } @@ -2160,8 +2160,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, newsp = stack[0]; ip = stack[STACK_FRAME_LR_SAVE]; if (!firstframe || ip != lr) { - printk("%s["REG"] ["REG"] %pS", - loglvl, sp, ip, (void *)ip); + printk("%s ["REG"] %pS", + loglvl, sp, (void *)ip); ret_addr = ftrace_graph_ret_addr(current, _idx, ip, stack); if (ret_addr != ip) -- 2.27.0
[PATCH v3] All arch: remove system call sys_sysctl
Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"), sys_sysctl is actually unavailable: any input can only return an error. We have been warning about people using the sysctl system call for years and believe there are no more users. Even if there are users of this interface if they have not complained or fixed their code by now they probably are not going to, so there is no point in warning them any longer. So completely remove sys_sysctl on all architectures. Signed-off-by: Xiaoming Ni Acked-by: Will Deacon Acked-by: "Eric W. Biederman" changes in v3: restore include/uapi/linux/sysctl.h rebase on commit bc7d17d55762 ("Add linux-next specific files for 20200615") Conflicts: arch/sh/include/uapi/asm/unistd_64.h arch/sh/kernel/syscalls_64.S v2: According to Kees Cook's suggestion, completely remove sys_sysctl on all arch According to Eric W. Biederman's suggestion, update the commit log V1: https://lore.kernel.org/lkml/1591683605-8585-1-git-send-email-nixiaom...@huawei.com/ Delete the code of sys_sysctl and return -ENOSYS directly at the function entry --- arch/alpha/kernel/syscalls/syscall.tbl| 2 +- arch/arm/configs/am200epdkit_defconfig| 1 - arch/arm/tools/syscall.tbl| 2 +- arch/arm64/include/asm/unistd32.h | 4 +- arch/ia64/kernel/syscalls/syscall.tbl | 2 +- arch/m68k/kernel/syscalls/syscall.tbl | 2 +- arch/microblaze/kernel/syscalls/syscall.tbl | 2 +- arch/mips/configs/cu1000-neo_defconfig| 1 - arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n64.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sh/configs/dreamcast_defconfig | 1 - arch/sh/configs/espt_defconfig| 1 - arch/sh/configs/hp6xx_defconfig | 1 - arch/sh/configs/landisk_defconfig | 1 - arch/sh/configs/lboxre2_defconfig | 1 - arch/sh/configs/microdev_defconfig| 1 - arch/sh/configs/migor_defconfig | 1 - arch/sh/configs/r7780mp_defconfig | 1 - arch/sh/configs/r7785rp_defconfig | 1 - arch/sh/configs/rts7751r2d1_defconfig | 1 - arch/sh/configs/rts7751r2dplus_defconfig | 1 - arch/sh/configs/se7206_defconfig | 1 - arch/sh/configs/se7343_defconfig | 1 - arch/sh/configs/se7619_defconfig | 1 - arch/sh/configs/se7705_defconfig | 1 - arch/sh/configs/se7750_defconfig | 1 - arch/sh/configs/se7751_defconfig | 1 - arch/sh/configs/secureedge5410_defconfig | 1 - arch/sh/configs/sh03_defconfig| 1 - arch/sh/configs/sh7710voipgw_defconfig| 1 - arch/sh/configs/sh7757lcr_defconfig | 1 - arch/sh/configs/sh7763rdp_defconfig | 1 - arch/sh/configs/shmin_defconfig | 1 - arch/sh/configs/titan_defconfig | 1 - arch/sh/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl| 2 +- arch/x86/entry/syscalls/syscall_32.tbl| 2 +- arch/x86/entry/syscalls/syscall_64.tbl| 2 +- arch/xtensa/kernel/syscalls/syscall.tbl | 2 +- include/linux/compat.h| 1 - include/linux/syscalls.h | 2 - include/linux/sysctl.h| 6 +- kernel/Makefile | 2 +- kernel/sys_ni.c | 1 - kernel/sysctl_binary.c| 171 -- .../arch/powerpc/entry/syscalls/syscall.tbl | 2 +- .../perf/arch/s390/entry/syscalls/syscall.tbl | 2 +- .../arch/x86/entry/syscalls/syscall_64.tbl| 2 +- 52 files changed, 24 insertions(+), 227 deletions(-) delete mode 100644 kernel/sysctl_binary.c diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index b249824c8267..0da7f1c61529 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -249,7 +249,7 @@ 316common mlockallsys_mlockall 317common munlockall sys_munlockall 318common sysinfo sys_sysinfo -319common _sysctl sys_sysctl +319common _sysctl sys_ni_syscall # 320 was sys_idle 321common oldumount sys_oldumount 322common swapon sys_swapon diff --git a/arch/arm/configs/am200epdkit_defconfig b/arch/arm/configs/am200epdkit_defconfig index f56ac394caf1..4e49d6cb2f62 100644 --- a/arch/arm
Re: [PATCH v2] All arch: remove system call sys_sysctl
On 2020/6/12 2:23, Eric W. Biederman wrote: Rich Felker writes: On Thu, Jun 11, 2020 at 12:01:11PM -0500, Eric W. Biederman wrote: Rich Felker writes: On Thu, Jun 11, 2020 at 06:43:00AM -0500, Eric W. Biederman wrote: Xiaoming Ni writes: Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"), sys_sysctl is actually unavailable: any input can only return an error. We have been warning about people using the sysctl system call for years and believe there are no more users. Even if there are users of this interface if they have not complained or fixed their code by now they probably are not going to, so there is no point in warning them any longer. So completely remove sys_sysctl on all architectures. Signed-off-by: Xiaoming Ni changes in v2: According to Kees Cook's suggestion, completely remove sys_sysctl on all arch According to Eric W. Biederman's suggestion, update the commit log V1: https://lore.kernel.org/lkml/1591683605-8585-1-git-send-email-nixiaom...@huawei.com/ Delete the code of sys_sysctl and return -ENOSYS directly at the function entry --- include/uapi/linux/sysctl.h| 15 -- [snip] diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 27c1ed2..84b44c3 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -27,21 +27,6 @@ #include #include -#define CTL_MAXNAME 10 /* how many path components do we allow in a - call to sysctl? In other words, what is - the largest acceptable value for the nlen - member of a struct __sysctl_args to have? */ - -struct __sysctl_args { - int __user *name; - int nlen; - void __user *oldval; - size_t __user *oldlenp; - void __user *newval; - size_t newlen; - unsigned long __unused[4]; -}; - /* Define sysctl names first */ /* Top-level names: */ [snip] The uapi header change does not make sense. The entire point of the header is to allow userspace programs to be able to call sys_sysctl. It either needs to all stay or all go. As the concern with the uapi header is about userspace programs being able to compile please leave the header for now. We should leave auditing userspace and seeing if userspace code will still compile if we remove this header for a separate patch. The concerns and justifications for the uapi header are completely different then for the removing the sys_sysctl implementation. Otherwise Acked-by: "Eric W. Biederman" The UAPI header should be kept because it's defining an API not just for the kernel the headers are supplied with, but for all past kernels. In particular programs needing a failsafe CSPRNG source that works on old kernels may (do) use this as a fallback only if modern syscalls are missing. Removing the syscall is no problem since it won't be used, but if you remove the types/macros from the UAPI headers, they'll have to copy that into their own sources. May we assume you know of a least one piece of userspace that will fail to compile if this header file is removed? I know at least one piece of software is using SYS_sysctl for a fallback CSPRNG source. I'm not 100% sure that they're using the kernel headers; they might have copied it already. I'm also not sure how many there are. Regardless, I think the principle stands. There's no need to remove definitions that are essentially maintenance-free now that the interface is no longer available in new kernels, and doing so contributes to the myth that you're supposed to use kernel headers matching runtime kernel rather than it always being safe to use latest headers. If there is no one using the definitions removing them saves people having to remember what they are there for. The big rule is don't break userspace. The goal is to allow people to upgrade their kernel without needing to worry about userspace breaking, and to be able to downgrade to the extent possible to help in tracking bugs. Not being able to compile userspace seems like a pretty clear cut case. Although there are some fuzzy edges given the history of the kernel headers. Things like your libc requiring kernel headers to be processed before they can be used. I think there are still some kernel headers that have that restriction when used with glibc as glibc uses different sizes for types like dev_t. The bottom line is we can't do it casually so that any work in the direction of removing from or deleting uapi headers needs to be it's own separate patch. Given how much effort it can be to show that userspace is not using something I don't expect us to be mucking with the uapi headers any time soon. Eric Thanks everyone for your guidance, I will delete the update of uapi file in v3 version. But here I am still a bit confused: how to modify include/uapi? Before commit 61a47c1ad3a4dc ("
[PATCH v2] All arch: remove system call sys_sysctl
Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"), sys_sysctl is actually unavailable: any input can only return an error. We have been warning about people using the sysctl system call for years and believe there are no more users. Even if there are users of this interface if they have not complained or fixed their code by now they probably are not going to, so there is no point in warning them any longer. So completely remove sys_sysctl on all architectures. Signed-off-by: Xiaoming Ni changes in v2: According to Kees Cook's suggestion, completely remove sys_sysctl on all arch According to Eric W. Biederman's suggestion, update the commit log V1: https://lore.kernel.org/lkml/1591683605-8585-1-git-send-email-nixiaom...@huawei.com/ Delete the code of sys_sysctl and return -ENOSYS directly at the function entry --- arch/alpha/kernel/syscalls/syscall.tbl | 2 +- arch/arm/configs/am200epdkit_defconfig | 1 - arch/arm/tools/syscall.tbl | 2 +- arch/arm64/include/asm/unistd32.h | 4 +- arch/ia64/kernel/syscalls/syscall.tbl | 2 +- arch/m68k/kernel/syscalls/syscall.tbl | 2 +- arch/microblaze/kernel/syscalls/syscall.tbl| 2 +- arch/mips/configs/cu1000-neo_defconfig | 1 - arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n64.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl| 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sh/configs/dreamcast_defconfig| 1 - arch/sh/configs/espt_defconfig | 1 - arch/sh/configs/hp6xx_defconfig| 1 - arch/sh/configs/landisk_defconfig | 1 - arch/sh/configs/lboxre2_defconfig | 1 - arch/sh/configs/microdev_defconfig | 1 - arch/sh/configs/migor_defconfig| 1 - arch/sh/configs/r7780mp_defconfig | 1 - arch/sh/configs/r7785rp_defconfig | 1 - arch/sh/configs/rts7751r2d1_defconfig | 1 - arch/sh/configs/rts7751r2dplus_defconfig | 1 - arch/sh/configs/se7206_defconfig | 1 - arch/sh/configs/se7343_defconfig | 1 - arch/sh/configs/se7619_defconfig | 1 - arch/sh/configs/se7705_defconfig | 1 - arch/sh/configs/se7750_defconfig | 1 - arch/sh/configs/se7751_defconfig | 1 - arch/sh/configs/secureedge5410_defconfig | 1 - arch/sh/configs/sh03_defconfig | 1 - arch/sh/configs/sh7710voipgw_defconfig | 1 - arch/sh/configs/sh7757lcr_defconfig| 1 - arch/sh/configs/sh7763rdp_defconfig| 1 - arch/sh/configs/shmin_defconfig| 1 - arch/sh/configs/titan_defconfig| 1 - arch/sh/include/uapi/asm/unistd_64.h | 2 +- arch/sh/kernel/syscalls/syscall.tbl| 2 +- arch/sh/kernel/syscalls_64.S | 2 +- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/syscalls/syscall_64.tbl | 2 +- arch/xtensa/kernel/syscalls/syscall.tbl| 2 +- include/linux/compat.h | 1 - include/linux/syscalls.h | 2 - include/linux/sysctl.h | 6 +- include/uapi/linux/sysctl.h| 15 -- kernel/Makefile| 2 +- kernel/sys_ni.c| 1 - kernel/sysctl_binary.c | 171 - tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/s390/entry/syscalls/syscall.tbl| 2 +- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 +- 55 files changed, 26 insertions(+), 244 deletions(-) delete mode 100644 kernel/sysctl_binary.c diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index b249824..0da7f1c 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -249,7 +249,7 @@ 316common mlockallsys_mlockall 317common munlockall sys_munlockall 318common sysinfo sys_sysinfo -319common _sysctl sys_sysctl +319common _sysctl sys_ni_syscall # 320 was sys_idle 321common oldumount sys_oldumount 322common swapon sys_swapon
Re: [PATCH 13/13] fs: move binfmt_misc sysctl to its own file
On 2020/5/29 15:41, Luis Chamberlain wrote: This moves the binfmt_misc sysctl to its own file to help remove clutter from kernel/sysctl.c. Signed-off-by: Luis Chamberlain --- fs/binfmt_misc.c | 1 + kernel/sysctl.c | 7 --- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index f69a043f562b..656b3f5f3bbf 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -821,6 +821,7 @@ static int __init init_misc_binfmt(void) int err = register_filesystem(_fs_type); if (!err) insert_binfmt(_format); + register_sysctl_empty_subdir("fs", "binfmt_misc"); return err; } build error when CONFIG_BINFMT_MISC=m ERROR: modpost: "register_sysctl_empty_subdir" [fs/binfmt_misc.ko] undefined! diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 27f0c9ea..4129dfb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2853,6 +2853,7 @@ void register_sysctl_empty_subdir(const char *base, { register_sysctl_subdir(base, subdir, sysctl_mount_point); } +EXPORT_SYMBOL_GPL(register_sysctl_empty_subdir); #endif /* CONFIG_SYSCTL */ Thanks Xiaoming Ni
Re: [PATCH 11/13] random: simplify sysctl declaration with register_sysctl_subdir()
On 2020/5/29 18:26, Greg KH wrote: On Fri, May 29, 2020 at 07:41:06AM +, Luis Chamberlain wrote: From: Xiaoming Ni Move random_table sysctl from kernel/sysctl.c to drivers/char/random.c and use register_sysctl_subdir() to help remove the clutter out of kernel/sysctl.c. Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain --- drivers/char/random.c | 14 -- include/linux/sysctl.h | 1 - kernel/sysctl.c| 5 - 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index a7cf6aa65908..73fd4b6e9c18 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2101,8 +2101,7 @@ static int proc_do_entropy(struct ctl_table *table, int write, } static int sysctl_poolsize = INPUT_POOL_WORDS * 32; -extern struct ctl_table random_table[]; -struct ctl_table random_table[] = { +static struct ctl_table random_table[] = { { .procname = "poolsize", .data = _poolsize, @@ -2164,6 +2163,17 @@ struct ctl_table random_table[] = { #endif { } }; + +/* + * rand_initialize() is called before sysctl_init(), + * so we cannot call register_sysctl_init() in rand_initialize() + */ +static int __init random_sysctls_init(void) +{ + register_sysctl_subdir("kernel", "random", random_table); No error checking? :( It was my mistake, I forgot to add a comment here. Same as the comment of register_sysctl_init(), There is almost no failure here during the system initialization phase, and failure in time does not affect the main function. Thanks Xiaoming Ni
Re: [PATCH 09/13] firmware_loader: simplify sysctl declaration with register_sysctl_subdir()
On 2020/5/29 18:26, Greg KH wrote: On Fri, May 29, 2020 at 07:41:04AM +, Luis Chamberlain wrote: From: Xiaoming Ni Move the firmware config sysctl table to fallback_table.c and use the new register_sysctl_subdir() helper. This removes the clutter from kernel/sysctl.c. Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain --- drivers/base/firmware_loader/fallback.c | 4 drivers/base/firmware_loader/fallback.h | 11 ++ drivers/base/firmware_loader/fallback_table.c | 22 +-- include/linux/sysctl.h| 1 - kernel/sysctl.c | 7 -- 5 files changed, 35 insertions(+), 10 deletions(-) So it now takes more lines than the old stuff? :( CONFIG_FW_LOADER = m Before cleaning, no matter whether ko is loaded or not, the sysctl interface will be created, but now we need to add register and unregister interfaces, so the number of lines of code has increased diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index d9ac7296205e..8190653ae9a3 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -200,12 +200,16 @@ static struct class firmware_class = { int register_sysfs_loader(void) { + int ret = register_firmware_config_sysctl(); + if (ret != 0) + return ret; checkpatch :( This is my fault, thanks for your guidance return class_register(_class); And if that fails? Yes, it is better to call register_firmware_config_sysctl() after class_register(). thanks for your guidance. } void unregister_sysfs_loader(void) { class_unregister(_class); + unregister_firmware_config_sysctl(); } static ssize_t firmware_loading_show(struct device *dev, diff --git a/drivers/base/firmware_loader/fallback.h b/drivers/base/firmware_loader/fallback.h index 06f4577733a8..7d2cb5f6ceb8 100644 --- a/drivers/base/firmware_loader/fallback.h +++ b/drivers/base/firmware_loader/fallback.h @@ -42,6 +42,17 @@ void fw_fallback_set_default_timeout(void); int register_sysfs_loader(void); void unregister_sysfs_loader(void); +#ifdef CONFIG_SYSCTL +extern int register_firmware_config_sysctl(void); +extern void unregister_firmware_config_sysctl(void); +#else +static inline int register_firmware_config_sysctl(void) +{ + return 0; +} +static inline void unregister_firmware_config_sysctl(void) { } +#endif /* CONFIG_SYSCTL */ + #else /* CONFIG_FW_LOADER_USER_HELPER */ static inline int firmware_fallback_sysfs(struct firmware *fw, const char *name, struct device *device, diff --git a/drivers/base/firmware_loader/fallback_table.c b/drivers/base/firmware_loader/fallback_table.c index 46a731dede6f..4234aa5ee5df 100644 --- a/drivers/base/firmware_loader/fallback_table.c +++ b/drivers/base/firmware_loader/fallback_table.c @@ -24,7 +24,7 @@ struct firmware_fallback_config fw_fallback_config = { EXPORT_SYMBOL_NS_GPL(fw_fallback_config, FIRMWARE_LOADER_PRIVATE); #ifdef CONFIG_SYSCTL -struct ctl_table firmware_config_table[] = { +static struct ctl_table firmware_config_table[] = { { .procname = "force_sysfs_fallback", .data = _fallback_config.force_sysfs_fallback, @@ -45,4 +45,22 @@ struct ctl_table firmware_config_table[] = { }, { } }; -#endif + +static struct ctl_table_header *hdr; +int register_firmware_config_sysctl(void) +{ + if (hdr) + return -EEXIST; How can hdr be set? It's my mistake, register_firmware_config_sysctl() is not exported, there will be no repeated calls. thanks for your guidance. + hdr = register_sysctl_subdir("kernel", "firmware_config", +firmware_config_table); + if (!hdr) + return -ENOMEM; + return 0; +} + +void unregister_firmware_config_sysctl(void) +{ + if (hdr) + unregister_sysctl_table(hdr); Why can't unregister_sysctl_table() take a null pointer value? Sorry, I didn't notice that the unregister_sysctl_table() already checks the input parameters. thanks for your guidance. And what sets 'hdr' (worst name for a static variable) to NULL so that it knows not to be unregistered again as it looks like register_firmware_config_sysctl() could be called multiple times. How about renaming hdr to firmware_config_sysct_table_header? + if (hdr) + return -EEXIST; After deleting this code in register_firmware_config_sysctl(), and considering register_firmware_config_sysctl() and unregister_firmware_config_sysctl() are not exported, whether there is no need to add "hdr = NULL;" ? Thanks Xiaoming Ni