Re: [PATCH] kdump: Defer the insertion of crashkernel resources

2024-01-04 Thread Baoquan He
Huacai,

On 12/29/23 at 04:02pm, Huacai Chen wrote:
> In /proc/iomem, sub-regions should be inserted after their parent,
> otherwise the insertion of parent resource fails. But after generic
> crashkernel reservation applied, in both RISC-V and ARM64 (LoongArch
> will also use generic reservation later on), crashkernel resources are
> inserted before their parent, which causes the parent to disappear in
> /proc/iomem. So we defer the insertion of crashkernel resources to an
> early_initcall().
> 
> 1, Without 'crashkernel' parameter:
> 
>  100d0100-100d01ff : LOON0001:00
>100d0100-100d01ff : LOON0001:00 LOON0001:00
>  100e-100e0bff : LOON0002:00
>100e-100e0bff : LOON0002:00 LOON0002:00
>  1fe001e0-1fe001e7 : serial
>  9040-fa17 : System RAM
>f622-f622 : Reserved
>f9ee-f9ee3fff : Reserved
>fa12-fa17 : Reserved
>  fa19-fe0b : System RAM
>fa19-fa1b : Reserved
>  fe4e-47fff : System RAM
>43c00-441ff : Reserved
>47ff98000-47ffa3fff : Reserved
>47ffa4000-47ffa7fff : Reserved
>47ffa8000-47ffabfff : Reserved
>47ffac000-47ffa : Reserved
>47ffb-47ffb3fff : Reserved
> 
> 2, With 'crashkernel' parameter, before this patch:
> 
>  100d0100-100d01ff : LOON0001:00
>100d0100-100d01ff : LOON0001:00 LOON0001:00
>  100e-100e0bff : LOON0002:00
>100e-100e0bff : LOON0002:00 LOON0002:00
>  1fe001e0-1fe001e7 : serial
>  e620-f61f : Crash kernel
>  fa19-fe0b : System RAM
>fa19-fa1b : Reserved
>  fe4e-47fff : System RAM
>43c00-441ff : Reserved
>47ff98000-47ffa3fff : Reserved
>47ffa4000-47ffa7fff : Reserved
>47ffa8000-47ffabfff : Reserved
>47ffac000-47ffa : Reserved
>47ffb-47ffb3fff : Reserved
> 
> 3, With 'crashkernel' parameter, after this patch:
> 
>  100d0100-100d01ff : LOON0001:00
>100d0100-100d01ff : LOON0001:00 LOON0001:00
>  100e-100e0bff : LOON0002:00
>100e-100e0bff : LOON0002:00 LOON0002:00
>  1fe001e0-1fe001e7 : serial
>  9040-fa17 : System RAM
>e620-f61f : Crash kernel
>f622-f622 : Reserved
>f9ee-f9ee3fff : Reserved
>fa12-fa17 : Reserved
>  fa19-fe0b : System RAM
>fa19-fa1b : Reserved
>  fe4e-47fff : System RAM
>43c00-441ff : Reserved
>47ff98000-47ffa3fff : Reserved
>47ffa4000-47ffa7fff : Reserved
>47ffa8000-47ffabfff : Reserved
>47ffac000-47ffa : Reserved
>47ffb-47ffb3fff : Reserved

This looks like a great catch. I am curious where arm64 and loongarch
insert the system RAM range into iomem before crashk_res and
crashk_low_res. On x86, it should be done by pci or acpi init, which is
earlier than crashkernel parsing and inserting into iomem. I just went
through the code and haven't added debugging code to print it yet.

> 
> Signed-off-by: Huacai Chen 
> ---
>  kernel/crash_core.c | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index d4313b53837e..755d8d4ef5b0 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -377,7 +377,6 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>  
>   crashk_low_res.start = low_base;
>   crashk_low_res.end   = low_base + low_size - 1;
> - insert_resource(&iomem_resource, &crashk_low_res);
>  #endif
>   return 0;
>  }
> @@ -459,8 +458,19 @@ void __init reserve_crashkernel_generic(char *cmdline,
>  
>   crashk_res.start = crash_base;
>   crashk_res.end = crash_base + crash_size - 1;
> - insert_resource(&iomem_resource, &crashk_res);
>  }
> +
> +static __init int insert_crashkernel_resources(void)
> +{
> + if (crashk_res.start < crashk_res.end)
> + insert_resource(&iomem_resource, &crashk_res);
> +
> + if (crashk_low_res.start < crashk_low_res.end)
> + insert_resource(&iomem_resource, &crashk_low_res);
> +
> + return 0;
> +}
> +early_initcall(insert_crashkernel_resources);
>  #endif
>  
>  int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
> -- 
> 2.39.3
> 
> 
> ___
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
> 


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 00/10] sysctl: Remove sentinel elements from kernel dir

2024-01-04 Thread Joel Granados
On Thu, Jan 04, 2024 at 03:05:42PM +, Matthew Wilcox wrote:
> On Thu, Jan 04, 2024 at 04:02:21PM +0100, Joel Granados via B4 Relay wrote:
> > From: Joel Granados 
> > 
> > What?
> 
> The reason I wanted you to do the sentinel removal before the split was
> so that there weren't two rounds of patches.  Ironically, because you
> refused to do it that way, not only are there two rounds of patches, but
> I'm being cc'd on all of them, so I get all the $%*^ emails twice.
> 
> Please at least stop cc'ing me.
Will do

-- 

Joel Granados


signature.asc
Description: PGP signature


Re: [PATCH v2 00/10] sysctl: Remove sentinel elements from kernel dir

2024-01-04 Thread Matthew Wilcox
On Thu, Jan 04, 2024 at 04:02:21PM +0100, Joel Granados via B4 Relay wrote:
> From: Joel Granados 
> 
> What?

The reason I wanted you to do the sentinel removal before the split was
so that there weren't two rounds of patches.  Ironically, because you
refused to do it that way, not only are there two rounds of patches, but
I'm being cc'd on all of them, so I get all the $%*^ emails twice.

Please at least stop cc'ing me.



Re: [PATCHv4 05/14] x86/kvm: Do not try to disable kvmclock if it was not enabled

2024-01-04 Thread Kirill A. Shutemov
On Wed, Dec 13, 2023 at 09:22:34AM -0800, Sean Christopherson wrote:
> On Tue, Dec 12, 2023, Kirill A. Shutemov wrote:
> > On Tue, Dec 05, 2023 at 03:45:01AM +0300, Kirill A. Shutemov wrote:
> > > kvm_guest_cpu_offline() tries to disable kvmclock regardless if it is
> > > present in the VM. It leads to write to a MSR that doesn't exist on some
> > > configurations, namely in TDX guest:
> > > 
> > >   unchecked MSR access error: WRMSR to 0x12 (tried to write 
> > > 0x)
> > >   at rIP: 0x8110687c (kvmclock_disable+0x1c/0x30)
> > > 
> > > kvmclock enabling is gated by CLOCKSOURCE and CLOCKSOURCE2 KVM paravirt
> > > features.
> > > 
> > > Do not disable kvmclock if it was not enabled.
> > > 
> > > Signed-off-by: Kirill A. Shutemov 
> > > Fixes: c02027b5742b ("x86/kvm: Disable kvmclock on all CPUs on shutdown")
> > > Reviewed-by: Sean Christopherson 
> > > Reviewed-by: Vitaly Kuznetsov 
> > > Cc: Paolo Bonzini 
> > > Cc: Wanpeng Li 
> > 
> > Paolo, Sean, any chance you can get it in through KVM tree while the rest
> > of kexec patchset is pending? The problem is visible on normal reboot too.
> 
> Paolo is going to grab this (possibly for 6.7-rc?).  I'll keep this tagged on
> my end in case that doesn't happen "soon".

Sean, any update on this?

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2 10/10] bpf: Remove the now superfluous sentinel elements from ctl_table array

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

This commit comes at the tail end of a greater effort to remove the
empty elements at the end of the ctl_table arrays (sentinels) which
will reduce the overall build time size of the kernel and run time
memory bloat by ~64 bytes per sentinel (further information Link :
https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)

Remove sentinel element from bpf_syscall_table.

Acked-by: Andrii Nakryiko 
Signed-off-by: Joel Granados 
---
 kernel/bpf/syscall.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0ed286b8a0f0..2790deabf639 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5727,7 +5727,6 @@ static struct ctl_table bpf_syscall_table[] = {
.mode   = 0644,
.proc_handler   = bpf_stats_handler,
},
-   { }
 };
 
 static int __init bpf_syscall_sysctl_init(void)

-- 
2.30.2




[PATCH v2 09/10] delayacct: Remove the now superfluous sentinel elements from ctl_table array

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

This commit comes at the tail end of a greater effort to remove the
empty elements at the end of the ctl_table arrays (sentinels) which
will reduce the overall build time size of the kernel and run time
memory bloat by ~64 bytes per sentinel (further information Link :
https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)

Remove sentinel element from kern_delayacct_table

Signed-off-by: Joel Granados 
---
 kernel/delayacct.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 6f0c358e73d8..e039b0f99a0b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -74,7 +74,6 @@ static struct ctl_table kern_delayacct_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
-   { }
 };
 
 static __init int kernel_delayacct_sysctls_init(void)

-- 
2.30.2




[PATCH v2 07/10] printk: Remove the now superfluous sentinel elements from ctl_table array

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

This commit comes at the tail end of a greater effort to remove the
empty elements at the end of the ctl_table arrays (sentinels) which
will reduce the overall build time size of the kernel and run time
memory bloat by ~64 bytes per sentinel (further information Link :
https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)

Remove sentinel element from printk_sysctls

Reviewed-by: Petr Mladek 
Signed-off-by: Joel Granados 
---
 kernel/printk/sysctl.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c
index c228343eeb97..3e47dedce9e5 100644
--- a/kernel/printk/sysctl.c
+++ b/kernel/printk/sysctl.c
@@ -76,7 +76,6 @@ static struct ctl_table printk_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
-   {}
 };
 
 void __init printk_sysctl_init(void)

-- 
2.30.2




[PATCH v2 06/10] scheduler: Remove the now superfluous sentinel elements from ctl_table array

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

This commit comes at the tail end of a greater effort to remove the
empty elements at the end of the ctl_table arrays (sentinels) which
will reduce the overall build time size of the kernel and run time
memory bloat by ~64 bytes per sentinel (further information Link :
https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)

Remove sentinel element from ctl_table arrays

Acked-by: "Peter Zijlstra (Intel)" 
Signed-off-by: Joel Granados 
---
 kernel/sched/autogroup.c | 1 -
 kernel/sched/core.c  | 1 -
 kernel/sched/deadline.c  | 1 -
 kernel/sched/fair.c  | 1 -
 kernel/sched/rt.c| 1 -
 kernel/sched/topology.c  | 1 -
 6 files changed, 6 deletions(-)

diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 991fc9002535..db68a964e34e 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -19,7 +19,6 @@ static struct ctl_table sched_autogroup_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
-   {}
 };
 
 static void __init sched_autogroup_sysctl_init(void)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a708d225c28e..5631d0ec161b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4708,7 +4708,6 @@ static struct ctl_table sched_core_sysctls[] = {
.extra2 = SYSCTL_FOUR,
},
 #endif /* CONFIG_NUMA_BALANCING */
-   {}
 };
 static int __init sched_core_sysctl_init(void)
 {
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b28114478b82..58cf9defc3b9 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -43,7 +43,6 @@ static struct ctl_table sched_dl_sysctls[] = {
.proc_handler   = proc_douintvec_minmax,
.extra2 = (void *)&sysctl_sched_dl_period_max,
},
-   {}
 };
 
 static int __init sched_dl_sysctl_init(void)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d7a3c63a2171..8f5f016ebc46 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -157,7 +157,6 @@ static struct ctl_table sched_fair_sysctls[] = {
.extra1 = SYSCTL_ZERO,
},
 #endif /* CONFIG_NUMA_BALANCING */
-   {}
 };
 
 static int __init sched_fair_sysctl_init(void)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 6aaf0a3d6081..350f4e8b3b2f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -56,7 +56,6 @@ static struct ctl_table sched_rt_sysctls[] = {
.mode   = 0644,
.proc_handler   = sched_rr_handler,
},
-   {}
 };
 
 static int __init sched_rt_sysctl_init(void)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 10d1391e7416..e3a354173005 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -322,7 +322,6 @@ static struct ctl_table sched_energy_aware_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
-   {}
 };
 
 static int __init sched_energy_aware_sysctl_init(void)

-- 
2.30.2




[PATCH v2 03/10] ftrace: Remove the now superfluous sentinel elements from ctl_table array

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

This commit comes at the tail end of a greater effort to remove the
empty elements at the end of the ctl_table arrays (sentinels) which
will reduce the overall build time size of the kernel and run time
memory bloat by ~64 bytes per sentinel (further information Link :
https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)

Remove sentinel elements from ftrace_sysctls and user_event_sysctls

Acked-by: "Masami Hiramatsu (Google)" 
Acked-by: "Steven Rostedt (Google)" 
Signed-off-by: Joel Granados 
---
 kernel/trace/ftrace.c| 1 -
 kernel/trace/trace_events_user.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8de8bec5f366..fd40d02a23c7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -8264,7 +8264,6 @@ static struct ctl_table ftrace_sysctls[] = {
.mode   = 0644,
.proc_handler   = ftrace_enable_sysctl,
},
-   {}
 };
 
 static int __init ftrace_sysctl_init(void)
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 9365ce407426..b15a103bb11d 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -2746,7 +2746,6 @@ static struct ctl_table user_event_sysctls[] = {
.mode   = 0644,
.proc_handler   = set_max_user_events_sysctl,
},
-   {}
 };
 
 static int __init trace_events_user_init(void)

-- 
2.30.2




[PATCH v2 05/10] seccomp: Remove the now superfluous sentinel elements from ctl_table array

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

This commit comes at the tail end of a greater effort to remove the
empty elements at the end of the ctl_table arrays (sentinels) which
will reduce the overall build time size of the kernel and run time
memory bloat by ~64 bytes per sentinel (further information Link :
https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)

Remove sentinel element from seccomp_sysctl_table.

Acked-by: Kees Cook 
Signed-off-by: Joel Granados 
---
 kernel/seccomp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 255999ba9190..b727b4351c1b 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -2445,7 +2445,6 @@ static struct ctl_table seccomp_sysctl_table[] = {
.mode   = 0644,
.proc_handler   = seccomp_actions_logged_handler,
},
-   { }
 };
 
 static int __init seccomp_sysctl_init(void)

-- 
2.30.2




[PATCH v2 08/10] kprobes: Remove the now superfluous sentinel elements from ctl_table array

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

This commit comes at the tail end of a greater effort to remove the
empty elements at the end of the ctl_table arrays (sentinels) which
will reduce the overall build time size of the kernel and run time
memory bloat by ~64 bytes per sentinel (further information Link :
https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)

Remove sentinel element from kprobe_sysctls

Acked-by: "Masami Hiramatsu (Google)" 
Signed-off-by: Joel Granados 
---
 kernel/kprobes.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d5a0ee40bf66..1e0ea688cf7f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -968,7 +968,6 @@ static struct ctl_table kprobe_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
-   {}
 };
 
 static void __init kprobe_sysctls_init(void)

-- 
2.30.2




[PATCH v2 02/10] umh: Remove the now superfluous sentinel elements from ctl_table array

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

This commit comes at the tail end of a greater effort to remove the
empty elements at the end of the ctl_table arrays (sentinels) which
will reduce the overall build time size of the kernel and run time
memory bloat by ~64 bytes per sentinel (further information Link :
https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)

Remove sentinel element from usermodehelper_table

Signed-off-by: Joel Granados 
---
 kernel/umh.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/umh.c b/kernel/umh.c
index 1b13c5d34624..598b3ffe1522 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -560,7 +560,6 @@ static struct ctl_table usermodehelper_table[] = {
.mode   = 0600,
.proc_handler   = proc_cap_handler,
},
-   { }
 };
 
 static int __init init_umh_sysctls(void)

-- 
2.30.2




[PATCH v2 04/10] timekeeping: Remove the now superfluous sentinel elements from ctl_table array

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

This commit comes at the tail end of a greater effort to remove the
empty elements at the end of the ctl_table arrays (sentinels) which
will reduce the overall build time size of the kernel and run time
memory bloat by ~64 bytes per sentinel (further information Link :
https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)

Remove sentinel element from timer_sysctl

Signed-off-by: Joel Granados 
---
 kernel/time/timer.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 63a8ce7177dd..475826ad78df 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -260,7 +260,6 @@ static struct ctl_table timer_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
-   {}
 };
 
 static int __init timer_sysctl_init(void)

-- 
2.30.2




[PATCH v2 00/10] sysctl: Remove sentinel elements from kernel dir

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

What?
These commits remove the sentinel element (last empty element) from the
sysctl arrays of all the files under the "kernel/" directory that use a
sysctl array for registration. The merging of the preparation patches
(in https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)
to mainline allows us to just remove sentinel elements without changing
behavior (more info here [1]).

These commits are part of a bigger set (here
https://github.com/Joelgranados/linux/tree/tag/sysctl_remove_empty_elem_V5)
that remove the ctl_table sentinel. Make the review process easier by
chunking the commits into manageable pieces. Each chunk can be reviewed
separately without noise from parallel sets.

Sending the "kernel/*" chunk now that the "drivers/" has been mostly
reviewed [6]. After this and the "fs/*" are reviewed we only miss 2 more
chunks ("net/*" and miscellaneous) to complete the sentinel removal.
Hurray!!!

Why?
By removing the sysctl sentinel elements we avoid kernel bloat as
ctl_table arrays get moved out of kernel/sysctl.c into their own
respective subsystems. This move was started long ago to avoid merge
conflicts; the sentinel removal bit came after Matthew Wilcox suggested
it to avoid bloating the kernel by one element as arrays moved out. This
patchset will reduce the overall build time size of the kernel and run
time memory bloat by about ~64 bytes per declared ctl_table array. I
have consolidated some links that shed light on the history of this
effort [2].

Testing:
* Ran sysctl selftests (./tools/testing/selftests/sysctl/sysctl.sh)
* Ran this through 0-day with no errors or warnings

Size saving after this patchset:
* bloat-o-meter
- The "yesall" config saves 1984 bytes [4]
- The "tiny" config saves 771 bytes [5]
* If you want to know how many bytes are saved after all the chunks
  are merged see [3]

Comments/feedback greatly appreciated

Changes in v2:
- No functional changes; I resent it as I did not see it in the latest
  sysctl-next. It might be a bit too late to include it in 6.7 version,
  but this v2 can be used for 6.8 when it comes out.
- Rebased on top of v6.7-rc6
- Added trailers to the relevant commits.
- Link to v1: 
https://lore.kernel.org/r/20231107-jag-sysctl_remove_empty_elem_kernel-v1-0-e4ce1388d...@samsung.com
Best

Joel

[1]
We are able to remove a sentinel table without behavioral change by
introducing a table_size argument in the same place where procname is
checked for NULL. The idea is for it to keep stopping when it hits
->procname == NULL, while the sentinel is still present. And when the
sentinel is removed, it will stop on the table_size. You can go to 
(https://lore.kernel.org/all/20230809105006.1198165-1-j.grana...@samsung.com/)
for more information.

[2]
Links Related to the ctl_table sentinel removal:
* E-mail threads that summarize the sentinel effort
  https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/
  https://lore.kernel.org/all/zmfizkfkvxuft...@bombadil.infradead.org/
* Replacing the register functions:
  https://lore.kernel.org/all/20230302204612.782387-1-mcg...@kernel.org/
  https://lore.kernel.org/all/20230302202826.776286-1-mcg...@kernel.org/
* E-mail threads discussing proposal
  https://lore.kernel.org/all/20230321130908.6972-1-frank...@vivo.com
  https://lore.kernel.org/all/20220220060626.15885-1-tangm...@uniontech.com

[3]
Size saving after removing all sentinels:
  These are the bytes that we save after removing all the sentinels
  (this plus all the other chunks). I included them to get an idea of
  how much memory we are talking about.
* bloat-o-meter:
- The "yesall" configuration results save 9158 bytes
  
https://lore.kernel.org/all/20230621091000.424843-1-j.grana...@samsung.com/
- The "tiny" config + CONFIG_SYSCTL save 1215 bytes
  
https://lore.kernel.org/all/20230809105006.1198165-1-j.grana...@samsung.com/
* memory usage:
In memory savings are measured to be 7296 bytes. (here is how to
measure [7])

[4]
add/remove: 0/0 grow/shrink: 0/31 up/down: 0/-1984 (-1984)
Function old new   delta
watchdog_sysctls 576 512 -64
watchdog_hardlockup_sysctl   128  64 -64
vm_table13441280 -64
uts_kern_table   448 384 -64
usermodehelper_table 192 128 -64
user_table   832 768 -64
user_event_sysctls   128  64 -64
timer_sysctl 128  64 -64
signal_debug_table   128  64 -64
seccomp_sysctl_table 192 128 -64
sched_rt_sysctls 256 192 -64
sched_fair_sysctls   256 192 -64
sched_energy_aware_s

[PATCH v2 01/10] kernel misc: Remove the now superfluous sentinel elements from ctl_table array

2024-01-04 Thread Joel Granados via B4 Relay
From: Joel Granados 

This commit comes at the tail end of a greater effort to remove the
empty elements at the end of the ctl_table arrays (sentinels) which
will reduce the overall build time size of the kernel and run time
memory bloat by ~64 bytes per sentinel (further information Link :
https://lore.kernel.org/all/zo5yx5jfoggi%2f...@bombadil.infradead.org/)

Remove the sentinel from ctl_table arrays. Reduce by one the values used
to compare the size of the adjusted arrays.

Signed-off-by: Joel Granados 
---
 kernel/acct.c   | 1 -
 kernel/exit.c   | 1 -
 kernel/hung_task.c  | 1 -
 kernel/kexec_core.c | 1 -
 kernel/latencytop.c | 1 -
 kernel/panic.c  | 1 -
 kernel/pid_namespace.c  | 1 -
 kernel/pid_sysctl.h | 1 -
 kernel/reboot.c | 1 -
 kernel/signal.c | 1 -
 kernel/stackleak.c  | 1 -
 kernel/sysctl.c | 2 --
 kernel/ucount.c | 3 +--
 kernel/utsname_sysctl.c | 1 -
 kernel/watchdog.c   | 2 --
 15 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/kernel/acct.c b/kernel/acct.c
index 986c8214dabf..179848ad33e9 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -84,7 +84,6 @@ static struct ctl_table kern_acct_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec,
},
-   { }
 };
 
 static __init int kernel_acct_sysctls_init(void)
diff --git a/kernel/exit.c b/kernel/exit.c
index aedc0832c9f4..6a6ea468a0c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -92,7 +92,6 @@ static struct ctl_table kern_exit_table[] = {
.mode   = 0644,
.proc_handler   = proc_douintvec,
},
-   { }
 };
 
 static __init int kernel_exit_sysctls_init(void)
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 9a24574988d2..a81cb511d954 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -313,7 +313,6 @@ static struct ctl_table hung_task_sysctls[] = {
.proc_handler   = proc_dointvec_minmax,
.extra1 = SYSCTL_NEG_ONE,
},
-   {}
 };
 
 static void __init hung_task_sysctl_init(void)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index be5642a4ec49..b862285b97dc 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -988,7 +988,6 @@ static struct ctl_table kexec_core_sysctls[] = {
.mode   = 0644,
.proc_handler   = kexec_limit_handler,
},
-   { }
 };
 
 static int __init kexec_core_sysctl_init(void)
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 781249098cb6..84c53285f499 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -85,7 +85,6 @@ static struct ctl_table latencytop_sysctl[] = {
.mode   = 0644,
.proc_handler   = sysctl_latencytop,
},
-   {}
 };
 #endif
 
diff --git a/kernel/panic.c b/kernel/panic.c
index 2807639aab51..01a82cdfd81a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -99,7 +99,6 @@ static struct ctl_table kern_panic_table[] = {
.mode   = 0644,
.proc_handler   = proc_douintvec,
},
-   { }
 };
 
 static __init int kernel_panic_sysctls_init(void)
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 3028b2218aa4..ca4fbba09d38 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -306,7 +306,6 @@ static struct ctl_table pid_ns_ctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &pid_max,
},
-   { }
 };
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 
diff --git a/kernel/pid_sysctl.h b/kernel/pid_sysctl.h
index 2ee41a3a1dfd..fe9fb991dc42 100644
--- a/kernel/pid_sysctl.h
+++ b/kernel/pid_sysctl.h
@@ -41,7 +41,6 @@ static struct ctl_table pid_ns_ctl_table_vm[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
-   { }
 };
 static inline void register_pid_ns_sysctl_table_vm(void)
 {
diff --git a/kernel/reboot.c b/kernel/reboot.c
index 395a0ea3c7a8..1c3b2dfc42a9 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -1276,7 +1276,6 @@ static struct ctl_table kern_reboot_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec,
},
-   { }
 };
 
 static void __init kernel_reboot_sysctls_init(void)
diff --git a/kernel/signal.c b/kernel/signal.c
index 47a7602dfe8d..737b0e630ed7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -4812,7 +4812,6 @@ static struct ctl_table signal_debug_table[] = {
.proc_handler   = proc_dointvec
},
 #endif
-   { }
 };
 
 static int __init init_signal_sysctls(void)
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index 34c9d81eea94..d099f3affcf1 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -54,7 +54,6 @@ static struct ctl_table stackleak_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ON

Re: [RFC PATCH 2/7] osdump: reuse some code from crash_core to get vmcoreinfo

2024-01-04 Thread Petr Mladek
Hi Qi,

first, most people, including me, prefer to be in Cc for the entire patchset.
It helps to get the whole picture.

This mail is even worse because the other patches are not in the same
thread. As a result, I can't find the other patches even via lore,
see https://lore.kernel.org/all/20231221132522.547-1-ruipeng...@gmail.com/


On Thu 2023-12-21 21:25:22, Ruipeng Qi wrote:
> From: qiruipeng 
> 
> Osdump is a new crash dumping solution like crash. It is interested in
> vmcoreinfo, too. Reuse some data and functions from crash_core, but not all
> of them. So pick some code to get vmcoreinfo as needed.

> diff --git a/kernel/crash_core_mini.c b/kernel/crash_core_mini.c
> new file mode 100644
> index ..a0f8d0c79bba
> --- /dev/null
> +++ b/kernel/crash_core_mini.c
> @@ -0,0 +1,275 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * crash.c - kernel crash support code.
> + * Copyright (C) 2002-2004 Eric Biederman  
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +
> +#include 
> +
> +#include "kallsyms_internal.h"
> +#include "kexec_internal.h"
> +
> +/* Per cpu memory for storing cpu states in case of system crash. */
> +note_buf_t __percpu *crash_notes;
> +
> +/* vmcoreinfo stuff */
> +unsigned char *vmcoreinfo_data;
> +size_t vmcoreinfo_size;
> +u32 *vmcoreinfo_note;
> +
> +/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
> +static unsigned char *vmcoreinfo_data_safecopy;
> +
> +
> +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
> +   void *data, size_t data_len)
> +{
> + struct elf_note *note = (struct elf_note *)buf;
> +
> + note->n_namesz = strlen(name) + 1;
> + note->n_descsz = data_len;
> + note->n_type   = type;
> + buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
> + memcpy(buf, name, note->n_namesz);
> + buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
> + memcpy(buf, data, data_len);
> + buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));
> +
> + return buf;
> +}
> +
> +void final_note(Elf_Word *buf)
> +{
> + memset(buf, 0, sizeof(struct elf_note));
> +}
> +
> +static void update_vmcoreinfo_note(void)
> +{
> + u32 *buf = vmcoreinfo_note;
> +
> + if (!vmcoreinfo_size)
> + return;
> + buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
> +   vmcoreinfo_size);
> + final_note(buf);
> +}
> +
> +void crash_update_vmcoreinfo_safecopy(void *ptr)
> +{
> + if (ptr)
> + memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
> +
> + vmcoreinfo_data_safecopy = ptr;
> +}
> +
> +void crash_save_vmcoreinfo(void)
> +{
> + if (!vmcoreinfo_note)
> + return;
> +
> + /* Use the safe copy to generate vmcoreinfo note if have */
> + if (vmcoreinfo_data_safecopy)
> + vmcoreinfo_data = vmcoreinfo_data_safecopy;
> +
> + vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
> + update_vmcoreinfo_note();
> +}
> +
> +void vmcoreinfo_append_str(const char *fmt, ...)
> +{
> + va_list args;
> + char buf[0x50];
> + size_t r;
> +
> + va_start(args, fmt);
> + r = vscnprintf(buf, sizeof(buf), fmt, args);
> + va_end(args);
> +
> + r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
> +
> + memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
> +
> + vmcoreinfo_size += r;
> +
> + WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES,
> +   "vmcoreinfo data exceeds allocated size, truncating");
> +}
> +
> +/*
> + * provide an empty default implementation here -- architecture
> + * code may override this
> + */
> +void __weak arch_crash_save_vmcoreinfo(void)
> +{}
> +
> +phys_addr_t __weak paddr_vmcoreinfo_note(void)
> +{
> + return __pa(vmcoreinfo_note);
> +}
> +EXPORT_SYMBOL(paddr_vmcoreinfo_note);
> +
> +int get_note_size(void)
> +{
> + return VMCOREINFO_NOTE_SIZE;
> +}
> +
> +static int __init crash_save_vmcoreinfo_init(void)
> +{
> + vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
> + if (!vmcoreinfo_data) {
> + pr_warn("Memory allocation for vmcoreinfo_data failed\n");
> + return -ENOMEM;
> + }
> +
> + vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
> + GFP_KERNEL | __GFP_ZERO);
> + if (!vmcoreinfo_note) {
> + free_page((unsigned long)vmcoreinfo_data);
> + vmcoreinfo_data = NULL;
> + pr_warn("Memory allocation for vmcoreinfo_note failed\n");
> + return -ENOMEM;
> + }
> +
> + VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
> + VMCOREINFO_BUILD_ID();
> + VMCOREINFO_PAGESIZE(PAGE_SIZE);
> +
> + VMCOREINFO_SYMBOL(init_uts_ns);
> + VMCOREINFO_OFFSET(uts_namespace, name);
> + VMCOREINFO_SY