Re: [PATCH] powerpc: Don't clobber fr0/vs0 during fp|altivec register save

2023-11-18 Thread Gabriel Paubert
On Sat, Nov 18, 2023 at 05:45:03PM -0600, Timothy Pearson wrote:
> During floating point and vector save to thread data fr0/vs0 are clobbered
> by the FPSCR/VSCR store routine.  This leads to userspace register corruption
> and application data corruption / crash under the following rare condition:
> 
>  * A userspace thread is executing with VSX/FP mode enabled
>  * The userspace thread is making active use of fr0 and/or vs0
>  * An IPI is taken in kernel mode, forcing the userspace thread to reschedule
>  * The userspace thread is interrupted by the IPI before accessing data it
>previously stored in fr0/vs0
>  * The thread being switched in by the IPI has a pending signal
> 
> If these exact criteria are met, then the following sequence happens:
> 
>  * The existing thread FP storage is still valid before the IPI, due to a
>prior call to save_fpu() or store_fp_state().  Note that the current
>fr0/vs0 registers have been clobbered, so the FP/VSX state in registers
>is now invalid pending a call to restore_fp()/restore_altivec().
>  * IPI -- FP/VSX register state remains invalid
>  * interrupt_exit_user_prepare_main() calls do_notify_resume(),
>due to the pending signal
>  * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which
>merrily reads and saves the invalid FP/VSX state to thread local storage.
>  * interrupt_exit_user_prepare_main() calls restore_math(), writing the 
> invalid
>FP/VSX state back to registers.
>  * Execution is released to userspace, and the application crashes or corrupts
>data.
> 
> Without the pending signal, do_notify_resume() is never called, therefore the
> invalid register state doesn't matter as it is overwritten nearly immediately
> by interrupt_exit_user_prepare_main() calling restore_math() before return
> to userspace.
> 
> The combination of MariaDB and io_uring is especially good at triggering data
> corruption using the above sequence, see MariaDB bug MDEV-30728.
> 
> Restore fr0/vs0 after FPSCR/VSCR store has completed for both the fp and
> altivec register save paths.
> 
> Tested under QEMU in kvm mode, running on a Talos II workstation with dual
> POWER9 DD2.2 CPUs.
> 
> Tested-by: Timothy Pearson 
> Signed-off-by: Timothy Pearson 
> ---
>  arch/powerpc/kernel/fpu.S| 13 +
>  arch/powerpc/kernel/vector.S |  4 
>  2 files changed, 17 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
> index 6a9acfb690c9..2f8f3f93cbb6 100644
> --- a/arch/powerpc/kernel/fpu.S
> +++ b/arch/powerpc/kernel/fpu.S
> @@ -23,6 +23,15 @@
>  #include 
>  
>  #ifdef CONFIG_VSX
> +#define __REST_1FPVSR(n,c,base)                                        \
> +BEGIN_FTR_SECTION\
> + b   2f; \
> +END_FTR_SECTION_IFSET(CPU_FTR_VSX);  \
> + REST_FPR(n,base);   \
> + b   3f; \
> +2:   REST_VSR(n,c,base); \
> +3:
> +
>  #define __REST_32FPVSRS(n,c,base)\
>  BEGIN_FTR_SECTION\
>   b   2f; \
> @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);                    \
>  2:   SAVE_32VSRS(n,c,base);  \
>  3:
>  #else
> +#define __REST_1FPVSR(n,b,base)  REST_FPR(n, base)
>  #define __REST_32FPVSRS(n,b,base)REST_32FPRS(n, base)
>  #define __SAVE_32FPVSRS(n,b,base)SAVE_32FPRS(n, base)
>  #endif
> +#define REST_1FPVSR(n,c,base)   __REST_1FPVSR(n,__REG_##c,__REG_##base)
>  #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
>  #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
>  
> @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state)
>   SAVE_32FPVSRS(0, R4, R3)
>   mffs    fr0
>   stfd    fr0,FPSTATE_FPSCR(r3)
> + REST_1FPVSR(0, R4, R3)
>   blr
>  EXPORT_SYMBOL(store_fp_state)
>  
> @@ -138,4 +150,5 @@ _GLOBAL(save_fpu)
>  2:   SAVE_32FPVSRS(0, R4, R6)
>   mffs    fr0
>   stfd    fr0,FPSTATE_FPSCR(r6)
> + REST_1FPVSR(0, R4, R6)
>   blr
> diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
> index 4094e4c4c77a..8c63b05b421e 100644
> --- a/arch/powerpc/kernel/vector.S
> +++ b/arch/powerpc/kernel/vector.S
> @@ -33,6 +33,8 @@ _GLOBAL(store_vr_state)
>   mfvscr  v0
>   li  r4, VRSTATE_VSCR
>   stvx    v0, r4, r3
> + li  r4, 0
> + lvx v0, r4, r3

Just a small nit: there is no need to clear r4 first, "lvx v0,0,r3" will do,
since for all Power instructions using indexed addressing mode an RA operand
of 0 means a literal zero rather than the contents of r0.
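For readers less familiar with the powerpc assembler, a rough C analogy of
what the fix does may help (purely illustrative; the helper names below are
invented, the real code is the assembly in fpu.S/vector.S above):

    /* Sketch of store_fp_state()/save_fpu() after the fix (hypothetical C). */
    void store_fp_state_sketch(struct thread_fp_state *fp)
    {
            save_all_fprs(fp);              /* fr0..fr31 -> fp->fpr[]           */
            fp->fpscr = read_fpscr();       /* the real mffs uses fr0 as a      */
                                            /* scratch register, clobbering it  */
            reload_fpr0(fp);                /* fix: reload fr0 (and vs0) from   */
                                            /* the state just saved, so the     */
                                            /* live register matches fp->fpr[0] */
    }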

>   blr
>  EXPORT_SYMBOL(store_vr_state)
>  
> @@ -109,6 +111,8 @@ _GLOBAL(save_altivec)
>   

Re: [PATCH] powerpc: Don't clobber fr0/vs0 during fp|altivec register save

2023-11-18 Thread Linux regression tracking (Thorsten Leemhuis)
On 19.11.23 00:45, Timothy Pearson wrote:
> During floating point and vector save to thread data fr0/vs0 are clobbered
> by the FPSCR/VSCR store routine.  This leads to userspace register corruption
> and application data corruption / crash under the following rare condition:
> [...]
> Tested-by: Timothy Pearson 

Many thx for this, good to see you finally found the problem.

FWIW, you might want to add a

 Closes: https://lore.kernel.org/all/480932026.45576726.1699374859845.javamail.zim...@raptorengineeringinc.com/

here. Yes, I care about those tags because of regression tracking. But
the tracking only relies on Link:/Closes: tags because they were meant to
be used in the first place to link to backstories and details of a change[1].

And you and Jens did such good debugging in that thread, which is why
it's IMHO really worth linking here in case anyone ever needs to look
into the backstory later.

> Signed-off-by: Timothy Pearson 
> [..]

Thx again for all the work you put into this.

Ciao, Thorsten

[1] see Documentation/process/submitting-patches.rst
(http://docs.kernel.org/process/submitting-patches.html) and
Documentation/process/5.Posting.rst
(https://docs.kernel.org/process/5.Posting.html)

See also these mails from Linus:
https://lore.kernel.org/all/CAHk-=wjMmSZzMJ3Xnskdg4+GGz=5p5p+gsyyfbth0f-dgvd...@mail.gmail.com/
https://lore.kernel.org/all/CAHk-=wgs38ZrfPvy=nowvkvzjpm3vfu1zobp37fwd_h9iad...@mail.gmail.com/
https://lore.kernel.org/all/CAHk-=wjxzafG-=j8ot30s7upn4rhbs6tx-uvfz5rme+l5_d...@mail.gmail.com/


Re: [PATCH] asm/io: remove unnecessary xlate_dev_mem_ptr() and unxlate_dev_mem_ptr()

2023-11-18 Thread kernel test robot
Hi Kefeng,

kernel test robot noticed the following build errors:

[auto build test ERROR on soc/for-next]
[also build test ERROR on geert-m68k/for-next geert-m68k/for-linus 
deller-parisc/for-next powerpc/next powerpc/fixes linus/master v6.7-rc1 
next-20231117]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Kefeng-Wang/asm-io-remove-unnecessary-xlate_dev_mem_ptr-and-unxlate_dev_mem_ptr/20231118-183038
base:   https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next
patch link:
https://lore.kernel.org/r/20231118100827.1599422-1-wangkefeng.wang%40huawei.com
patch subject: [PATCH] asm/io: remove unnecessary xlate_dev_mem_ptr() and 
unxlate_dev_mem_ptr()
config: mips-mtx1_defconfig 
(https://download.01.org/0day-ci/archive/20231119/202311191145.pppexjs6-...@intel.com/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git 
ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20231119/202311191145.pppexjs6-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202311191145.pppexjs6-...@intel.com/

All errors (new ones prefixed by >>):

>> drivers/char/mem.c:159:10: error: call to undeclared function 'xlate_dev_mem_ptr'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
   ptr = xlate_dev_mem_ptr(p);
         ^
>> drivers/char/mem.c:159:8: error: incompatible integer to pointer conversion assigning to 'void *' from 'int' [-Wint-conversion]
   ptr = xlate_dev_mem_ptr(p);
       ^
>> drivers/char/mem.c:164:4: error: call to undeclared function 'unxlate_dev_mem_ptr'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
   unxlate_dev_mem_ptr(p, ptr);
   ^
   drivers/char/mem.c:235:10: error: call to undeclared function 'xlate_dev_mem_ptr'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
   ptr = xlate_dev_mem_ptr(p);
         ^
   drivers/char/mem.c:235:8: error: incompatible integer to pointer conversion assigning to 'void *' from 'int' [-Wint-conversion]
   ptr = xlate_dev_mem_ptr(p);
       ^
   drivers/char/mem.c:243:4: error: call to undeclared function 'unxlate_dev_mem_ptr'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
   unxlate_dev_mem_ptr(p, ptr);
   ^
   6 errors generated.


vim +/xlate_dev_mem_ptr +159 drivers/char/mem.c

^1da177e4c3f41 Linus Torvalds  2005-04-16  133
22ec1a2aea73b9 Kees Cook       2017-12-01  134		bounce = kmalloc(PAGE_SIZE, GFP_KERNEL);
22ec1a2aea73b9 Kees Cook       2017-12-01  135		if (!bounce)
22ec1a2aea73b9 Kees Cook       2017-12-01  136			return -ENOMEM;
22ec1a2aea73b9 Kees Cook       2017-12-01  137
^1da177e4c3f41 Linus Torvalds  2005-04-16  138		while (count > 0) {
fa29e97bb8c70f Wu Fengguang    2009-12-14  139			unsigned long remaining;
b5b38200ebe548 Kees Cook       2018-03-27  140			int allowed, probe;
fa29e97bb8c70f Wu Fengguang    2009-12-14  141
f222318e9c3a31 Wu Fengguang    2009-12-14  142			sz = size_inside_page(p, count);
^1da177e4c3f41 Linus Torvalds  2005-04-16  143
22ec1a2aea73b9 Kees Cook       2017-12-01  144			err = -EPERM;
a4866aa812518e Kees Cook       2017-04-05  145			allowed = page_is_allowed(p >> PAGE_SHIFT);
a4866aa812518e Kees Cook       2017-04-05  146			if (!allowed)
22ec1a2aea73b9 Kees Cook       2017-12-01  147				goto failed;
22ec1a2aea73b9 Kees Cook       2017-12-01  148
22ec1a2aea73b9 Kees Cook       2017-12-01  149			err = -EFAULT;
a4866aa812518e Kees Cook       2017-04-05  150			if (allowed == 2) {
a4866aa812518e Kees Cook       2017-04-05  151				/* Show zeros for restricted memory. */
a4866aa812518e Kees Cook       201

Re: [PATCH] powerpc: Don't clobber fr0/vs0 during fp|altivec register save

2023-11-18 Thread Timothy Pearson



- Original Message -
> From: "Timothy Pearson" 
> To: "Jens Axboe" , "regressions" 
> , "Michael Ellerman"
> , "npiggin" , "christophe leroy" 
> , "linuxppc-dev"
> 
> Sent: Saturday, November 18, 2023 5:45:03 PM
> Subject: [PATCH] powerpc: Don't clobber fr0/vs0 during fp|altivec register  
> save

> During floating point and vector save to thread data fr0/vs0 are clobbered
> by the FPSCR/VSCR store routine.  This leads to userspace register corruption
> and application data corruption / crash under the following rare condition:
> 
> * A userspace thread is executing with VSX/FP mode enabled
> * The userspace thread is making active use of fr0 and/or vs0
> * An IPI is taken in kernel mode, forcing the userspace thread to reschedule
> * The userspace thread is interrupted by the IPI before accessing data it
>   previously stored in fr0/vs0
> * The thread being switched in by the IPI has a pending signal
> 
> If these exact criteria are met, then the following sequence happens:
> 
> * The existing thread FP storage is still valid before the IPI, due to a
>   prior call to save_fpu() or store_fp_state().  Note that the current
>   fr0/vs0 registers have been clobbered, so the FP/VSX state in registers
>   is now invalid pending a call to restore_fp()/restore_altivec().
> * IPI -- FP/VSX register state remains invalid
> * interrupt_exit_user_prepare_main() calls do_notify_resume(),
>   due to the pending signal
> * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which
>   merrily reads and saves the invalid FP/VSX state to thread local storage.
> * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid
>   FP/VSX state back to registers.
> * Execution is released to userspace, and the application crashes or corrupts
>   data.
> 
> Without the pending signal, do_notify_resume() is never called, therefore the
> invalid register state doesn't matter as it is overwritten nearly immediately
> by interrupt_exit_user_prepare_main() calling restore_math() before return
> to userspace.
> 
> The combination of MariaDB and io_uring is especially good at triggering data
> corruption using the above sequence, see MariaDB bug MDEV-30728.
> 
> Restore fr0/vs0 after FPSCR/VSCR store has completed for both the fp and
> altivec register save paths.
> 
> Tested under QEMU in kvm mode, running on a Talos II workstation with dual
> POWER9 DD2.2 CPUs.
> 
> Tested-by: Timothy Pearson 
> Signed-off-by: Timothy Pearson 
> ---
> arch/powerpc/kernel/fpu.S| 13 +
> arch/powerpc/kernel/vector.S |  4 
> 2 files changed, 17 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
> index 6a9acfb690c9..2f8f3f93cbb6 100644
> --- a/arch/powerpc/kernel/fpu.S
> +++ b/arch/powerpc/kernel/fpu.S
> @@ -23,6 +23,15 @@
> #include 
> 
> #ifdef CONFIG_VSX
> +#define __REST_1FPVSR(n,c,base)  
> \
> +BEGIN_FTR_SECTION\
> + b   2f; \
> +END_FTR_SECTION_IFSET(CPU_FTR_VSX);  \
> + REST_FPR(n,base);   \
> + b   3f; \
> +2:   REST_VSR(n,c,base); \
> +3:
> +
> #define __REST_32FPVSRS(n,c,base) \
> BEGIN_FTR_SECTION \
>   b   2f; \
> @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);
> \
> 2:SAVE_32VSRS(n,c,base);  \
> 3:
> #else
> +#define __REST_1FPVSR(n,b,base)  REST_FPR(n, base)
> #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base)
> #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base)
> #endif
> +#define REST_1FPVSR(n,c,base)   __REST_1FPVSR(n,__REG_##c,__REG_##base)
> #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
> #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
> 
> @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state)
>   SAVE_32FPVSRS(0, R4, R3)
>   mffs    fr0
>   stfd    fr0,FPSTATE_FPSCR(r3)
> + REST_1FPVSR(0, R4, R3)
>   blr
> EXPORT_SYMBOL(store_fp_state)
> 
> @@ -138,4 +150,5 @@ _GLOBAL(save_fpu)
> 2:SAVE_32FPVSRS(0, R4, R6)
>   mffs    fr0
>   stfd    fr0,FPSTATE_FPSCR(r6)
> + REST_1FPVSR(0, R4, R6)
>   blr
> diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
> index 4094e4c4c77a..8c63b05b421e 100644
> --- a/arch/powerpc/kernel/vector.S
> +++ b/arch/powerpc/kernel/vector.S
> @@ -33,6 +33,8 @@ _GLOBAL(store_vr_state)
>   mfvscr  v0
>   li  r4, VRSTATE_VSCR
>   stvx    v0, r4, r3
> + li  r4, 0
> + lvx v0, r4, r3
>   blr
> 

[PATCH] powerpc: Don't clobber fr0/vs0 during fp|altivec register save

2023-11-18 Thread Timothy Pearson
During floating point and vector save to thread data fr0/vs0 are clobbered
by the FPSCR/VSCR store routine.  This leads to userspace register corruption
and application data corruption / crash under the following rare condition:

 * A userspace thread is executing with VSX/FP mode enabled
 * The userspace thread is making active use of fr0 and/or vs0
 * An IPI is taken in kernel mode, forcing the userspace thread to reschedule
 * The userspace thread is interrupted by the IPI before accessing data it
   previously stored in fr0/vs0
 * The thread being switched in by the IPI has a pending signal

If these exact criteria are met, then the following sequence happens:

 * The existing thread FP storage is still valid before the IPI, due to a
   prior call to save_fpu() or store_fp_state().  Note that the current
   fr0/vs0 registers have been clobbered, so the FP/VSX state in registers
   is now invalid pending a call to restore_fp()/restore_altivec().
 * IPI -- FP/VSX register state remains invalid
 * interrupt_exit_user_prepare_main() calls do_notify_resume(),
   due to the pending signal
 * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which
   merrily reads and saves the invalid FP/VSX state to thread local storage.
 * interrupt_exit_user_prepare_main() calls restore_math(), writing the invalid
   FP/VSX state back to registers.
 * Execution is released to userspace, and the application crashes or corrupts
   data.

Without the pending signal, do_notify_resume() is never called, therefore the
invalid register state doesn't matter as it is overwritten nearly immediately
by interrupt_exit_user_prepare_main() calling restore_math() before return
to userspace.

The combination of MariaDB and io_uring is especially good at triggering data
corruption using the above sequence, see MariaDB bug MDEV-30728.

Restore fr0/vs0 after FPSCR/VSCR store has completed for both the fp and
altivec register save paths.

Tested under QEMU in kvm mode, running on a Talos II workstation with dual
POWER9 DD2.2 CPUs.

Tested-by: Timothy Pearson 
Signed-off-by: Timothy Pearson 
---
 arch/powerpc/kernel/fpu.S| 13 +
 arch/powerpc/kernel/vector.S |  4 
 2 files changed, 17 insertions(+)

diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 6a9acfb690c9..2f8f3f93cbb6 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -23,6 +23,15 @@
 #include 
 
 #ifdef CONFIG_VSX
+#define __REST_1FPVSR(n,c,base)                                        \
+BEGIN_FTR_SECTION  \
+   b   2f; \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX);\
+   REST_FPR(n,base);   \
+   b   3f; \
+2: REST_VSR(n,c,base); \
+3:
+
 #define __REST_32FPVSRS(n,c,base)  \
 BEGIN_FTR_SECTION  \
b   2f; \
@@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);                    \
 2: SAVE_32VSRS(n,c,base);  \
 3:
 #else
+#define __REST_1FPVSR(n,b,base)REST_FPR(n, base)
 #define __REST_32FPVSRS(n,b,base)  REST_32FPRS(n, base)
 #define __SAVE_32FPVSRS(n,b,base)  SAVE_32FPRS(n, base)
 #endif
+#define REST_1FPVSR(n,c,base)   __REST_1FPVSR(n,__REG_##c,__REG_##base)
 #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
 #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
 
@@ -67,6 +78,7 @@ _GLOBAL(store_fp_state)
SAVE_32FPVSRS(0, R4, R3)
	mffs	fr0
	stfd	fr0,FPSTATE_FPSCR(r3)
+   REST_1FPVSR(0, R4, R3)
blr
 EXPORT_SYMBOL(store_fp_state)
 
@@ -138,4 +150,5 @@ _GLOBAL(save_fpu)
 2: SAVE_32FPVSRS(0, R4, R6)
	mffs	fr0
	stfd	fr0,FPSTATE_FPSCR(r6)
+   REST_1FPVSR(0, R4, R6)
blr
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 4094e4c4c77a..8c63b05b421e 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -33,6 +33,8 @@ _GLOBAL(store_vr_state)
mfvscr  v0
li  r4, VRSTATE_VSCR
	stvx	v0, r4, r3
+   li  r4, 0
+   lvx v0, r4, r3
blr
 EXPORT_SYMBOL(store_vr_state)
 
@@ -109,6 +111,8 @@ _GLOBAL(save_altivec)
mfvscr  v0
li  r4,VRSTATE_VSCR
	stvx	v0,r4,r7
+   li  r4,0
+   lvx v0,r4,r7
blr
 
 #ifdef CONFIG_VSX
-- 
2.39.2


Re: [PATCH 00/34] biops: add atomig find_bit() operations

2023-11-18 Thread Sergey Shtylyov
On 11/18/23 7:18 PM, Bart Van Assche wrote:
[...]
>> Add helpers around test_and_{set,clear}_bit() that allow to search for
>> clear or set bits and flip them atomically.
> 
> There is a typo in the subject: shouldn't "atomig" be changed
> into "atomic"?

   And "biops" to "bitops"? :-)

> Thanks,
> 
> Bart.

MBR, Sergey


Re: [PATCH 01/34] lib/find: add atomic find_bit() primitives

2023-11-18 Thread Bart Van Assche

On 11/18/23 07:50, Yury Norov wrote:

Add helpers around test_and_{set,clear}_bit() that allow to search for
clear or set bits and flip them atomically.


Has it been considered to add kunit tests for the new functions?

Thanks,

Bart.



Re: [PATCH 00/34] biops: add atomig find_bit() operations

2023-11-18 Thread Bart Van Assche

On 11/18/23 07:50, Yury Norov wrote:

Add helpers around test_and_{set,clear}_bit() that allow to search for
clear or set bits and flip them atomically.


There is a typo in the subject: shouldn't "atomig" be changed
into "atomic"?

Thanks,

Bart.



[PATCH 00/34] biops: add atomig find_bit() operations

2023-11-18 Thread Yury Norov
Add helpers around test_and_{set,clear}_bit() that allow to search for
clear or set bits and flip them atomically.

The target patterns may look like this:

for (idx = 0; idx < nbits; idx++)
if (test_and_clear_bit(idx, bitmap))
do_something(idx);

Or like this:

do {
bit = find_first_bit(bitmap, nbits);
if (bit >= nbits)
return nbits;
} while (!test_and_clear_bit(bit, bitmap));
return bit;

In both cases, the opencoded loop may be converted to a single function
or iterator call. Correspondingly:

for_each_test_and_clear_bit(idx, bitmap, nbits)
do_something(idx);

Or:
return find_and_clear_bit(bitmap, nbits);

Obviously, the less routine code people have to write themselves, the lower
the probability of making a mistake. Patch #31 of this series fixes one such
error in the perf/m1 codebase.

Those are not only handy helpers but also resolve a non-trivial
issue of using non-atomic find_bit() together with atomic
test_and_{set,clear}_bit().

The trick is that find_bit() implies that the bitmap is a regular
non-volatile piece of memory, and the compiler is allowed to use
optimization techniques such as re-fetching memory instead of caching it.

For example, find_first_bit() is implemented like this:

  for (idx = 0; idx * BITS_PER_LONG < sz; idx++) {
  val = addr[idx];
  if (val) {
  sz = min(idx * BITS_PER_LONG + __ffs(val), sz);
  break;
  }
  }

On register-memory architectures, like x86, the compiler may decide to
access memory twice: first to compare against 0, and a second time to
fetch the value to pass to __ffs().

When find_first_bit() runs on volatile memory, the memory may change
between those two accesses, which can, for instance, lead to passing 0 to
__ffs(), whose result is undefined. This makes the call potentially dangerous.

find_and_clear_bit(), as a wrapper around test_and_clear_bit(), naturally
treats the underlying bitmap as volatile memory and prevents the compiler
from applying such optimizations.

Now that KCSAN catches exactly this type of situation and warns about such
concurrent memory modifications, we can use it to reveal improper usage of
find_bit() and convert those call sites to atomic find_and_*_bit() as appropriate.

The 1st patch of the series adds the following atomic primitives:

find_and_set_bit(addr, nbits);
find_and_set_next_bit(addr, nbits, start);
...

Here find_and_{set,clear} part refers to the corresponding
test_and_{set,clear}_bit function, and suffixes like _wrap or _lock
derive semantics from corresponding find() or test() functions.

For brevity, the naming omits the fact that we search for zero bit in
find_and_set, and correspondingly, search for set bit in find_and_clear
functions.

The patch also adds iterators with atomic semantics, like
for_each_test_and_set_bit(). Here, the naming rule is to simply prefix
corresponding atomic operation with 'for_each'.
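
Sketched out, such an iterator could expand to something like the following
(an assumption for illustration; the actual macro in this series may differ
in detail):

    /* Iterate over set bits, clearing each one atomically before the body runs. */
    #define for_each_test_and_clear_bit(bit, addr, size)                     \
            for ((bit) = 0;                                                  \
                 (bit) = find_and_clear_next_bit((addr), (size), (bit)),     \
                 (bit) < (size);                                             \
                 (bit)++)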

This series is a result of discussion [1]. All find_bit() functions imply
exclusive access to the bitmaps. However, KCSAN reports quite a number
of warnings related to the find_bit() API. Some of them do not point
to real bugs because in many situations people intentionally allow
concurrent bitmap operations.

If so, find_bit() can be annotated such that KCSAN will ignore it:

bit = data_race(find_first_bit(bitmap, nbits));

This series addresses the other important case where people really need
atomic find ops. As the following patches show, the resulting code
looks safer and less verbose compared to opencoded loops followed by
atomic bit flips.

In [1] Mirsad reported a 2% slowdown in a single-threaded search test when
switching the find_bit() functions to treat bitmaps as volatile arrays. On
the other hand, the kernel test robot in the same thread reported a +3.7%
gain in the will-it-scale.per_thread_ops test.

Assuming that our compilers are sane and generate better code against
properly annotated data, the above discrepancy is not surprising. When
running on non-volatile bitmaps, plain find_bit() outperforms atomic
find_and_*_bit(), and vice versa.

So, all users of the find_bit() API where heavy concurrency is expected
are encouraged to switch to the atomic find_and_*_bit() API as appropriate.

The 1st patch of this series adds the atomic find_and_*_bit() API, and the
following patches spread it over the kernel. They can be applied
separately from each other on a per-subsystem basis, or I can pull them
through the bitmap tree, as appropriate.

[1] 
https://lore.kernel.org/lkml/634f5fdf-e236-42cf-be8d-48a581c21...@alu.unizg.hr/T/#m3e7341eb3571753f3acf8fe166f3fb5b2c12e615
 

Yury Norov (34):
  lib/find: add atomic find_bit() primitives
  lib/sbitmap; make __sbitmap_get_word() using find_and_set_bit()
  watch_queue: use atomic find_bit() in post_one_notification()
  sched: add cpumask_find_and_set() and use it in __mm_cid_get()
  mips: 

[PATCH 01/34] lib/find: add atomic find_bit() primitives

2023-11-18 Thread Yury Norov
Add helpers around test_and_{set,clear}_bit() that allow to search for
clear or set bits and flip them atomically.

The target patterns may look like this:

for (idx = 0; idx < nbits; idx++)
if (test_and_clear_bit(idx, bitmap))
do_something(idx);

Or like this:

do {
bit = find_first_bit(bitmap, nbits);
if (bit >= nbits)
return nbits;
} while (!test_and_clear_bit(bit, bitmap));
return bit;

In both cases, the opencoded loop may be converted to a single function
or iterator call. Correspondingly:

for_each_test_and_clear_bit(idx, bitmap, nbits)
do_something(idx);

Or:
return find_and_clear_bit(bitmap, nbits);

Obviously, the less routine code people have to write themselves, the lower
the probability of making a mistake.

Those are not only handy helpers but also resolve a non-trivial
issue of using non-atomic find_bit() together with atomic
test_and_{set,clear}_bit().

The trick is that find_bit() implies that the bitmap is a regular
non-volatile piece of memory, and the compiler is allowed to use
optimization techniques such as re-fetching memory instead of caching it.

For example, find_first_bit() is implemented like this:

  for (idx = 0; idx * BITS_PER_LONG < sz; idx++) {
  val = addr[idx];
  if (val) {
  sz = min(idx * BITS_PER_LONG + __ffs(val), sz);
  break;
  }
  }

On register-memory architectures, like x86, the compiler may decide to
access memory twice: first to compare against 0, and a second time to
fetch the value to pass to __ffs().

When find_first_bit() runs on volatile memory, the memory may change
between those two accesses, which can, for instance, lead to passing 0 to
__ffs(), whose result is undefined. This makes the call potentially dangerous.

find_and_clear_bit(), as a wrapper around test_and_clear_bit(), naturally
treats the underlying bitmap as volatile memory and prevents the compiler
from applying such optimizations.

Now that KCSAN catches exactly this type of situation and warns about such
concurrent memory modifications, we can use it to reveal improper usage of
find_bit() and convert those call sites to atomic find_and_*_bit() as appropriate.

The 1st patch of the series adds the following atomic primitives:

find_and_set_bit(addr, nbits);
find_and_set_next_bit(addr, nbits, start);
...

Here find_and_{set,clear} part refers to the corresponding
test_and_{set,clear}_bit function, and suffixes like _wrap or _lock
derive semantics from corresponding find() or test() functions.

For brevity, the naming omits the fact that we search for zero bit in
find_and_set, and correspondingly, search for set bit in find_and_clear
functions.

The patch also adds iterators with atomic semantics, like
for_each_test_and_set_bit(). Here, the naming rule is to simply prefix
corresponding atomic operation with 'for_each'.

All users of the find_bit() API where heavy concurrency is expected
are encouraged to switch to the atomic find_and_*_bit() API as appropriate.
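
As a usage example (a hypothetical caller, shown only for illustration;
slot_map and MAX_SLOTS are invented names), atomically claiming a free slot
out of a shared bitmap then collapses to:

    /* Atomically claim a free slot in a shared bitmap of MAX_SLOTS entries. */
    unsigned long id = find_and_set_bit(slot_map, MAX_SLOTS);

    if (id >= MAX_SLOTS)
            return -ENOSPC;
    return id;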

Signed-off-by: Yury Norov 
---
 include/linux/find.h | 289 +++
 lib/find_bit.c   |  85 +
 2 files changed, 374 insertions(+)

diff --git a/include/linux/find.h b/include/linux/find.h
index 5e4f39ef2e72..e8567f336f42 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -32,6 +32,16 @@ extern unsigned long _find_first_and_bit(const unsigned long *addr1,
 extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
 extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
 
+unsigned long _find_and_set_bit(volatile unsigned long *addr, unsigned long nbits);
+unsigned long _find_and_set_next_bit(volatile unsigned long *addr, unsigned long nbits,
+				unsigned long start);
+unsigned long _find_and_set_bit_lock(volatile unsigned long *addr, unsigned long nbits);
+unsigned long _find_and_set_next_bit_lock(volatile unsigned long *addr, unsigned long nbits,
+				unsigned long start);
+unsigned long _find_and_clear_bit(volatile unsigned long *addr, unsigned long nbits);
+unsigned long _find_and_clear_next_bit(volatile unsigned long *addr, unsigned long nbits,
+				unsigned long start);
+
 #ifdef __BIG_ENDIAN
 unsigned long _find_first_zero_bit_le(const unsigned long *addr, unsigned long size);
 unsigned long _find_next_zero_bit_le(const  unsigned long *addr, unsigned
@@ -460,6 +470,267 @@ unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size,
 	return bit < start ? bit : size;
 }
 
+/**
+ * find_and_set_bit - Find a zero bit and set it atomically
+ * @addr: The address to base the search on
+ * @nbits: The bitmap size in bits
+ *
+ * This function is designed to operate in concurrent access environment.
+ 

Re: [PATCH] asm/io: remove unnecessary xlate_dev_mem_ptr() and unxlate_dev_mem_ptr()

2023-11-18 Thread kernel test robot
Hi Kefeng,

kernel test robot noticed the following build errors:

[auto build test ERROR on soc/for-next]
[also build test ERROR on geert-m68k/for-next geert-m68k/for-linus 
deller-parisc/for-next powerpc/next powerpc/fixes linus/master v6.7-rc1 
next-20231117]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Kefeng-Wang/asm-io-remove-unnecessary-xlate_dev_mem_ptr-and-unxlate_dev_mem_ptr/20231118-183038
base:   https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next
patch link:
https://lore.kernel.org/r/20231118100827.1599422-1-wangkefeng.wang%40huawei.com
patch subject: [PATCH] asm/io: remove unnecessary xlate_dev_mem_ptr() and 
unxlate_dev_mem_ptr()
config: mips-db1xxx_defconfig 
(https://download.01.org/0day-ci/archive/20231119/202311190352.yqcpbjin-...@intel.com/config)
compiler: mipsel-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20231119/202311190352.yqcpbjin-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202311190352.yqcpbjin-...@intel.com/

All error/warnings (new ones prefixed by >>):

   drivers/char/mem.c: In function 'read_mem':
>> drivers/char/mem.c:159:31: error: implicit declaration of function 'xlate_dev_mem_ptr' [-Werror=implicit-function-declaration]
     159 |                 ptr = xlate_dev_mem_ptr(p);
         |                       ^~~~~~~~~~~~~~~~~
>> drivers/char/mem.c:159:29: warning: assignment to 'void *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
     159 |                 ptr = xlate_dev_mem_ptr(p);
         |                     ^
>> drivers/char/mem.c:164:25: error: implicit declaration of function 'unxlate_dev_mem_ptr' [-Werror=implicit-function-declaration]
     164 |                 unxlate_dev_mem_ptr(p, ptr);
         |                 ^~~~~~~~~~~~~~~~~~~
   drivers/char/mem.c: In function 'write_mem':
   drivers/char/mem.c:235:29: warning: assignment to 'void *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
     235 |                 ptr = xlate_dev_mem_ptr(p);
         |                     ^
   cc1: some warnings being treated as errors


vim +/xlate_dev_mem_ptr +159 drivers/char/mem.c

^1da177e4c3f41 Linus Torvalds  2005-04-16  133
22ec1a2aea73b9 Kees Cook       2017-12-01  134		bounce = kmalloc(PAGE_SIZE, GFP_KERNEL);
22ec1a2aea73b9 Kees Cook       2017-12-01  135		if (!bounce)
22ec1a2aea73b9 Kees Cook       2017-12-01  136			return -ENOMEM;
22ec1a2aea73b9 Kees Cook       2017-12-01  137
^1da177e4c3f41 Linus Torvalds  2005-04-16  138		while (count > 0) {
fa29e97bb8c70f Wu Fengguang    2009-12-14  139			unsigned long remaining;
b5b38200ebe548 Kees Cook       2018-03-27  140			int allowed, probe;
fa29e97bb8c70f Wu Fengguang    2009-12-14  141
f222318e9c3a31 Wu Fengguang    2009-12-14  142			sz = size_inside_page(p, count);
^1da177e4c3f41 Linus Torvalds  2005-04-16  143
22ec1a2aea73b9 Kees Cook       2017-12-01  144			err = -EPERM;
a4866aa812518e Kees Cook       2017-04-05  145			allowed = page_is_allowed(p >> PAGE_SHIFT);
a4866aa812518e Kees Cook       2017-04-05  146			if (!allowed)
22ec1a2aea73b9 Kees Cook       2017-12-01  147				goto failed;
22ec1a2aea73b9 Kees Cook       2017-12-01  148
22ec1a2aea73b9 Kees Cook       2017-12-01  149			err = -EFAULT;
a4866aa812518e Kees Cook       2017-04-05  150			if (allowed == 2) {
a4866aa812518e Kees Cook       2017-04-05  151				/* Show zeros for restricted memory. */
a4866aa812518e Kees Cook       2017-04-05  152				remaining = clear_user(buf, sz);
a4866aa812518e Kees Cook       2017-04-05  153			} else {
^1da177e4c3f41 Linus Torvalds  2005-04-16  154				/*
a4866aa812518e Kees Cook       2017-04-05  155				 * On ia64 if a page has been mapped somewhere as
a4866aa812518e Kees Cook       2017-04-05  156				 * uncached, then it must also be accessed uncached
a4866aa812518e Kees Cook       2017-04-05  157 
[PATCH 00/10] Don't let i2c adapters declare I2C_CLASS_SPD support if they support I2C_CLASS_HWMON

2023-11-18 Thread Heiner Kallweit
After removal of the legacy eeprom driver the only remaining I2C
client device driver supporting I2C_CLASS_SPD is jc42. Because this
driver also supports I2C_CLASS_HWMON, adapters don't have to
declare support for I2C_CLASS_SPD if they support I2C_CLASS_HWMON.
It's one step towards getting rid of I2C_CLASS_SPD mid-term.

The series was created with the help of Coccinelle and its splitpatch.

Signed-off-by: Heiner Kallweit 

---

 drivers/i2c/busses/i2c-ali1535.c  |2 +-
 drivers/i2c/busses/i2c-ali1563.c  |2 +-
 drivers/i2c/busses/i2c-ali15x3.c  |2 +-
 drivers/i2c/busses/i2c-amd756.c   |2 +-
 drivers/i2c/busses/i2c-amd8111.c  |2 +-
 drivers/i2c/busses/i2c-elektor.c  |2 +-
 drivers/i2c/busses/i2c-gpio.c |2 +-
 drivers/i2c/busses/i2c-ibm_iic.c  |2 +-
 drivers/i2c/busses/i2c-iop3xx.c   |2 +-
 drivers/i2c/busses/i2c-isch.c |2 +-
 drivers/i2c/busses/i2c-kempld.c   |4 ++--
 drivers/i2c/busses/i2c-mlxcpld.c  |2 +-
 drivers/i2c/busses/i2c-nforce2.c  |2 +-
 drivers/i2c/busses/i2c-pasemi-pci.c   |2 +-
 drivers/i2c/busses/i2c-piix4.c|2 +-
 drivers/i2c/busses/i2c-scmi.c |2 +-
 drivers/i2c/busses/i2c-sh7760.c   |2 +-
 drivers/i2c/busses/i2c-sibyte.c   |4 ++--
 drivers/i2c/busses/i2c-sis5595.c  |2 +-
 drivers/i2c/busses/i2c-sis630.c   |2 +-
 drivers/i2c/busses/i2c-sis96x.c   |2 +-
 drivers/i2c/busses/i2c-via.c  |2 +-
 drivers/i2c/busses/i2c-viapro.c   |2 +-
 drivers/i2c/busses/scx200_acb.c   |2 +-
 drivers/i2c/i2c-stub.c|2 +-
 drivers/media/pci/netup_unidvb/netup_unidvb_i2c.c |2 +-
 drivers/staging/greybus/i2c.c |2 +-
 27 files changed, 29 insertions(+), 29 deletions(-)


[PATCH 03/10] drivers/i2c/busses/i2c-pasemi-pci.c: Don't let i2c adapters declare I2C_CLASS_SPD support if they support I2C_CLASS_HWMON

2023-11-18 Thread Heiner Kallweit
After removal of the legacy eeprom driver the only remaining I2C
client device driver supporting I2C_CLASS_SPD is jc42. Because this
driver also supports I2C_CLASS_HWMON, adapters don't have to
declare support for I2C_CLASS_SPD if they support I2C_CLASS_HWMON.
It's one step towards getting rid of I2C_CLASS_SPD mid-term.

The series was created with the help of Coccinelle and its splitpatch.

Signed-off-by: Heiner Kallweit 

---
 drivers/i2c/busses/i2c-pasemi-pci.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-pasemi-pci.c b/drivers/i2c/busses/i2c-pasemi-pci.c
index cfc89e04e..77f90c743 100644
--- a/drivers/i2c/busses/i2c-pasemi-pci.c
+++ b/drivers/i2c/busses/i2c-pasemi-pci.c
@@ -56,7 +56,7 @@ static int pasemi_smb_pci_probe(struct pci_dev *dev,
if (!smbus->ioaddr)
return -EBUSY;
 
-   smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
+   smbus->adapter.class = I2C_CLASS_HWMON;
error = pasemi_i2c_common_probe(smbus);
if (error)
return error;



[PATCH 16/34] powerpc: use atomic find_bit() API where appropriate

2023-11-18 Thread Yury Norov
Fix opencoded find_and_{set,clear}_bit() by using dedicated functions.

Signed-off-by: Yury Norov 
---
 arch/powerpc/mm/book3s32/mmu_context.c | 10 ++---
 arch/powerpc/platforms/pasemi/dma_lib.c| 45 +-
 arch/powerpc/platforms/powernv/pci-sriov.c | 12 ++
 3 files changed, 17 insertions(+), 50 deletions(-)

diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c
index 1922f9a6b058..7db19f173c2e 100644
--- a/arch/powerpc/mm/book3s32/mmu_context.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
@@ -50,13 +50,11 @@ static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
 
 unsigned long __init_new_context(void)
 {
-   unsigned long ctx = next_mmu_context;
+   unsigned long ctx;
 
-   while (test_and_set_bit(ctx, context_map)) {
-   ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx);
-   if (ctx > LAST_CONTEXT)
-   ctx = 0;
-   }
+	ctx = find_and_set_next_bit(context_map, LAST_CONTEXT + 1, next_mmu_context);
+   if (ctx > LAST_CONTEXT)
+   ctx = 0;
next_mmu_context = (ctx + 1) & LAST_CONTEXT;
 
return ctx;
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index 1be1f18f6f09..906dabee0132 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -118,14 +118,9 @@ static int pasemi_alloc_tx_chan(enum pasemi_dmachan_type type)
limit = MAX_TXCH;
break;
}
-retry:
-   bit = find_next_bit(txch_free, MAX_TXCH, start);
-   if (bit >= limit)
-   return -ENOSPC;
-   if (!test_and_clear_bit(bit, txch_free))
-   goto retry;
-
-   return bit;
+
+   bit = find_and_clear_next_bit(txch_free, MAX_TXCH, start);
+   return bit < limit ? bit : -ENOSPC;
 }
 
 static void pasemi_free_tx_chan(int chan)
@@ -136,15 +131,9 @@ static void pasemi_free_tx_chan(int chan)
 
 static int pasemi_alloc_rx_chan(void)
 {
-   int bit;
-retry:
-   bit = find_first_bit(rxch_free, MAX_RXCH);
-   if (bit >= MAX_TXCH)
-   return -ENOSPC;
-   if (!test_and_clear_bit(bit, rxch_free))
-   goto retry;
-
-   return bit;
+   int bit = find_and_clear_bit(rxch_free, MAX_RXCH);
+
+   return bit < MAX_TXCH ? bit : -ENOSPC;
 }
 
 static void pasemi_free_rx_chan(int chan)
@@ -374,16 +363,9 @@ EXPORT_SYMBOL(pasemi_dma_free_buf);
  */
 int pasemi_dma_alloc_flag(void)
 {
-   int bit;
+   int bit = find_and_clear_bit(flags_free, MAX_FLAGS);
 
-retry:
-   bit = find_first_bit(flags_free, MAX_FLAGS);
-   if (bit >= MAX_FLAGS)
-   return -ENOSPC;
-   if (!test_and_clear_bit(bit, flags_free))
-   goto retry;
-
-   return bit;
+   return bit < MAX_FLAGS ? bit : -ENOSPC;
 }
 EXPORT_SYMBOL(pasemi_dma_alloc_flag);
 
@@ -439,16 +421,9 @@ EXPORT_SYMBOL(pasemi_dma_clear_flag);
  */
 int pasemi_dma_alloc_fun(void)
 {
-   int bit;
-
-retry:
-   bit = find_first_bit(fun_free, MAX_FLAGS);
-   if (bit >= MAX_FLAGS)
-   return -ENOSPC;
-   if (!test_and_clear_bit(bit, fun_free))
-   goto retry;
+   int bit = find_and_clear_bit(fun_free, MAX_FLAGS);
 
-   return bit;
+   return bit < MAX_FLAGS ? bit : -ENOSPC;
 }
 EXPORT_SYMBOL(pasemi_dma_alloc_fun);
 
diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index 59882da3e742..640e387e6d83 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -397,18 +397,12 @@ static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb,
 
 static int pnv_pci_alloc_m64_bar(struct pnv_phb *phb, struct pnv_iov_data *iov)
 {
-   int win;
+	int win = find_and_set_bit(&phb->ioda.m64_bar_alloc, phb->ioda.m64_bar_idx + 1);
 
-	do {
-		win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
-				phb->ioda.m64_bar_idx + 1, 0);
-
-		if (win >= phb->ioda.m64_bar_idx + 1)
-			return -1;
-	} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
+	if (win >= phb->ioda.m64_bar_idx + 1)
+		return -1;
 
set_bit(win, iov->used_m64_bar_mask);
-
return win;
 }
 
-- 
2.39.2



Re: [PATCH v2 3/5] modpost: Extended modversion support

2023-11-18 Thread kernel test robot
Hi Matthew,

kernel test robot noticed the following build errors:

[auto build test ERROR on mcgrof/modules-next]
[also build test ERROR on powerpc/next powerpc/fixes masahiroy-kbuild/for-next 
masahiroy-kbuild/fixes linus/master v6.7-rc1 next-20231117]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Matthew-Maurer/export_report-Rehabilitate-script/20231118-110040
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git 
modules-next
patch link:
https://lore.kernel.org/r/20231118025748.2778044-4-mmaurer%40google.com
patch subject: [PATCH v2 3/5] modpost: Extended modversion support
config: powerpc-allmodconfig 
(https://download.01.org/0day-ci/archive/20231118/202311182118.zjqkg301-...@intel.com/config)
compiler: powerpc64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20231118/202311182118.zjqkg301-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202311182118.zjqkg301-...@intel.com/

All errors (new ones prefixed by >>):

>> arch/powerpc/kernel/module_64.c:25:10: fatal error: string.h: No such file or directory
      25 | #include 
         |  ^~
    compilation terminated.


vim +25 arch/powerpc/kernel/module_64.c

 8  
 9  #include 
10  #include 
11  #include 
12  #include 
13  #include 
14  #include 
15  #include 
16  #include 
17  #include 
18  #include 
19  #include 
20  #include 
21  #include 
22  #include 
23  #include 
24  #include 
  > 25  #include 
26  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


Re: [PATCH v2 2/5] modules: Refactor + kdoc elf_validity_cached_copy

2023-11-18 Thread Greg KH
On Sat, Nov 18, 2023 at 02:54:43AM +, Matthew Maurer wrote:
> Functionality is almost identical, just structured for better
> documentation and readability. Changes:
> 
> * Section names are checked for *all* non-SHT_NULL sections, not just
>   SHF_ALLOC sections. We have code that accesses section names of
>   non-SHF_ALLOC sections (see find_any_sec for example)
> * The section name check occurs *before* strcmping on section names.
>   Previously, it was possible to use an out-of-bounds offset to strcmp
>   against ".modinfo" or ".gnu.linkonce.this_module"
> * strtab is checked for NUL lead+termination and nonzero size
> * The symbol table is swept to ensure offsets are inbounds of strtab
> 
> While some of these oversights would normally be worrying, all of the
> potentially unverified accesses occur after signature check, and only in
> response to a user who can load a kernel module.
> 
> Signed-off-by: Matthew Maurer 
> ---
>  kernel/module/internal.h |   7 +-
>  kernel/module/main.c | 585 +--
>  2 files changed, 444 insertions(+), 148 deletions(-)

Again, this needs to be broken into much smaller pieces before we can
even review it.  Would you want to review this?

thanks,

greg "think of the reviewers" k-h


Re: [PATCH v2 1/5] export_report: Rehabilitate script

2023-11-18 Thread Greg KH
On Sat, Nov 18, 2023 at 02:54:42AM +, Matthew Maurer wrote:
> * modules.order has .o files when in a build dir, support this
> * .mod.c source layout has changed, update regexes to match
> * Add a stage 3, to be more robust against additional .mod.c content

When you have to list different things you do in a patch, that is a huge
hint that you need to break up your patch into smaller pieces.

Remember, each patch can only do one logical thing.  I know it feels
odd, but it makes it easier to review.

This patch, as-is, is nothing that I would be able to take, please make
it a series.

thanks,

greg k-h


[PATCH] asm/io: remove unnecessary xlate_dev_mem_ptr() and unxlate_dev_mem_ptr()

2023-11-18 Thread Kefeng Wang
The asm-generic/io.h header already provides default definitions, so remove
the unnecessary per-architecture definitions.
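
For reference, the generic fallbacks are roughly of this shape (paraphrased
from asm-generic/io.h; check the header for the exact current form):

    /* Convert a physical pointer to a virtual kernel pointer for /dev/mem access. */
    #ifndef xlate_dev_mem_ptr
    #define xlate_dev_mem_ptr xlate_dev_mem_ptr
    static inline void *xlate_dev_mem_ptr(phys_addr_t addr)
    {
            return __va(addr);
    }
    #endif

    #ifndef unxlate_dev_mem_ptr
    #define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
    static inline void unxlate_dev_mem_ptr(phys_addr_t addr, void *addr_virt)
    {
    }
    #endif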

Cc: Richard Henderson 
Cc: Ivan Kokshaysky 
Cc: Russell King 
Cc: Brian Cain 
Cc: "James E.J. Bottomley" 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: "David S. Miller" 
Cc: Stanislav Kinsburskii 
Signed-off-by: Kefeng Wang 
---
 arch/alpha/include/asm/io.h| 6 --
 arch/arm/include/asm/io.h  | 6 --
 arch/hexagon/include/asm/io.h  | 6 --
 arch/m68k/include/asm/io_mm.h  | 6 --
 arch/mips/include/asm/io.h | 7 ---
 arch/parisc/include/asm/io.h   | 6 --
 arch/powerpc/include/asm/io.h  | 6 --
 arch/sh/include/asm/io.h   | 7 ---
 arch/sparc/include/asm/io_64.h | 6 --
 9 files changed, 56 deletions(-)

diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index 7aeaf7c30a6f..5e5d21ebc584 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -651,12 +651,6 @@ extern void outsl (unsigned long port, const void *src, unsigned long count);
 #endif
 #define RTC_ALWAYS_BCD 0
 
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)   __va(p)
-
 /*
  * These get provided from  since alpha does not
  * select GENERIC_IOMAP.
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 56b08ed6cc3b..1815748f5d2a 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -407,12 +407,6 @@ struct pci_dev;
 #define pci_iounmap pci_iounmap
 extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
 
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)   __va(p)
-
 #include 
 
 #ifdef CONFIG_MMU
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index e2b308e32a37..97d57751ce3b 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -58,12 +58,6 @@ static inline void *phys_to_virt(unsigned long address)
return __va(address);
 }
 
-/*
- * convert a physical pointer to a virtual kernel pointer for
- * /dev/mem access.
- */
-#define xlate_dev_mem_ptr(p)__va(p)
-
 /*
  * IO port access primitives.  Hexagon doesn't have special IO access
  * instructions; all I/O is memory mapped.
diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h
index 47525f2a57e1..090aec54b8fa 100644
--- a/arch/m68k/include/asm/io_mm.h
+++ b/arch/m68k/include/asm/io_mm.h
@@ -389,12 +389,6 @@ static inline void isa_delay(void)
 
 #define __ARCH_HAS_NO_PAGE_ZERO_MAPPED 1
 
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)   __va(p)
-
 #define readb_relaxed(addr)readb(addr)
 #define readw_relaxed(addr)readw(addr)
 #define readl_relaxed(addr)readl(addr)
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 062dd4e6b954..2158ff302430 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -548,13 +548,6 @@ extern void (*_dma_cache_inv)(unsigned long start, unsigned long size);
 #define csr_out32(v, a) (*(volatile u32 *)((unsigned long)(a) + __CSR_32_ADJUST) = (v))
 #define csr_in32(a)     (*(volatile u32 *)((unsigned long)(a) + __CSR_32_ADJUST))
 
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)   __va(p)
-#define unxlate_dev_mem_ptr(p, v) do { } while (0)
-
 void __ioread64_copy(void *to, const void __iomem *from, size_t count);
 
 #endif /* _ASM_IO_H */
diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h
index 366537042465..9c06cafb0e70 100644
--- a/arch/parisc/include/asm/io.h
+++ b/arch/parisc/include/asm/io.h
@@ -267,12 +267,6 @@ extern void iowrite64be(u64 val, void __iomem *addr);
 #define iowrite16_rep iowrite16_rep
 #define iowrite32_rep iowrite32_rep
 
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)   __va(p)
-
 extern int devmem_is_allowed(unsigned long pfn);
 
 #include 
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 5220274a6277..79421c285066 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -709,12 +709,6 @@ static inline void name at                                    \
 #define memcpy_fromio memcpy_fromio
 #define memcpy_toio memcpy_toio
 
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)   __va(p)
-
 /*
  * We don't do relaxed operations yet, at least not with this semantic
  */
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index ac521f287fa5..be7ac06423a9 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -304,13 +304,6 @@ unsigned long long poke_real_address_q(unsigned long long addr,
 
 #define ioremap_uc