Re: [PATCH] ASoC: fsl_sai: Use physical format width

2023-03-30 Thread Shengjiu Wang
On Thu, Mar 30, 2023 at 4:30 PM Emil Abildgaard Svendsen <
e...@bang-olufsen.dk> wrote:

> Slot width should follow the physical width of the format instead of the
> data width.
>
> This is needed for formats like SNDRV_PCM_FMTBIT_S24_LE where physical
> width is 32 and data width is 24. By using the physical width, data
> won't get misaligned.


There are different requirements for this slot width. Some need the physical
width, some need the format width. We need to be careful about changes here.

Actually there is .set_tdm_slot API for slot specific setting, please use
this API.

best regards
wang shengjiu

>
> Signed-off-by: Emil Svendsen 
> ---
>  sound/soc/fsl/fsl_sai.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
> index 939c6bdd22c4..213e2d462076 100644
> --- a/sound/soc/fsl/fsl_sai.c
> +++ b/sound/soc/fsl/fsl_sai.c
> @@ -519,13 +519,13 @@ static int fsl_sai_hw_params(struct
> snd_pcm_substream *substream,
> unsigned int channels = params_channels(params);
> struct snd_dmaengine_dai_dma_data *dma_params;
> struct fsl_sai_dl_cfg *dl_cfg = sai->dl_cfg;
> +   u32 slot_width = params_physical_width(params);
> u32 word_width = params_width(params);
> int trce_mask = 0, dl_cfg_idx = 0;
> int dl_cfg_cnt = sai->dl_cfg_cnt;
> u32 dl_type = FSL_SAI_DL_I2S;
> u32 val_cr4 = 0, val_cr5 = 0;
> u32 slots = (channels == 1) ? 2 : channels;
> -   u32 slot_width = word_width;
> int adir = tx ? RX : TX;
> u32 pins, bclk;
> u32 watermark;
> --
> 2.34.1
>


[powerpc:next-test] BUILD SUCCESS 2f23ef840551947878b6f26688be21e7e5a2a4dd

2023-03-30 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
next-test
branch HEAD: 2f23ef840551947878b6f26688be21e7e5a2a4dd  powerpc/atomics: Remove 
unused function

elapsed time: 734m

configs tested: 257
configs skipped: 22

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alphaallyesconfig   gcc  
alphabuildonly-randconfig-r002-20230329   gcc  
alphabuildonly-randconfig-r004-20230329   gcc  
alpha   defconfig   gcc  
alpharandconfig-r015-20230329   gcc  
alpharandconfig-r016-20230329   gcc  
alpharandconfig-r024-20230329   gcc  
alpharandconfig-r025-20230329   gcc  
alpharandconfig-r026-20230329   gcc  
alpharandconfig-r034-20230329   gcc  
arc  allyesconfig   gcc  
arc  buildonly-randconfig-r003-20230329   gcc  
arc  buildonly-randconfig-r005-20230329   gcc  
arc defconfig   gcc  
arc  randconfig-r004-20230329   gcc  
arc  randconfig-r013-20230329   gcc  
arc  randconfig-r016-20230329   gcc  
arc  randconfig-r032-20230329   gcc  
arc  randconfig-r033-20230329   gcc  
arc  randconfig-r035-20230329   gcc  
arm  allmodconfig   gcc  
arm  allyesconfig   gcc  
arm  buildonly-randconfig-r006-20230329   gcc  
arm defconfig   gcc  
arm  jornada720_defconfig   gcc  
arm lpc18xx_defconfig   gcc  
arm orion5x_defconfig   clang
arm  randconfig-c002-20230330   gcc  
arm  randconfig-r021-20230329   gcc  
arm  randconfig-r025-20230329   gcc  
arm  randconfig-r026-20230329   gcc  
arm64allyesconfig   gcc  
arm64buildonly-randconfig-r001-20230329   gcc  
arm64buildonly-randconfig-r002-20230329   gcc  
arm64buildonly-randconfig-r005-20230329   gcc  
arm64   defconfig   gcc  
arm64randconfig-r001-20230329   gcc  
arm64randconfig-r003-20230329   gcc  
arm64randconfig-r025-20230329   clang
arm64randconfig-r026-20230329   clang
arm64randconfig-r032-20230329   gcc  
arm64randconfig-r036-20230329   gcc  
csky buildonly-randconfig-r006-20230329   gcc  
cskydefconfig   gcc  
csky randconfig-r003-20230329   gcc  
csky randconfig-r004-20230329   gcc  
csky randconfig-r011-20230329   gcc  
csky randconfig-r022-20230329   gcc  
csky randconfig-r026-20230329   gcc  
csky randconfig-r033-20230329   gcc  
csky randconfig-r034-20230329   gcc  
csky randconfig-r035-20230329   gcc  
hexagon  randconfig-r011-20230330   clang
hexagon  randconfig-r015-20230329   clang
hexagon  randconfig-r032-20230329   clang
hexagon  randconfig-r041-20230329   clang
hexagon  randconfig-r045-20230329   clang
i386 allyesconfig   gcc  
i386 debian-10.3-func   gcc  
i386   debian-10.3-kselftests   gcc  
i386debian-10.3-kunit   gcc  
i386  debian-10.3-kvm   gcc  
i386  debian-10.3   gcc  
i386defconfig   gcc  
i386  randconfig-a002   clang
i386  randconfig-a004   clang
i386  randconfig-a006   clang
i386  randconfig-a011   clang
i386  randconfig-a012   gcc  
i386  randconfig-a013   clang
i386  randconfig-a014   gcc  
i386  randconfig-a015   clang
i386  randconfig-a016   gcc  
i386  randconfig-c001   gcc  
ia64 allmodconfig   gcc  
ia64 buildonly-randconfig-r003-20230329   gcc  
ia64 buildonly-randconfig-r005-20230329   gcc  
ia64 buildonly-randconfig-r006-20230329   gcc  
ia64defconfig   gcc  
ia64  gensparse_defconfig   gcc  
ia64 randconfig-r003-20230329   gcc  
ia64 randconfig-r004-20230329   gcc  
ia64 randconfig-r005-20230329   gcc  
ia64 randconfig-r006-20230329   gcc  
ia64 randconfig-r011-20230329   gcc  
ia64

[powerpc:next] BUILD SUCCESS 87b626a66dd4ab7d5caf5199d98ec0b5953d73f8

2023-03-30 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
next
branch HEAD: 87b626a66dd4ab7d5caf5199d98ec0b5953d73f8  macintosh: Use 
of_property_present() for testing DT property presence

elapsed time: 734m

configs tested: 261
configs skipped: 22

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alphaallyesconfig   gcc  
alphabuildonly-randconfig-r002-20230329   gcc  
alphabuildonly-randconfig-r004-20230329   gcc  
alpha   defconfig   gcc  
alpharandconfig-r015-20230329   gcc  
alpharandconfig-r016-20230329   gcc  
alpharandconfig-r024-20230329   gcc  
alpharandconfig-r025-20230329   gcc  
alpharandconfig-r026-20230329   gcc  
alpharandconfig-r034-20230329   gcc  
arc  allyesconfig   gcc  
arc  buildonly-randconfig-r003-20230329   gcc  
arc  buildonly-randconfig-r005-20230329   gcc  
arc defconfig   gcc  
arc  randconfig-r004-20230329   gcc  
arc  randconfig-r013-20230329   gcc  
arc  randconfig-r016-20230329   gcc  
arc  randconfig-r032-20230329   gcc  
arc  randconfig-r033-20230329   gcc  
arc  randconfig-r035-20230329   gcc  
arm  allmodconfig   gcc  
arm  allyesconfig   gcc  
arm  buildonly-randconfig-r006-20230329   gcc  
arm defconfig   gcc  
arm  jornada720_defconfig   gcc  
arm lpc18xx_defconfig   gcc  
arm orion5x_defconfig   clang
arm  randconfig-c002-20230330   gcc  
arm  randconfig-r021-20230329   gcc  
arm  randconfig-r025-20230329   gcc  
arm  randconfig-r026-20230329   gcc  
arm64allyesconfig   gcc  
arm64buildonly-randconfig-r001-20230329   gcc  
arm64buildonly-randconfig-r002-20230329   gcc  
arm64buildonly-randconfig-r005-20230329   gcc  
arm64   defconfig   gcc  
arm64randconfig-r001-20230329   gcc  
arm64randconfig-r003-20230329   gcc  
arm64randconfig-r025-20230329   clang
arm64randconfig-r026-20230329   clang
arm64randconfig-r032-20230329   gcc  
arm64randconfig-r036-20230329   gcc  
csky buildonly-randconfig-r006-20230329   gcc  
cskydefconfig   gcc  
csky randconfig-r003-20230329   gcc  
csky randconfig-r004-20230329   gcc  
csky randconfig-r011-20230329   gcc  
csky randconfig-r022-20230329   gcc  
csky randconfig-r026-20230329   gcc  
csky randconfig-r033-20230329   gcc  
csky randconfig-r034-20230329   gcc  
csky randconfig-r035-20230329   gcc  
hexagon  buildonly-randconfig-r005-20230329   clang
hexagon  randconfig-r011-20230330   clang
hexagon  randconfig-r015-20230329   clang
hexagon  randconfig-r032-20230329   clang
hexagon  randconfig-r041-20230329   clang
hexagon  randconfig-r045-20230329   clang
i386 allyesconfig   gcc  
i386 debian-10.3-func   gcc  
i386   debian-10.3-kselftests   gcc  
i386debian-10.3-kunit   gcc  
i386  debian-10.3-kvm   gcc  
i386  debian-10.3   gcc  
i386defconfig   gcc  
i386  randconfig-a002   clang
i386  randconfig-a004   clang
i386  randconfig-a006   clang
i386  randconfig-a011   clang
i386  randconfig-a012   gcc  
i386  randconfig-a013   clang
i386  randconfig-a014   gcc  
i386  randconfig-a015   clang
i386  randconfig-a016   gcc  
i386  randconfig-c001   gcc  
ia64 allmodconfig   gcc  
ia64 buildonly-randconfig-r003-20230329   gcc  
ia64 buildonly-randconfig-r005-20230329   gcc  
ia64 buildonly-randconfig-r006-20230329   gcc  
ia64defconfig   gcc  
ia64  gensparse_defconfig   gcc  
ia64 randconfig-r003-20230329   gcc  
ia64 randconfig-r004-20230329   gcc  
ia64 randconfig-r005-20230329   gcc  
ia64 randconfig-r006-20230329

Re: [PATCH v3 4/9] scsi: lpfc: Change to use pci_aer_clear_uncorrect_error_status()

2023-03-30 Thread Justin Tee
Hi Bjorn,

> But lpfc_aer_cleanup_state() is visible in the
> "lpfc_aer_state_cleanup" sysfs file, so removing it would break any
> userspace that uses it.
>
> If we can rely on the PCI core to clean up AER errors itself
> (admittedly, that might be a big "if"), maybe lpfc_aer_cleanup_state()
> could just become a no-op?
>
> Any comment from the LPFC folks?

We have notified all users of the lpfc_aer_cleanup_state sysfs entry,
and Broadcom LPFC is okay to no-op.

Regards,
Justin

On Wed, Mar 15, 2023 at 2:39 PM Bjorn Helgaas  wrote:
>
> On Tue, Dec 06, 2022 at 04:13:35PM -0600, Bjorn Helgaas wrote:
> > On Wed, Sep 28, 2022 at 06:59:41PM +0800, Zhuo Chen wrote:
> > > lpfc_aer_cleanup_state() requires clearing both fatal and non-fatal
> > > uncorrectable error status.
> >
> > I don't know what the point of lpfc_aer_cleanup_state() is.  AER
> > errors should be handled and cleared by the PCI core, not by
> > individual drivers.  Only lpfc, liquidio, and sky2 touch
> > PCI_ERR_UNCOR_STATUS.
> >
> > But lpfc_aer_cleanup_state() is visible in the
> > "lpfc_aer_state_cleanup" sysfs file, so removing it would break any
> > userspace that uses it.
> >
> > If we can rely on the PCI core to clean up AER errors itself
> > (admittedly, that might be a big "if"), maybe lpfc_aer_cleanup_state()
> > could just become a no-op?
> >
> > Any comment from the LPFC folks?
> >
> > Ideally, I would rather not export pci_aer_clear_nonfatal_status() or
> > pci_aer_clear_uncorrect_error_status() outside the PCI core at all.
>
> Resurrecting this old thread.  Zhuo, can you figure out where the PCI
> core clears these errors, include that in the commit log, and propose
> a patch that makes lpfc_aer_cleanup_state() a no-op, by removing the
> pci_aer_clear_nonfatal_status() call completely?
>
> Such a patch could be sent to the SCSI maintainers since it doesn't
> involve the PCI core.
>
> If it turns out that the PCI core *doesn't* clear these errors, we
> should figure out *why* it doesn't and try to change the PCI core so
> it does.
>
> > > But using pci_aer_clear_nonfatal_status()
> > > will only clear non-fatal error status. To clear both fatal and
> > > non-fatal error status, use pci_aer_clear_uncorrect_error_status().
> > >
> > > Signed-off-by: Zhuo Chen 
> > > ---
> > >  drivers/scsi/lpfc/lpfc_attr.c | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
> > > index 09cf2cd0ae60..d835cc0ba153 100644
> > > --- a/drivers/scsi/lpfc/lpfc_attr.c
> > > +++ b/drivers/scsi/lpfc/lpfc_attr.c
> > > @@ -4689,7 +4689,7 @@ static DEVICE_ATTR_RW(lpfc_aer_support);
> > >   * Description:
> > >   * If the @buf contains 1 and the device currently has the AER support
> > >   * enabled, then invokes the kernel AER helper routine
> > > - * pci_aer_clear_nonfatal_status() to clean up the uncorrectable
> > > + * pci_aer_clear_uncorrect_error_status() to clean up the uncorrectable
> > >   * error status register.
> > >   *
> > >   * Notes:
> > > @@ -4715,7 +4715,7 @@ lpfc_aer_cleanup_state(struct device *dev, struct 
> > > device_attribute *attr,
> > > return -EINVAL;
> > >
> > > if (phba->hba_flag & HBA_AER_ENABLED)
> > > -   rc = pci_aer_clear_nonfatal_status(phba->pcidev);
> > > +   rc = pci_aer_clear_uncorrect_error_status(phba->pcidev);
> > >
> > > if (rc == 0)
> > > return strlen(buf);
> > > --
> > > 2.30.1 (Apple Git-130)
> > >


Re: [PATCH v8 0/7] Support page table check

2023-03-30 Thread Michael Ellerman
Rohan McLure  writes:
> Anyone got time to review this one?

I was planning to pick it up, but it's going to conflict badly with the
set_ptes() series:

  https://lore.kernel.org/all/20230315051444.3229621-1-wi...@infradead.org/

I thought that series was likely to go in soon, but I see it's still not
in linux-next.

Hopefully there'll be a v5 of that series soon and we can try and work
out the conflicts. I might need to create a topic branch, or have this
series go via -mm.

cheers


>> On 16 Feb 2023, at 10:11 am, Rohan McLure  wrote:
>> 
>> Support the page table check sanitiser on all PowerPC platforms. This
>> sanitiser works by serialising assignments, reassignments and clears of
>> page table entries at each level in order to ensure that anonymous
>> mappings have at most one writable consumer, and likewise that
>> file-backed mappings are not simultaneously also anonymous mappings.
>> 
>> In order to support this infrastructure, a number of stubs must be
>> defined for all powerpc platforms. Additionally, separate set_pte_at
>> and set_pte, to allow for internal, uninstrumented mappings.
>> 
>> v8:
>> * Fix linux/page_table_check.h include in asm/pgtable.h breaking
>>   32-bit.
>> 
>> v7:
>> * Remove use of extern in set_pte prototypes
>> * Clean up pmdp_collapse_flush macro
>> * Replace set_pte_at with static inline function
>> * Fix commit message for patch 7
>> Link: 
>> https://lore.kernel.org/linuxppc-dev/20230215020155.1969194-1-rmcl...@linux.ibm.com/
>> 
>> v6:
>> * Support huge pages and p{m,u}d accounting.
>> * Remove instrumentation from set_pte from kernel internal pages.
>> * 64s: Implement pmdp_collapse_flush in terms of __pmdp_collapse_flush
>>   as access to the mm_struct * is required.
>> Link: 
>> https://lore.kernel.org/linuxppc-dev/20230214015939.1853438-1-rmcl...@linux.ibm.com/
>> 
>> v5:
>> Link: 
>> https://lore.kernel.org/linuxppc-dev/20221118002146.25979-1-rmcl...@linux.ibm.com/
>> 
>> Rohan McLure (7):
>>  powerpc: mm: Separate set_pte, set_pte_at for internal, external use
>>  powerpc/64s: mm: Introduce __pmdp_collapse_flush with mm_struct
>>argument
>>  powerpc: mm: Replace p{u,m,4}d_is_leaf with p{u,m,4}_leaf
>>  powerpc: mm: Implement p{m,u,4}d_leaf on all platforms
>>  powerpc: mm: Add common pud_pfn stub for all platforms
>>  powerpc: mm: Add p{te,md,ud}_user_accessible_page helpers
>>  powerpc: mm: Support page table check
>> 
>> arch/powerpc/Kconfig |  1 +
>> arch/powerpc/include/asm/book3s/32/pgtable.h | 17 +++-
>> arch/powerpc/include/asm/book3s/64/pgtable.h | 85 +---
>> arch/powerpc/include/asm/book3s/pgtable.h|  3 +-
>> arch/powerpc/include/asm/nohash/32/pgtable.h | 12 ++-
>> arch/powerpc/include/asm/nohash/64/pgtable.h | 24 +-
>> arch/powerpc/include/asm/nohash/pgtable.h|  9 ++-
>> arch/powerpc/include/asm/pgtable.h   | 60 +-
>> arch/powerpc/kvm/book3s_64_mmu_radix.c   | 12 +--
>> arch/powerpc/mm/book3s64/hash_pgtable.c  |  2 +-
>> arch/powerpc/mm/book3s64/pgtable.c   | 16 ++--
>> arch/powerpc/mm/book3s64/radix_pgtable.c | 24 +++---
>> arch/powerpc/mm/nohash/book3e_pgtable.c  |  2 +-
>> arch/powerpc/mm/pgtable.c|  9 +--
>> arch/powerpc/mm/pgtable_32.c |  2 +-
>> arch/powerpc/mm/pgtable_64.c |  6 +-
>> arch/powerpc/xmon/xmon.c |  6 +-
>> 17 files changed, 197 insertions(+), 93 deletions(-)
>> 
>> -- 
>> 2.37.2
>> 


Re: [PATCH 18/21] ARM: drop SMP support for ARM11MPCore

2023-03-30 Thread Neil Armstrong

Le 30/03/2023 à 12:03, Arnd Bergmann a écrit :

On Thu, Mar 30, 2023, at 09:48, Neil Armstrong wrote:

On 27/03/2023 14:13, Arnd Bergmann wrote:

From: Arnd Bergmann 

The cache management operations for noncoherent DMA on ARMv6 work
in two different ways:

   * When CONFIG_DMA_CACHE_RWFO is set, speculative prefetches on in-flight
 DMA buffers lead to data corruption when the prefetched data is written
 back on top of data from the device.

   * When CONFIG_DMA_CACHE_RWFO is disabled, a cache flush on one CPU
 is not seen by the other core(s), leading to inconsistent contents
 across the system.

As a consequence, neither configuration is actually safe to use in a
general-purpose kernel that is used on both MPCore systems and ARM1176
with prefetching enabled.

We could add further workarounds to make the behavior more dynamic based
on the system, but realistically, there are close to zero remaining
users on any ARM11MPCore anyway, and nobody seems too interested in it,
compared to the more popular ARM1176 used in BCM2835 and AST2500.

The Oxnas platform has some minimal support in OpenWRT, but most of the
drivers and dts files never made it into the mainline kernel, while the
Arm Versatile/Realview platform mainly serves as a reference system but
is not necessary to be kept working once all other ARM11MPCore are gone.


Acked-by: Neil Armstrong 

It's sad but it's the reality, there's no chance full OXNAS support will
ever come upstream and no real work has been done for years.

I think OXNAS support can be programmed for removal for next release,
it would need significant work to rework current support to make it acceptable
before trying to upstream missing bits anyway.


Ok, thanks for your reply!

To clarify, do you think we should plan for removal after the next
stable release (6.3, removed in 6.4), or after the next LTS
release (probably 6.6, removed in 6.7)? As far as I understand,
the next OpenWRT release (23.x) will be based on linux-5.15,
and the one after that (24.x) would likely still use 6.1, unless
they skip an LTS kernel.


I think it's ok to remove it ASAP, or at least before the next LTS,
not having SMP makes the platform barely usable so the earliest is the best.

Neil



  Arnd




[PATCH] ASoC: fsl_sai: Use physical format width

2023-03-30 Thread Emil Abildgaard Svendsen
Slot width should follow the physical width of the format instead of the
data width.

This is needed for formats like SNDRV_PCM_FMTBIT_S24_LE where physical
width is 32 and data width is 24. By using the physical width, data
won't get misaligned.

Signed-off-by: Emil Svendsen 
---
 sound/soc/fsl/fsl_sai.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 939c6bdd22c4..213e2d462076 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -519,13 +519,13 @@ static int fsl_sai_hw_params(struct snd_pcm_substream 
*substream,
unsigned int channels = params_channels(params);
struct snd_dmaengine_dai_dma_data *dma_params;
struct fsl_sai_dl_cfg *dl_cfg = sai->dl_cfg;
+   u32 slot_width = params_physical_width(params);
u32 word_width = params_width(params);
int trce_mask = 0, dl_cfg_idx = 0;
int dl_cfg_cnt = sai->dl_cfg_cnt;
u32 dl_type = FSL_SAI_DL_I2S;
u32 val_cr4 = 0, val_cr5 = 0;
u32 slots = (channels == 1) ? 2 : channels;
-   u32 slot_width = word_width;
int adir = tx ? RX : TX;
u32 pins, bclk;
u32 watermark;
-- 
2.34.1


Re: [PATCH 18/21] ARM: drop SMP support for ARM11MPCore

2023-03-30 Thread Neil Armstrong

On 27/03/2023 14:13, Arnd Bergmann wrote:

From: Arnd Bergmann 

The cache management operations for noncoherent DMA on ARMv6 work
in two different ways:

  * When CONFIG_DMA_CACHE_RWFO is set, speculative prefetches on in-flight
DMA buffers lead to data corruption when the prefetched data is written
back on top of data from the device.

  * When CONFIG_DMA_CACHE_RWFO is disabled, a cache flush on one CPU
is not seen by the other core(s), leading to inconsistent contents
across the system.

As a consequence, neither configuration is actually safe to use in a
general-purpose kernel that is used on both MPCore systems and ARM1176
with prefetching enabled.

We could add further workarounds to make the behavior more dynamic based
on the system, but realistically, there are close to zero remaining
users on any ARM11MPCore anyway, and nobody seems too interested in it,
compared to the more popular ARM1176 used in BCM2835 and AST2500.

The Oxnas platform has some minimal support in OpenWRT, but most of the
drivers and dts files never made it into the mainline kernel, while the
Arm Versatile/Realview platform mainly serves as a reference system but
is not necessary to be kept working once all other ARM11MPCore are gone.


Acked-by: Neil Armstrong 

It's sad but it's the reality, there's no chance full OXNAS support will
ever come upstream and no real work has been done for years.

I think OXNAS support can be programmed for removal for next release,
it would need significant work to rework current support to make it acceptable
before trying to upstream missing bits anyway.

Thanks,
Neil




Take the easy way out here and drop support for multiprocessing on
ARMv6, along with the CONFIG_DMA_CACHE_RWFO option and the cache
management implementation for it. This also helps with other ARMv6
issues, but for the moment leaves the ability to build a kernel that
can run on both ARMv7 SMP and single-processor ARMv6, which we probably
want to stop supporting as well, but not as part of this series.

Cc: Neil Armstrong 
Cc: Daniel Golle 
Cc: Linus Walleij 
Cc: linux-ox...@groups.io
Signed-off-by: Arnd Bergmann 
---
I could use some help clarifying the above changelog text to describe
the exact problem, and how the CONFIG_DMA_CACHE_RWFO actually works on
MPCore. The TRMs for both 1176 and 11MPCore only describe prefetching
into the instruction cache, not the data cache, but this can end up in
the outercache as a result. The 1176 has some extra control bits to
control prefetching, but I found no reference that explains why an
MPCore does not run into the problem.
---
  arch/arm/mach-oxnas/Kconfig|  4 -
  arch/arm/mach-oxnas/Makefile   |  1 -
  arch/arm/mach-oxnas/headsmp.S  | 23 --
  arch/arm/mach-oxnas/platsmp.c  | 96 --
  arch/arm/mach-versatile/platsmp-realview.c |  4 -
  arch/arm/mm/Kconfig| 19 -
  arch/arm/mm/cache-v6.S | 31 ---
  7 files changed, 178 deletions(-)
  delete mode 100644 arch/arm/mach-oxnas/headsmp.S
  delete mode 100644 arch/arm/mach-oxnas/platsmp.c

diff --git a/arch/arm/mach-oxnas/Kconfig b/arch/arm/mach-oxnas/Kconfig
index a9ded7079268..a054235c3d6c 100644
--- a/arch/arm/mach-oxnas/Kconfig
+++ b/arch/arm/mach-oxnas/Kconfig
@@ -28,10 +28,6 @@ config MACH_OX820
bool "Support OX820 Based Products"
depends on ARCH_MULTI_V6
select ARM_GIC
-   select DMA_CACHE_RWFO if SMP
-   select HAVE_SMP
-   select HAVE_ARM_SCU if SMP
-   select HAVE_ARM_TWD if SMP
help
  Include Support for the Oxford Semiconductor OX820 SoC Based Products.
  
diff --git a/arch/arm/mach-oxnas/Makefile b/arch/arm/mach-oxnas/Makefile

index 0e78ecfe6c49..a4e40e534e6a 100644
--- a/arch/arm/mach-oxnas/Makefile
+++ b/arch/arm/mach-oxnas/Makefile
@@ -1,2 +1 @@
  # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_SMP)  += platsmp.o headsmp.o
diff --git a/arch/arm/mach-oxnas/headsmp.S b/arch/arm/mach-oxnas/headsmp.S
deleted file mode 100644
index 9c0f1479f33a..
--- a/arch/arm/mach-oxnas/headsmp.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2013 Ma Haijun 
- * Copyright (c) 2003 ARM Limited
- * All Rights Reserved
- */
-#include 
-#include 
-
-   __INIT
-
-/*
- * OX820 specific entry point for secondary CPUs.
- */
-ENTRY(ox820_secondary_startup)
-   mov r4, #0
-   /* invalidate both caches and branch target cache */
-   mcr p15, 0, r4, c7, c7, 0
-   /*
-* we've been released from the holding pen: secondary_stack
-* should now contain the SVC stack for this core
-*/
-   b   secondary_startup
diff --git a/arch/arm/mach-oxnas/platsmp.c b/arch/arm/mach-oxnas/platsmp.c
deleted file mode 100644
index f0a50b9e61df..
--- a/arch/arm/mach-oxnas/platsmp.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: 

[PATCH v8 7/7] pcmcia: Convert to use less arguments in pci_bus_for_each_resource()

2023-03-30 Thread Andy Shevchenko
The pci_bus_for_each_resource() can hide the iterator loop since
it may not be used otherwise. With this, we may drop that iterator
variable definition.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Krzysztof Wilczyński 
Acked-by: Dominik Brodowski 
---
 drivers/pcmcia/rsrc_nonstatic.c | 9 +++--
 drivers/pcmcia/yenta_socket.c   | 3 +--
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index ad1141fddb4c..96264ebee46a 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -934,7 +934,7 @@ static int adjust_io(struct pcmcia_socket *s, unsigned int 
action, unsigned long
 static int nonstatic_autoadd_resources(struct pcmcia_socket *s)
 {
struct resource *res;
-   int i, done = 0;
+   int done = 0;
 
if (!s->cb_dev || !s->cb_dev->bus)
return -ENODEV;
@@ -960,12 +960,9 @@ static int nonstatic_autoadd_resources(struct 
pcmcia_socket *s)
 */
if (s->cb_dev->bus->number == 0)
return -EINVAL;
-
-   for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
-   res = s->cb_dev->bus->resource[i];
-#else
-   pci_bus_for_each_resource(s->cb_dev->bus, res, i) {
 #endif
+
+   pci_bus_for_each_resource(s->cb_dev->bus, res) {
if (!res)
continue;
 
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index 1365eaa20ff4..fd18ab571ce8 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -673,9 +673,8 @@ static int yenta_search_res(struct yenta_socket *socket, 
struct resource *res,
u32 min)
 {
struct resource *root;
-   int i;
 
-   pci_bus_for_each_resource(socket->dev->bus, root, i) {
+   pci_bus_for_each_resource(socket->dev->bus, root) {
if (!root)
continue;
 
-- 
2.40.0.1.gaa8946217a0b



[PATCH v8 5/7] PCI: Allow pci_bus_for_each_resource() to take less arguments

2023-03-30 Thread Andy Shevchenko
Refactor pci_bus_for_each_resource() in the same way as it's done in
pci_dev_for_each_resource() case. This will allow hiding the iterator
inside the loop, where it's not used otherwise.

No functional changes intended.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Krzysztof Wilczyński 
Reviewed-by: Philippe Mathieu-Daudé 
---
 drivers/pci/bus.c  |  7 +++
 drivers/pci/hotplug/shpchp_sysfs.c |  8 
 drivers/pci/pci.c  |  3 +--
 drivers/pci/probe.c|  2 +-
 drivers/pci/setup-bus.c| 10 --
 include/linux/pci.h| 24 +++-
 6 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 549c4bd5caec..5bc81cc0a2de 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -182,13 +182,13 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, 
struct resource *res,
void *alignf_data,
struct pci_bus_region *region)
 {
-   int i, ret;
struct resource *r, avail;
resource_size_t max;
+   int ret;
 
type_mask |= IORESOURCE_TYPE_BITS;
 
-   pci_bus_for_each_resource(bus, r, i) {
+   pci_bus_for_each_resource(bus, r) {
resource_size_t min_used = min;
 
if (!r)
@@ -289,9 +289,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx)
struct resource *res = &dev->resource[idx];
struct resource orig_res = *res;
struct resource *r;
-   int i;
 
-   pci_bus_for_each_resource(bus, r, i) {
+   pci_bus_for_each_resource(bus, r) {
resource_size_t start, end;
 
if (!r)
diff --git a/drivers/pci/hotplug/shpchp_sysfs.c 
b/drivers/pci/hotplug/shpchp_sysfs.c
index 64beed7a26be..01d47a42da04 100644
--- a/drivers/pci/hotplug/shpchp_sysfs.c
+++ b/drivers/pci/hotplug/shpchp_sysfs.c
@@ -24,16 +24,16 @@
 static ssize_t show_ctrl(struct device *dev, struct device_attribute *attr, 
char *buf)
 {
struct pci_dev *pdev;
-   int index, busnr;
struct resource *res;
struct pci_bus *bus;
size_t len = 0;
+   int busnr;
 
pdev = to_pci_dev(dev);
bus = pdev->subordinate;
 
len += sysfs_emit_at(buf, len, "Free resources: memory\n");
-   pci_bus_for_each_resource(bus, res, index) {
+   pci_bus_for_each_resource(bus, res) {
if (res && (res->flags & IORESOURCE_MEM) &&
!(res->flags & IORESOURCE_PREFETCH)) {
len += sysfs_emit_at(buf, len,
@@ -43,7 +43,7 @@ static ssize_t show_ctrl(struct device *dev, struct 
device_attribute *attr, char
}
}
len += sysfs_emit_at(buf, len, "Free resources: prefetchable memory\n");
-   pci_bus_for_each_resource(bus, res, index) {
+   pci_bus_for_each_resource(bus, res) {
if (res && (res->flags & IORESOURCE_MEM) &&
   (res->flags & IORESOURCE_PREFETCH)) {
len += sysfs_emit_at(buf, len,
@@ -53,7 +53,7 @@ static ssize_t show_ctrl(struct device *dev, struct 
device_attribute *attr, char
}
}
len += sysfs_emit_at(buf, len, "Free resources: IO\n");
-   pci_bus_for_each_resource(bus, res, index) {
+   pci_bus_for_each_resource(bus, res) {
if (res && (res->flags & IORESOURCE_IO)) {
len += sysfs_emit_at(buf, len,
 "start = %8.8llx, length = 
%8.8llx\n",
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 45c3bb039f21..585bb3988ddf 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -779,9 +779,8 @@ struct resource *pci_find_parent_resource(const struct 
pci_dev *dev,
 {
const struct pci_bus *bus = dev->bus;
struct resource *r;
-   int i;
 
-   pci_bus_for_each_resource(bus, r, i) {
+   pci_bus_for_each_resource(bus, r) {
if (!r)
continue;
if (resource_contains(r, res)) {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index a3f68b6ba6ac..f8191750f6b7 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -533,7 +533,7 @@ void pci_read_bridge_bases(struct pci_bus *child)
pci_read_bridge_mmio_pref(child);
 
if (dev->transparent) {
-   pci_bus_for_each_resource(child->parent, res, i) {
+   pci_bus_for_each_resource(child->parent, res) {
if (res && res->flags) {
pci_bus_add_resource(child, res,
 PCI_SUBTRACTIVE_DECODE);
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 027b985dd1ee..fdeb121e9175 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -770,9 +770,8 @@ static struct resource *find_bus_resource_of_type(struct 
pci_bus *bus,
 

[PATCH v8 6/7] EISA: Convert to use less arguments in pci_bus_for_each_resource()

2023-03-30 Thread Andy Shevchenko
The pci_bus_for_each_resource() can hide the iterator loop since
it may not be used otherwise. With this, we may drop that iterator
variable definition.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Krzysztof Wilczyński 
Reviewed-by: Philippe Mathieu-Daudé 
---
 drivers/eisa/pci_eisa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c
index 930c2332c3c4..8173e60bb808 100644
--- a/drivers/eisa/pci_eisa.c
+++ b/drivers/eisa/pci_eisa.c
@@ -20,8 +20,8 @@ static struct eisa_root_device pci_eisa_root;
 
 static int __init pci_eisa_init(struct pci_dev *pdev)
 {
-   int rc, i;
struct resource *res, *bus_res = NULL;
+   int rc;
 
if ((rc = pci_enable_device (pdev))) {
dev_err(&pdev->dev, "Could not enable device\n");
@@ -38,7 +38,7 @@ static int __init pci_eisa_init(struct pci_dev *pdev)
 * eisa_root_register() can only deal with a single io port resource,
*  so we use the first valid io port resource.
 */
-   pci_bus_for_each_resource(pdev->bus, res, i)
+   pci_bus_for_each_resource(pdev->bus, res)
if (res && (res->flags & IORESOURCE_IO)) {
bus_res = res;
break;
-- 
2.40.0.1.gaa8946217a0b



[PATCH v1] dt-bindings: move cache controller bindings to a cache directory

2023-03-30 Thread Conor Dooley
From: Conor Dooley 

There's a bunch of bindings for (mostly l2) cache controllers
scattered to the four winds, move them to a common directory.
I renamed the freescale l2cache.txt file, as while that might make sense
when the parent dir is fsl, it's confusing after the move.
The two Marvell bindings have had a "marvell," prefix added to match
their compatibles.

Signed-off-by: Conor Dooley 
---
 .../{memory-controllers => cache}/baikal,bt1-l2-ctl.yaml| 2 +-
 .../{powerpc/fsl/l2cache.txt => cache/freescale-l2cache.txt}| 0
 Documentation/devicetree/bindings/{arm => cache}/l2c2x0.yaml| 2 +-
 .../{arm/mrvl/feroceon.txt => cache/marvell,feroceon-cache.txt} | 0
 .../{arm/mrvl/tauros2.txt => cache/marvell,tauros2-cache.txt}   | 0
 .../devicetree/bindings/{arm/msm => cache}/qcom,llcc.yaml   | 2 +-
 .../devicetree/bindings/{riscv => cache}/sifive,ccache0.yaml| 2 +-
 .../socionext => cache}/socionext,uniphier-system-cache.yaml| 2 +-
 MAINTAINERS | 2 ++
 9 files changed, 7 insertions(+), 5 deletions(-)
 rename Documentation/devicetree/bindings/{memory-controllers => 
cache}/baikal,bt1-l2-ctl.yaml (95%)
 rename Documentation/devicetree/bindings/{powerpc/fsl/l2cache.txt => 
cache/freescale-l2cache.txt} (100%)
 rename Documentation/devicetree/bindings/{arm => cache}/l2c2x0.yaml (99%)
 rename Documentation/devicetree/bindings/{arm/mrvl/feroceon.txt => 
cache/marvell,feroceon-cache.txt} (100%)
 rename Documentation/devicetree/bindings/{arm/mrvl/tauros2.txt => 
cache/marvell,tauros2-cache.txt} (100%)
 rename Documentation/devicetree/bindings/{arm/msm => cache}/qcom,llcc.yaml 
(96%)
 rename Documentation/devicetree/bindings/{riscv => cache}/sifive,ccache0.yaml 
(98%)
 rename Documentation/devicetree/bindings/{arm/socionext => 
cache}/socionext,uniphier-system-cache.yaml (96%)

diff --git 
a/Documentation/devicetree/bindings/memory-controllers/baikal,bt1-l2-ctl.yaml 
b/Documentation/devicetree/bindings/cache/baikal,bt1-l2-ctl.yaml
similarity index 95%
rename from 
Documentation/devicetree/bindings/memory-controllers/baikal,bt1-l2-ctl.yaml
rename to Documentation/devicetree/bindings/cache/baikal,bt1-l2-ctl.yaml
index 1fca282f64a2..ec4f367bc0b4 100644
--- 
a/Documentation/devicetree/bindings/memory-controllers/baikal,bt1-l2-ctl.yaml
+++ b/Documentation/devicetree/bindings/cache/baikal,bt1-l2-ctl.yaml
@@ -2,7 +2,7 @@
 # Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
 %YAML 1.2
 ---
-$id: http://devicetree.org/schemas/memory-controllers/baikal,bt1-l2-ctl.yaml#
+$id: http://devicetree.org/schemas/cache/baikal,bt1-l2-ctl.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: Baikal-T1 L2-cache Control Block
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt 
b/Documentation/devicetree/bindings/cache/freescale-l2cache.txt
similarity index 100%
rename from Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
rename to Documentation/devicetree/bindings/cache/freescale-l2cache.txt
diff --git a/Documentation/devicetree/bindings/arm/l2c2x0.yaml 
b/Documentation/devicetree/bindings/cache/l2c2x0.yaml
similarity index 99%
rename from Documentation/devicetree/bindings/arm/l2c2x0.yaml
rename to Documentation/devicetree/bindings/cache/l2c2x0.yaml
index 6b8f4d4fa580..d7840a5c4037 100644
--- a/Documentation/devicetree/bindings/arm/l2c2x0.yaml
+++ b/Documentation/devicetree/bindings/cache/l2c2x0.yaml
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 %YAML 1.2
 ---
-$id: http://devicetree.org/schemas/arm/l2c2x0.yaml#
+$id: http://devicetree.org/schemas/cache/l2c2x0.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: ARM L2 Cache Controller
diff --git a/Documentation/devicetree/bindings/arm/mrvl/feroceon.txt 
b/Documentation/devicetree/bindings/cache/marvell,feroceon-cache.txt
similarity index 100%
rename from Documentation/devicetree/bindings/arm/mrvl/feroceon.txt
rename to Documentation/devicetree/bindings/cache/marvell,feroceon-cache.txt
diff --git a/Documentation/devicetree/bindings/arm/mrvl/tauros2.txt 
b/Documentation/devicetree/bindings/cache/marvell,tauros2-cache.txt
similarity index 100%
rename from Documentation/devicetree/bindings/arm/mrvl/tauros2.txt
rename to Documentation/devicetree/bindings/cache/marvell,tauros2-cache.txt
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,llcc.yaml 
b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
similarity index 96%
rename from Documentation/devicetree/bindings/arm/msm/qcom,llcc.yaml
rename to Documentation/devicetree/bindings/cache/qcom,llcc.yaml
index 38efcad56dbd..14eb5175dac4 100644
--- a/Documentation/devicetree/bindings/arm/msm/qcom,llcc.yaml
+++ b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: http://devicetree.org/schemas/arm/msm/qcom,llcc.yaml#
+$id: http://devicetree.org/schemas/cache/qcom,llcc.yaml#
 $schema: 

[PATCH v8 4/7] PCI: Document pci_bus_for_each_resource() to avoid confusion

2023-03-30 Thread Andy Shevchenko
There might be confusion about the implementation of
pci_bus_for_each_resource() due to the side effect of the Logical
OR. Document the entire macro and explain how it works and why
the conditional needs to be like that.

Signed-off-by: Andy Shevchenko 
---
 include/linux/pci.h | 20 
 1 file changed, 20 insertions(+)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5cacd9e4c8cd..e3b3af606280 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1446,6 +1446,26 @@ int devm_request_pci_bus_resources(struct device *dev,
 /* Temporary until new and working PCI SBR API in place */
 int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 
+/**
+ * pci_bus_for_each_resource - iterate over PCI bus resources
+ * @bus: the PCI bus
+ * @res: a variable to keep a pointer to the current resource
+ * @i: a variable to keep the index of the current resource
+ *
+ * Iterate over PCI bus resources. The first part is to go over PCI bus
+ * resource array, which has at most the %PCI_BRIDGE_RESOURCE_NUM entries.
+ * After that continue with the separate list of the additional resources,
+ * if not empty. That's why the Logical OR is being used.
+ *
+ * Possible usage:
+ *
+ * struct pci_bus *bus = ...;
+ * struct resource *res;
+ * unsigned int i;
+ *
+ * pci_bus_for_each_resource(bus, res, i)
+ * pr_info("PCI bus resource[%u]: %pR\n", i, res);
+ */
 #define pci_bus_for_each_resource(bus, res, i) \
for (i = 0; \
(res = pci_bus_resource_n(bus, i)) || i < PCI_BRIDGE_RESOURCE_NUM; \
-- 
2.40.0.1.gaa8946217a0b



[PATCH v8 3/7] PCI: Introduce pci_dev_for_each_resource()

2023-03-30 Thread Andy Shevchenko
From: Mika Westerberg 

Instead of open-coding it everywhere introduce a tiny helper that can be
used to iterate over each resource of a PCI device, and convert the most
obvious users into it.

While at it drop doubled empty line before pdev_sort_resources().

No functional changes intended.

Suggested-by: Andy Shevchenko 
Signed-off-by: Mika Westerberg 
Signed-off-by: Andy Shevchenko 
Reviewed-by: Krzysztof Wilczyński 
---
 .clang-format |  1 +
 arch/alpha/kernel/pci.c   |  5 ++--
 arch/arm/kernel/bios32.c  | 16 ++---
 arch/arm/mach-dove/pcie.c | 10 
 arch/arm/mach-mv78xx0/pcie.c  | 10 
 arch/arm/mach-orion5x/pci.c   | 10 
 arch/mips/pci/ops-bcm63xx.c   |  8 +++
 arch/mips/pci/pci-legacy.c|  3 +--
 arch/powerpc/kernel/pci-common.c  | 21 
 arch/powerpc/platforms/4xx/pci.c  |  8 +++
 arch/powerpc/platforms/52xx/mpc52xx_pci.c |  5 ++--
 arch/powerpc/platforms/pseries/pci.c  | 16 ++---
 arch/sh/drivers/pci/pcie-sh7786.c | 10 
 arch/sparc/kernel/leon_pci.c  |  5 ++--
 arch/sparc/kernel/pci.c   | 10 
 arch/sparc/kernel/pcic.c  |  5 ++--
 drivers/pci/remove.c  |  5 ++--
 drivers/pci/setup-bus.c   | 27 -
 drivers/pci/setup-res.c   |  4 +---
 drivers/pci/vgaarb.c  | 17 -
 drivers/pci/xen-pcifront.c|  4 +---
 drivers/pnp/quirks.c  | 29 ---
 include/linux/pci.h   | 15 
 23 files changed, 112 insertions(+), 132 deletions(-)

diff --git a/.clang-format b/.clang-format
index d988e9fa9b26..2048b0296d76 100644
--- a/.clang-format
+++ b/.clang-format
@@ -520,6 +520,7 @@ ForEachMacros:
   - 'of_property_for_each_string'
   - 'of_property_for_each_u32'
   - 'pci_bus_for_each_resource'
+  - 'pci_dev_for_each_resource'
   - 'pci_doe_for_each_off'
   - 'pcl_for_each_chunk'
   - 'pcl_for_each_segment'
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 64fbfb0763b2..4458eb7f44f0 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -288,11 +288,10 @@ pcibios_claim_one_bus(struct pci_bus *b)
struct pci_bus *child_bus;
 
list_for_each_entry(dev, &b->devices, bus_list) {
+   struct resource *r;
int i;
 
-   for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-   struct resource *r = &dev->resource[i];
-
+   pci_dev_for_each_resource(dev, r, i) {
if (r->parent || !r->start || !r->flags)
continue;
if (pci_has_flag(PCI_PROBE_ONLY) ||
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index e7ef2b5bea9c..d334c7fb672b 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -142,15 +142,15 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND2, 
PCI_DEVICE_ID_WINBOND2_89C940F,
  */
 static void pci_fixup_dec21285(struct pci_dev *dev)
 {
-   int i;
-
if (dev->devfn == 0) {
+   struct resource *r;
+
dev->class &= 0xff;
dev->class |= PCI_CLASS_BRIDGE_HOST << 8;
-   for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-   dev->resource[i].start = 0;
-   dev->resource[i].end   = 0;
-   dev->resource[i].flags = 0;
+   pci_dev_for_each_resource(dev, r) {
+   r->start = 0;
+   r->end = 0;
+   r->flags = 0;
}
}
 }
@@ -162,13 +162,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_DEC, 
PCI_DEVICE_ID_DEC_21285, pci_fixup_d
 static void pci_fixup_ide_bases(struct pci_dev *dev)
 {
struct resource *r;
-   int i;
 
if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
return;
 
-   for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-   r = dev->resource + i;
+   pci_dev_for_each_resource(dev, r) {
if ((r->start & ~0x80) == 0x374) {
r->start |= 2;
r->end = r->start;
diff --git a/arch/arm/mach-dove/pcie.c b/arch/arm/mach-dove/pcie.c
index 754ca381f600..3044b7e03890 100644
--- a/arch/arm/mach-dove/pcie.c
+++ b/arch/arm/mach-dove/pcie.c
@@ -142,14 +142,14 @@ static struct pci_ops pcie_ops = {
 static void rc_pci_fixup(struct pci_dev *dev)
 {
if (dev->bus->parent == NULL && dev->devfn == 0) {
-   int i;
+   struct resource *r;
 
dev->class &= 0xff;
dev->class |= PCI_CLASS_BRIDGE_HOST << 8;
-   for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-   dev->resource[i].start = 0;
-

[PATCH v8 1/7] kernel.h: Split out COUNT_ARGS() and CONCATENATE()

2023-03-30 Thread Andy Shevchenko
kernel.h is being used as a dump for all kinds of stuff for a long time.
The COUNT_ARGS() and CONCATENATE() macros may be used in some places
without need of the full kernel.h dependency train with it.

Here is an attempt at cleaning it up by splitting out these macros.

Signed-off-by: Andy Shevchenko 
---
 include/linux/args.h   | 13 +
 include/linux/kernel.h |  8 +---
 2 files changed, 14 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/args.h

diff --git a/include/linux/args.h b/include/linux/args.h
new file mode 100644
index ..16ef6fad8add
--- /dev/null
+++ b/include/linux/args.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_ARGS_H
+#define _LINUX_ARGS_H
+
+/* This counts to 12. Any more, it will return 13th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, 
_n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 
2, 1, 0)
+
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
+#endif /* _LINUX_ARGS_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 0d91e0af0125..fa675d50d7b7 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -13,6 +13,7 @@
 
 #include 
 #include 
+#include <linux/args.h>
 #include 
 #include 
 #include 
@@ -457,13 +458,6 @@ ftrace_vprintk(const char *fmt, va_list ap)
 static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 #endif /* CONFIG_TRACING */
 
-/* This counts to 12. Any more, it will return 13th argument. */
-#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, 
_n, X...) _n
-#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 
2, 1, 0)
-
-#define __CONCAT(a, b) a ## b
-#define CONCATENATE(a, b) __CONCAT(a, b)
-
 /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
-- 
2.40.0.1.gaa8946217a0b



[PATCH v8 2/7] PCI: Introduce pci_resource_n()

2023-03-30 Thread Andy Shevchenko
Introduce pci_resource_n() and replace open-coded implementations of it
in pci.h.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Philippe Mathieu-Daudé 
---
 include/linux/pci.h | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index b50e5c79f7e3..aeaa95455d4c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1995,14 +1995,13 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct 
vm_area_struct *vma);
  * These helpers provide future and backwards compatibility
  * for accessing popular PCI BAR info
  */
-#define pci_resource_start(dev, bar)   ((dev)->resource[(bar)].start)
-#define pci_resource_end(dev, bar) ((dev)->resource[(bar)].end)
-#define pci_resource_flags(dev, bar)   ((dev)->resource[(bar)].flags)
-#define pci_resource_len(dev,bar) \
-   ((pci_resource_end((dev), (bar)) == 0) ? 0 :\
-   \
-(pci_resource_end((dev), (bar)) -  \
- pci_resource_start((dev), (bar)) + 1))
+#define pci_resource_n(dev, bar)   (&(dev)->resource[(bar)])
+#define pci_resource_start(dev, bar)   (pci_resource_n(dev, bar)->start)
+#define pci_resource_end(dev, bar) (pci_resource_n(dev, bar)->end)
+#define pci_resource_flags(dev, bar)   (pci_resource_n(dev, bar)->flags)
+#define pci_resource_len(dev,bar)  \
+   (pci_resource_end((dev), (bar)) ?   \
+resource_size(pci_resource_n((dev), (bar))) : 0)
 
 /*
  * Similar to the helpers above, these manipulate per-pci_dev
-- 
2.40.0.1.gaa8946217a0b



[PATCH v8 0/7] Add pci_dev_for_each_resource() helper and update users

2023-03-30 Thread Andy Shevchenko
Provide two new helper macros to iterate over PCI device resources and
convert users.

Looking at it, refactor existing pci_bus_for_each_resource() and convert
users accordingly.

Note, the amount of lines grew due to the documentation update.

Changelog v8:
- fixed issue with pci_bus_for_each_resource() macro (LKP)
- due to above added a new patch to document how it works
- moved the last patch to be #2 (Philippe)
- added tags (Philippe)

Changelog v7:
- made both macros to share same name (Bjorn)
- split out the pci_resource_n() conversion (Bjorn)

Changelog v6:
- dropped unused variable in PPC code (LKP)

Changelog v5:
- renamed loop variable to minimize the clash (Keith)
- addressed smatch warning (Dan)
- addressed 0-day bot findings (LKP)

Changelog v4:
- rebased on top of v6.3-rc1
- added tag (Krzysztof)

Changelog v3:
- rebased on top of v2 by Mika, see above
- added tag to pcmcia patch (Dominik)

Changelog v2:
- refactor to have two macros
- refactor existing pci_bus_for_each_resource() in the same way and
  convert users

Andy Shevchenko (6):
  kernel.h: Split out COUNT_ARGS() and CONCATENATE()
  PCI: Introduce pci_resource_n()
  PCI: Document pci_bus_for_each_resource() to avoid confusion
  PCI: Allow pci_bus_for_each_resource() to take less arguments
  EISA: Convert to use less arguments in pci_bus_for_each_resource()
  pcmcia: Convert to use less arguments in pci_bus_for_each_resource()

Mika Westerberg (1):
  PCI: Introduce pci_dev_for_each_resource()

 .clang-format |  1 +
 arch/alpha/kernel/pci.c   |  5 +-
 arch/arm/kernel/bios32.c  | 16 +++--
 arch/arm/mach-dove/pcie.c | 10 ++--
 arch/arm/mach-mv78xx0/pcie.c  | 10 ++--
 arch/arm/mach-orion5x/pci.c   | 10 ++--
 arch/mips/pci/ops-bcm63xx.c   |  8 +--
 arch/mips/pci/pci-legacy.c|  3 +-
 arch/powerpc/kernel/pci-common.c  | 21 +++
 arch/powerpc/platforms/4xx/pci.c  |  8 +--
 arch/powerpc/platforms/52xx/mpc52xx_pci.c |  5 +-
 arch/powerpc/platforms/pseries/pci.c  | 16 ++---
 arch/sh/drivers/pci/pcie-sh7786.c | 10 ++--
 arch/sparc/kernel/leon_pci.c  |  5 +-
 arch/sparc/kernel/pci.c   | 10 ++--
 arch/sparc/kernel/pcic.c  |  5 +-
 drivers/eisa/pci_eisa.c   |  4 +-
 drivers/pci/bus.c |  7 +--
 drivers/pci/hotplug/shpchp_sysfs.c|  8 +--
 drivers/pci/pci.c |  3 +-
 drivers/pci/probe.c   |  2 +-
 drivers/pci/remove.c  |  5 +-
 drivers/pci/setup-bus.c   | 37 +---
 drivers/pci/setup-res.c   |  4 +-
 drivers/pci/vgaarb.c  | 17 ++
 drivers/pci/xen-pcifront.c|  4 +-
 drivers/pcmcia/rsrc_nonstatic.c   |  9 +--
 drivers/pcmcia/yenta_socket.c |  3 +-
 drivers/pnp/quirks.c  | 29 -
 include/linux/args.h  | 13 
 include/linux/kernel.h|  8 +--
 include/linux/pci.h   | 72 +++
 32 files changed, 190 insertions(+), 178 deletions(-)
 create mode 100644 include/linux/args.h

-- 
2.40.0.1.gaa8946217a0b



Re: [PATCH] powerpc/papr_scm: Update the NUMA distance table for the target node

2023-03-30 Thread Aneesh Kumar K.V
"Aneesh Kumar K.V"  writes:

> platform device helper routines won't update the NUMA distance table
> while creating a platform device, even if the device is present on
> a NUMA node that doesn't have memory or CPU. This is especially true
> for pmem devices. If the target node of the pmem device is not online, we
> find the nearest online node to the device and associate the pmem
> device with that online node. To find the nearest online node, we should
> have the numa distance table updated correctly. Update the distance
> information during the device probe.
>
> distance_lookup_table value for distance_ref_points_depth = 2 before and after
> fix is below
> node 3 distance depth 0  - 0
> node 3 distance depth 1  - 0
> node 4 distance depth 0  - 4
> node 4 distance depth 1  - 2
> node 5 distance depth 0  - 5
> node 5 distance depth 1  - 1
>
> after fix
> node 3 distance depth 0  - 3
> node 3 distance depth 1  - 1
> node 4 distance depth 0  - 4
> node 4 distance depth 1  - 2
> node 5 distance depth 0  - 5
> node 5 distance depth 1  - 1
>
> Without the fix, the nearest numa node to the pmem device will be picked as 4.
> After the fix, we get the correct numa node which is 5.
>
> Fixes: da1115fdbd6e ("powerpc/nvdimm: Pick nearby online node if the device 
> node is not online")
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/platforms/pseries/papr_scm.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
> b/arch/powerpc/platforms/pseries/papr_scm.c
> index 2f8385523a13..5bef75714bd5 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -1428,6 +1428,10 @@ static int papr_scm_probe(struct platform_device *pdev)
>   return -ENODEV;
>   }
>  
> + /*
> +  * of platform device create won't update the numa distance table
> +  */
> + update_numa_distance(dn);
>  
>   p = kzalloc(sizeof(*p), GFP_KERNEL);
>   if (!p)
> -- 
> 2.39.2

This also requires export of update_numa_distance()

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b44ce71917d7..16cfe56be05b 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -366,6 +366,7 @@ void update_numa_distance(struct device_node *node)
WARN(numa_distance_table[nid][nid] == -1,
 "NUMA distance details for node %d not provided\n", nid);
 }
+EXPORT_SYMBOL_GPL(update_numa_distance);
 
 /*
  * ibm,numa-lookup-index-table= {N, domainid1, domainid2, . domainidN}


Re: [PATCH] powerpc/pseries/cpuhp: respect current SMT when adding new CPU

2023-03-30 Thread Michal Suchánek
On Thu, Mar 30, 2023 at 05:51:57PM +0200, Laurent Dufour wrote:
> On 13/02/2023 16:40:50, Nathan Lynch wrote:
> > Michal Suchánek  writes:
> >> On Mon, Feb 13, 2023 at 08:46:50AM -0600, Nathan Lynch wrote:
> >>> Laurent Dufour  writes:
>  When a new CPU is added, the kernel is activating all its threads. This
>  leads to weird, but functional, result when adding CPU on a SMT 4 system
>  for instance.
> 
>  Here the newly added CPU 1 has 8 threads while the other one has 4 
>  threads
>  active (system has been booted with the 'smt-enabled=4' kernel option):
> 
>  ltcden3-lp12:~ # ppc64_cpu --info
>  Core   0:0*1*2*3*4 5 6 7
>  Core   1:8*9*   10*   11*   12*   13*   14*   15*
> 
>  There is no SMT value in the kernel. It is possible to run unbalanced 
>  LPAR
>  with 2 threads for a CPU, 4 for another one, and 5 on the latest.

> Indeed, that's not so easy. There are multiple ways for the SMT level to be
> impacted:
>  - smt-enabled kernel option
>  - smtstate systemctl service (if activated), saving SMT level at shutdown
> time to restore it a boot time
>  - pseries-energyd daemon (if activated) could turn off threads
>  - ppc64_cpu --smt=x user command
>  - sysfs direct writing to turn off/on specific threads.
> 
> There is no SMT level saved, on "disk" or in the kernel, and any of these
> options can interact in parallel. So from the user space point of view, the
> best we could do is looking for the SMT current values, there could be
> multiple values in the case of a mixed SMT state, peek one value and apply it.
> 
> Extending the drmgr's hook is still valid, and I sent a patch series on the
> powerpc-utils mailing list to achieve that. However, changing the SMT level
> in that hook means that newly added CPU will be first turn on and there is
> a window where this threads could be seen active. Not a big deal but not
> turning on these extra threads looks better to me.

Which means

1) add an option to not online hotplugged CPUs by default

2) when a tool that wants to manage CPU onlining is active it can set
the option so that no threads are onlined automatically, and online the
desired threads

3) when no such tool is active the default should be to online all
threads to preserve compatibility with existing behavior

> That's being said, I can't see any benefit of a user space implementation
> compared to the option I'm proposing in that patch.

The userspace implementation can implement arbitrarily complex policy;
that's not something that belongs in the kernel.

Thanks

Michal


Re: [PATCH] powerpc/pseries/cpuhp: respect current SMT when adding new CPU

2023-03-30 Thread Laurent Dufour
On 13/02/2023 16:40:50, Nathan Lynch wrote:
> Michal Suchánek  writes:
>> On Mon, Feb 13, 2023 at 08:46:50AM -0600, Nathan Lynch wrote:
>>> Laurent Dufour  writes:
 When a new CPU is added, the kernel is activating all its threads. This
 leads to weird, but functional, result when adding CPU on a SMT 4 system
 for instance.

 Here the newly added CPU 1 has 8 threads while the other one has 4 threads
 active (system has been booted with the 'smt-enabled=4' kernel option):

 ltcden3-lp12:~ # ppc64_cpu --info
 Core   0:0*1*2*3*4 5 6 7
 Core   1:8*9*   10*   11*   12*   13*   14*   15*

 There is no SMT value in the kernel. It is possible to run unbalanced LPAR
 with 2 threads for a CPU, 4 for another one, and 5 on the latest.

 To work around this possibility, and assuming that the LPAR run with the
 same number of threads for each CPU, which is the common case,
>>>
>>> I am skeptical at best of baking that assumption into this code. Mixed
>>> SMT modes within a partition doesn't strike me as an unreasonable
>>> possibility for some use cases. And if that's wrong, then we should just
>>> add a global smt value instead of using heuristics.
>>>
 the number
 of active threads of the CPU doing the hot-plug operation is computed. Only
 that number of threads will be activated for the newly added CPU.

 This way on a LPAR running in SMT=4, newly added CPU will be running 4
 threads, which is what a end user would expect.
>>>
>>> I could see why most users would prefer this new behavior. But surely
>>> some users have come to expect the existing behavior, which has been in
>>> place for years, and developed workarounds that might be broken by this
>>> change?
>>>
>>> I would suggest that to handle this well, we need to give user space
>>> more ability to tell the kernel what actions to take on added cores, on
>>> an opt-in basis.
>>>
>>> This could take the form of extending the DLPAR sysfs command set:
>>>
>>> Option 1 - Add a flag that tells the kernel not to online any threads at
>>> all; user space will online the desired threads later.
>>>
>>> Option 2 - Add an option that tells the kernel which SMT mode to apply.
>>
>> powerpc-utils grew some drmgr hooks recently so maybe the policy can be
>> moved to userspace?
> 
> I'm not sure whether the hook mechanism would come into play, but yes, I
> am suggesting that user space be given the option of overriding the
> kernel's current behavior.

Indeed, that's not so easy. There are multiple ways for the SMT level to be
impacted:
 - smt-enabled kernel option
 - smtstate systemctl service (if activated), saving SMT level at shutdown
time to restore it at boot time
 - pseries-energyd daemon (if activated) could turn off threads
 - ppc64_cpu --smt=x user command
 - sysfs direct writing to turn off/on specific threads.

There is no SMT level saved, on "disk" or in the kernel, and any of these
options can interact in parallel. So from the user space point of view, the
best we could do is look at the current SMT values (there could be
multiple values in the case of a mixed SMT state), pick one value and apply it.

Extending the drmgr's hook is still valid, and I sent a patch series on the
powerpc-utils mailing list to achieve that. However, changing the SMT level
in that hook means that the newly added CPU will first be turned on, and there
is a window where these threads could be seen active. Not a big deal, but not
turning on these extra threads looks better to me.

That being said, I can't see any benefit of a user space implementation
compared to the option I'm proposing in that patch.

Does anyone have a better idea?

Cheers,
Laurent.


Re: [PATCH v7 3/6] PCI: Allow pci_bus_for_each_resource() to take less arguments

2023-03-30 Thread Andy Shevchenko
On Thu, Mar 30, 2023 at 09:24:21PM +0800, kernel test robot wrote:
> 
> Greeting,
> 
> FYI, we noticed various errors such like
> "i40e: probe of :3d:00.0 failed with error -12"
> due to commit (built with gcc-11):
> 
> commit: d23d5938fd7ced817d6aa1ff86cd671ebbaebfc2 ("[PATCH v7 3/6] PCI: Allow 
> pci_bus_for_each_resource() to take less arguments")
> url: 
> https://github.com/intel-lab-lkp/linux/commits/Andy-Shevchenko/kernel-h-Split-out-COUNT_ARGS-and-CONCATENATE/20230324-013857
> base: https://git.kernel.org/cgit/linux/kernel/git/pci/pci.git next
> patch link: 
> https://lore.kernel.org/all/20230323173610.60442-4-andriy.shevche...@linux.intel.com/
> patch subject: [PATCH v7 3/6] PCI: Allow pci_bus_for_each_resource() to take 
> less arguments
> 
> in testcase: boot
> 
> on test machine: 96 threads 2 sockets Intel(R) Xeon(R) Gold 6252 CPU @ 
> 2.10GHz (Cascade Lake) with 512G memory
> 
> caused below changes (please refer to attached dmesg/kmsg for entire 
> log/backtrace):
> 
> 
> If you fix the issue, kindly add following tag
> | Reported-by: kernel test robot 
> | Link: 
> https://lore.kernel.org/oe-lkp/202303302009.55848372-oliver.s...@intel.com

Thanks, that is a useful test!

-- 
With Best Regards,
Andy Shevchenko




Re: [PATCH 21/21] dma-mapping: replace custom code with generic implementation

2023-03-30 Thread Lad, Prabhakar
On Mon, Mar 27, 2023 at 1:20 PM Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> Now that all of these have consistent behavior, replace them with
> a single shared implementation of arch_sync_dma_for_device() and
> arch_sync_dma_for_cpu() and three parameters to pick how they should
> operate:
>
>  - If the CPU has speculative prefetching, then the cache
>has to be invalidated after a transfer from the device.
>On the rarer CPUs without prefetching, this can be skipped,
>with all cache management happening before the transfer.
>This flag can be runtime detected, but is usually fixed
>per architecture.
>
>  - Some architectures currently clean the caches before DMA
>from a device, while others invalidate it. There has not
>been a conclusion regarding whether we should change all
>architectures to use clean instead, so this adds an
>architecture specific flag that we can change later on.
>
>  - On 32-bit Arm, the arch_sync_dma_for_cpu() function keeps
>track pages that are marked clean in the page cache, to
>avoid flushing them again. The implementation for this is
>generic enough to work on all architectures that use the
>PG_dcache_clean page flag, but a Kconfig symbol is used
>to only enable it on Arm to preserve the existing behavior.
>
> For the function naming, I picked 'wback' over 'clean', and 'wback_inv'
> over 'flush', to avoid any ambiguity of what the helper functions are
> supposed to do.
>
> Moving the global functions into a header file is usually a bad idea
> as it prevents the header from being included more than once, but it
> helps keep the behavior as close as possible to the previous state,
> including the possibility of inlining most of it into these functions
> where that was done before. This also helps keep the global namespace
> clean, by hiding the new arch_dma_cache{_wback,_inv,_wback_inv} from
> device drivers that might use them incorrectly.
>
> It would be possible to do this one architecture at a time, but
> as the change is the same everywhere, the combined patch helps
> explain it better once.
>
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/arc/mm/dma.c |  66 +-
>  arch/arm/Kconfig  |   3 +
>  arch/arm/mm/dma-mapping-nommu.c   |  39 ++-
>  arch/arm/mm/dma-mapping.c |  64 +++---
>  arch/arm64/mm/dma-mapping.c   |  28 +---
>  arch/csky/mm/dma-mapping.c|  44 ++--
>  arch/hexagon/kernel/dma.c |  44 ++--
>  arch/m68k/kernel/dma.c|  43 +++-
>  arch/microblaze/kernel/dma.c  |  48 +++---
>  arch/mips/mm/dma-noncoherent.c|  60 +++--
>  arch/nios2/mm/dma-mapping.c   |  57 +++-
>  arch/openrisc/kernel/dma.c|  63 +++---
>  arch/parisc/kernel/pci-dma.c  |  46 ++---
>  arch/powerpc/mm/dma-noncoherent.c |  34 ++
>  arch/riscv/mm/dma-noncoherent.c   |  51 +++---
>  arch/sh/kernel/dma-coherent.c |  43 +++-
>  arch/sparc/kernel/ioport.c|  38 ---
>  arch/xtensa/kernel/pci-dma.c  |  40 ++-
>  include/linux/dma-sync.h  | 107 ++
>  19 files changed, 527 insertions(+), 391 deletions(-)
>  create mode 100644 include/linux/dma-sync.h
>
I tested this on RZ/Five (with my v6 [0] + additional changes) so for RISC-V,

Reviewed-by: Lad Prabhakar 
Tested-by: Lad Prabhakar 

[0] 
https://patchwork.kernel.org/project/linux-renesas-soc/cover/20230106185526.260163-1-prabhakar.mahadev-lad...@bp.renesas.com/

Cheers,
Prabhakar

> diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
> index ddb96786f765..61cd01646222 100644
> --- a/arch/arc/mm/dma.c
> +++ b/arch/arc/mm/dma.c
> @@ -30,63 +30,33 @@ void arch_dma_prep_coherent(struct page *page, size_t 
> size)
> dma_cache_wback_inv(page_to_phys(page), size);
>  }
>
> -/*
> - * Cache operations depending on function and direction argument, inspired by
> - * https://lore.kernel.org/lkml/20180518175004.gf17...@n2100.armlinux.org.uk
> - * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
> - * dma-mapping: provide a generic dma-noncoherent implementation)"
> - *
> - *  |   map  ==  for_device |   unmap ==  for_cpu
> - *  |
> - * TO_DEV   |   writebackwriteback  |   none  none
> - * FROM_DEV |   invalidate   invalidate |   invalidate*   invalidate*
> - * BIDIR|   writebackwriteback  |   invalidateinvalidate
> - *
> - * [*] needed for CPU speculative prefetches
> - *
> - * NOTE: we don't check the validity of direction argument as it is done in
> - * upper layer functions (in include/linux/dma-mapping.h)
> - */
> -
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> -   enum dma_data_direction dir)
> +static inline void 

Re: [PATCH v8 2/2] arm64: support batched/deferred tlb shootdown during page reclamation

2023-03-30 Thread Yicong Yang
Hi Punit,

On 2023/3/30 21:15, Punit Agrawal wrote:
> Hi Yicong,
> 
> Yicong Yang  writes:
> 
>> From: Barry Song 
>>
>> on x86, batched and deferred tlb shootdown has led to 90%
>> performance increase on tlb shootdown. on arm64, HW can do
>> tlb shootdown without software IPI. But sync tlbi is still
>> quite expensive.
>>
>> Even running a simplest program which requires swapout can
>> prove this is true,
>>  #include <sys/types.h>
>>  #include <stdio.h>
>>  #include <sys/mman.h>
>>  #include <string.h>
>>
>>  int main()
>>  {
>>  #define SIZE (1 * 1024 * 1024)
>>  volatile unsigned char *p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
>>   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
>>
>>  memset(p, 0x88, SIZE);
>>
>>  for (int k = 0; k < 1; k++) {
>>  /* swap in */
>>  for (int i = 0; i < SIZE; i += 4096) {
>>  (void)p[i];
>>  }
>>
>>  /* swap out */
>>  madvise(p, SIZE, MADV_PAGEOUT);
>>  }
>>  }
>>
>> Perf result on snapdragon 888 with 8 cores by using zRAM
>> as the swap block device.
>>
>>  ~ # perf record taskset -c 4 ./a.out
>>  [ perf record: Woken up 10 times to write data ]
>>  [ perf record: Captured and wrote 2.297 MB perf.data (60084 samples) ]
>>  ~ # perf report
>>  # To display the perf.data header info, please use --header/--header-only 
>> options.
>>  # To display the perf.data header info, please use --header/--header-only 
>> options.
>>  #
>>  #
>>  # Total Lost Samples: 0
>>  #
>>  # Samples: 60K of event 'cycles'
>>  # Event count (approx.): 35706225414
>>  #
>>  # Overhead  Command  Shared Object  Symbol
>>  #   ...  .  
>> .
>>  #
>> 21.07%  a.out[kernel.kallsyms]  [k] _raw_spin_unlock_irq
>>  8.23%  a.out[kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
>>  6.67%  a.out[kernel.kallsyms]  [k] filemap_map_pages
>>  6.16%  a.out[kernel.kallsyms]  [k] __zram_bvec_write
>>  5.36%  a.out[kernel.kallsyms]  [k] ptep_clear_flush
>>  3.71%  a.out[kernel.kallsyms]  [k] _raw_spin_lock
>>  3.49%  a.out[kernel.kallsyms]  [k] memset64
>>  1.63%  a.out[kernel.kallsyms]  [k] clear_page
>>  1.42%  a.out[kernel.kallsyms]  [k] _raw_spin_unlock
>>  1.26%  a.out[kernel.kallsyms]  [k] 
>> mod_zone_state.llvm.8525150236079521930
>>  1.23%  a.out[kernel.kallsyms]  [k] xas_load
>>  1.15%  a.out[kernel.kallsyms]  [k] zram_slot_lock
>>
>> ptep_clear_flush() takes 5.36% CPU in the micro-benchmark
>> swapping in/out a page mapped by only one process. If the
>> page is mapped by multiple processes, typically, like more
>> than 100 on a phone, the overhead would be much higher as
>> we have to run tlb flush 100 times for one single page.
>> Plus, tlb flush overhead will increase with the number
>> of CPU cores due to the bad scalability of tlb shootdown
>> in HW, so those ARM64 servers should expect much higher
>> overhead.
>>
>> Further perf annotate shows 95% cpu time of ptep_clear_flush
>> is actually used by the final dsb() to wait for the completion
>> of tlb flush. This provides us a very good chance to leverage
>> the existing batched tlb in kernel. The minimum modification
>> is that we only send async tlbi in the first stage and we send
>> dsb while we have to sync in the second stage.
>>
>> With the above simplest micro benchmark, collapsed time to
>> finish the program decreases around 5%.
>>
>> Typical collapsed time w/o patch:
>>  ~ # time taskset -c 4 ./a.out
>>  0.21user 14.34system 0:14.69elapsed
>> w/ patch:
>>  ~ # time taskset -c 4 ./a.out
>>  0.22user 13.45system 0:13.80elapsed
>>
>> Also, Yicong Yang added the following observation.
>>  Tested with benchmark in the commit on Kunpeng920 arm64 server,
>>  observed an improvement around 12.5% with command
>>  `time ./swap_bench`.
>>  w/o w/
>>  real0m13.460s   0m11.771s
>>  user0m0.248s0m0.279s
>>  sys 0m12.039s   0m11.458s
>>
>>  Originally it's noticed a 16.99% overhead of ptep_clear_flush()
>>  which has been eliminated by this patch:
>>
>>  [root@localhost yang]# perf record -- ./swap_bench && perf report
>>  [...]
>>  16.99%  swap_bench  [kernel.kallsyms]  [k] ptep_clear_flush
>>
>> It is tested on 4,8,128 CPU platforms and shows to be beneficial on
>> large systems but may not have improvement on small systems like on
>> a 4 CPU platform. So make ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH depends
>> on CONFIG_EXPERT for this stage and make this disabled on systems
>> with less than 8 CPUs. User can modify this threshold according to
>> their own platforms by CONFIG_NR_CPUS_FOR_BATCHED_TLB.
> 
> The commit log and the patch disagree on the name of the config option
> (CONFIG_NR_CPUS_FOR_BATCHED_TLB vs 

Re: [PATCH 09/21] riscv: dma-mapping: skip invalidation before bidirectional DMA

2023-03-30 Thread Lad, Prabhakar
On Mon, Mar 27, 2023 at 1:16 PM Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> For a DMA_BIDIRECTIONAL transfer, the caches have to be cleaned
> first to let the device see data written by the CPU, and invalidated
> after the transfer to let the CPU see data written by the device.
>
> riscv also invalidates the caches before the transfer, which does
> not appear to serve any purpose.
>
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/riscv/mm/dma-noncoherent.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
Reviewed-by: Lad Prabhakar 

Cheers,
Prabhakar

> diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
> index 640f4c496d26..69c80b2155a1 100644
> --- a/arch/riscv/mm/dma-noncoherent.c
> +++ b/arch/riscv/mm/dma-noncoherent.c
> @@ -25,7 +25,7 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t 
> size,
> ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
> break;
> case DMA_BIDIRECTIONAL:
> -   ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
> +   ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
> break;
> default:
> break;
> --
> 2.39.2
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv


Re: [PATCH v8 2/2] arm64: support batched/deferred tlb shootdown during page reclamation

2023-03-30 Thread Punit Agrawal
Hi Yicong,

Yicong Yang  writes:

> From: Barry Song 
>
> on x86, batched and deferred tlb shootdown has led to 90%
> performance increase on tlb shootdown. on arm64, HW can do
> tlb shootdown without software IPI. But sync tlbi is still
> quite expensive.
>
> Even running a simplest program which requires swapout can
> prove this is true,
>  #include <sys/types.h>
>  #include <stdio.h>
>  #include <sys/mman.h>
>  #include <string.h>
>
>  int main()
>  {
>  #define SIZE (1 * 1024 * 1024)
>  volatile unsigned char *p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
>   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
>
>  memset(p, 0x88, SIZE);
>
>  for (int k = 0; k < 1; k++) {
>  /* swap in */
>  for (int i = 0; i < SIZE; i += 4096) {
>  (void)p[i];
>  }
>
>  /* swap out */
>  madvise(p, SIZE, MADV_PAGEOUT);
>  }
>  }
>
> Perf result on snapdragon 888 with 8 cores by using zRAM
> as the swap block device.
>
>  ~ # perf record taskset -c 4 ./a.out
>  [ perf record: Woken up 10 times to write data ]
>  [ perf record: Captured and wrote 2.297 MB perf.data (60084 samples) ]
>  ~ # perf report
>  # To display the perf.data header info, please use --header/--header-only 
> options.
>  # To display the perf.data header info, please use --header/--header-only 
> options.
>  #
>  #
>  # Total Lost Samples: 0
>  #
>  # Samples: 60K of event 'cycles'
>  # Event count (approx.): 35706225414
>  #
>  # Overhead  Command  Shared Object  Symbol
>  #   ...  .  
> .
>  #
> 21.07%  a.out[kernel.kallsyms]  [k] _raw_spin_unlock_irq
>  8.23%  a.out[kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
>  6.67%  a.out[kernel.kallsyms]  [k] filemap_map_pages
>  6.16%  a.out[kernel.kallsyms]  [k] __zram_bvec_write
>  5.36%  a.out[kernel.kallsyms]  [k] ptep_clear_flush
>  3.71%  a.out[kernel.kallsyms]  [k] _raw_spin_lock
>  3.49%  a.out[kernel.kallsyms]  [k] memset64
>  1.63%  a.out[kernel.kallsyms]  [k] clear_page
>  1.42%  a.out[kernel.kallsyms]  [k] _raw_spin_unlock
>  1.26%  a.out[kernel.kallsyms]  [k] 
> mod_zone_state.llvm.8525150236079521930
>  1.23%  a.out[kernel.kallsyms]  [k] xas_load
>  1.15%  a.out[kernel.kallsyms]  [k] zram_slot_lock
>
> ptep_clear_flush() takes 5.36% CPU in the micro-benchmark
> swapping in/out a page mapped by only one process. If the
> page is mapped by multiple processes, typically, like more
> than 100 on a phone, the overhead would be much higher as
> we have to run tlb flush 100 times for one single page.
> Plus, tlb flush overhead will increase with the number
> of CPU cores due to the bad scalability of tlb shootdown
> in HW, so those ARM64 servers should expect much higher
> overhead.
>
> Further perf annotate shows 95% cpu time of ptep_clear_flush
> is actually used by the final dsb() to wait for the completion
> of tlb flush. This provides us a very good chance to leverage
> the existing batched tlb in kernel. The minimum modification
> is that we only send async tlbi in the first stage and we send
> dsb while we have to sync in the second stage.
>
> With the above simplest micro benchmark, collapsed time to
> finish the program decreases around 5%.
>
> Typical collapsed time w/o patch:
>  ~ # time taskset -c 4 ./a.out
>  0.21user 14.34system 0:14.69elapsed
> w/ patch:
>  ~ # time taskset -c 4 ./a.out
>  0.22user 13.45system 0:13.80elapsed
>
> Also, Yicong Yang added the following observation.
>   Tested with benchmark in the commit on Kunpeng920 arm64 server,
>   observed an improvement around 12.5% with command
>   `time ./swap_bench`.
>   w/o w/
>   real0m13.460s   0m11.771s
>   user0m0.248s0m0.279s
>   sys 0m12.039s   0m11.458s
>
>   Originally it's noticed a 16.99% overhead of ptep_clear_flush()
>   which has been eliminated by this patch:
>
>   [root@localhost yang]# perf record -- ./swap_bench && perf report
>   [...]
>   16.99%  swap_bench  [kernel.kallsyms]  [k] ptep_clear_flush
>
> It is tested on 4,8,128 CPU platforms and shows to be beneficial on
> large systems but may not have improvement on small systems like on
> a 4 CPU platform. So make ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH depends
> on CONFIG_EXPERT for this stage and make this disabled on systems
> with less than 8 CPUs. User can modify this threshold according to
> their own platforms by CONFIG_NR_CPUS_FOR_BATCHED_TLB.

The commit log and the patch disagree on the name of the config option
(CONFIG_NR_CPUS_FOR_BATCHED_TLB vs CONFIG_ARM64_NR_CPUS_FOR_BATCHED_TLB).

But more importantly, I was wondering why this posting doesn't address
Catalin's feedback [a] about using a runtime tunable. Maybe I missed the
follow-up 

Re: [PATCH 08/21] riscv: dma-mapping: only invalidate after DMA, not flush

2023-03-30 Thread Lad, Prabhakar
On Mon, Mar 27, 2023 at 1:16 PM Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> No other architecture intentionally writes back dirty cache lines into
> a buffer that a device has just finished writing into. If the cache is
> clean, this has no effect at all, but if a cacheline in the buffer has
> actually been written by the CPU, there is a driver bug that is likely
> made worse by overwriting that buffer.
>
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/riscv/mm/dma-noncoherent.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>

Reviewed-by: Lad Prabhakar 

Cheers,
Prabhakar

> diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
> index d919efab6eba..640f4c496d26 100644
> --- a/arch/riscv/mm/dma-noncoherent.c
> +++ b/arch/riscv/mm/dma-noncoherent.c
> @@ -42,7 +42,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> break;
> case DMA_FROM_DEVICE:
> case DMA_BIDIRECTIONAL:
> -   ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
> +   ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size);
> break;
> default:
> break;
> --
> 2.39.2
>
>
> ___
> linux-riscv mailing list
> linux-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv


[PATCH] powerpc/papr_scm: Update the NUMA distance table for the target node

2023-03-30 Thread Aneesh Kumar K.V
platform device helper routines won't update the NUMA distance table
while creating a platform device, even if the device is present on
a NUMA node that doesn't have memory or CPU. This is especially true
for pmem devices. If the target node of the pmem device is not online, we
find the nearest online node to the device and associate the pmem
device with that online node. To find the nearest online node, we should
have the numa distance table updated correctly. Update the distance
information during the device probe.

distance_lookup_table value for distance_ref_points_depth = 2 before and after
fix is below
node 3 distance depth 0  - 0
node 3 distance depth 1  - 0
node 4 distance depth 0  - 4
node 4 distance depth 1  - 2
node 5 distance depth 0  - 5
node 5 distance depth 1  - 1

after fix
node 3 distance depth 0  - 3
node 3 distance depth 1  - 1
node 4 distance depth 0  - 4
node 4 distance depth 1  - 2
node 5 distance depth 0  - 5
node 5 distance depth 1  - 1

Without the fix, the nearest numa node to the pmem device will be picked as 4.
After the fix, we get the correct numa node which is 5.

Fixes: da1115fdbd6e ("powerpc/nvdimm: Pick nearby online node if the device 
node is not online")
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/platforms/pseries/papr_scm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index 2f8385523a13..5bef75714bd5 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -1428,6 +1428,10 @@ static int papr_scm_probe(struct platform_device *pdev)
return -ENODEV;
}
 
+   /*
+* of platform device create won't update the numa distance table
+*/
+   update_numa_distance(dn);
 
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
-- 
2.39.2



Re: [PATCH 18/21] ARM: drop SMP support for ARM11MPCore

2023-03-30 Thread Ard Biesheuvel
On Mon, 27 Mar 2023 at 14:18, Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> The cache management operations for noncoherent DMA on ARMv6 work
> in two different ways:
>
>  * When CONFIG_DMA_CACHE_RWFO is set, speculative prefetches on in-flight
>DMA buffers lead to data corruption when the prefetched data is written
>back on top of data from the device.
>
>  * When CONFIG_DMA_CACHE_RWFO is disabled, a cache flush on one CPU
>is not seen by the other core(s), leading to inconsistent contents
>across the system.
>
> As a consequence, neither configuration is actually safe to use in a
> general-purpose kernel that is used on both MPCore systems and ARM1176
> with prefetching enabled.
>
> We could add further workarounds to make the behavior more dynamic based
> on the system, but realistically, there are close to zero remaining
> users on any ARM11MPCore anyway, and nobody seems too interested in it,
> compared to the more popular ARM1176 used in BCM2835 and AST2500.
>
> The Oxnas platform has some minimal support in OpenWRT, but most of the
> drivers and dts files never made it into the mainline kernel, while the
> Arm Versatile/Realview platform mainly serves as a reference system but
> is not necessary to be kept working once all other ARM11MPCore are gone.
>
> Take the easy way out here and drop support for multiprocessing on
> ARMv6, along with the CONFIG_DMA_CACHE_RWFO option and the cache
> management implementation for it. This also helps with other ARMv6
> issues, but for the moment leaves the ability to build a kernel that
> can run on both ARMv7 SMP and single-processor ARMv6, which we probably
> want to stop supporting as well, but not as part of this series.
>
> Cc: Neil Armstrong 
> Cc: Daniel Golle 
> Cc: Linus Walleij 
> Cc: linux-ox...@groups.io
> Signed-off-by: Arnd Bergmann 

Acked-by: Ard Biesheuvel 


Re: [PATCH] perf vendor events power9: Remove UTF-8 characters from json files

2023-03-30 Thread kajoljain



On 3/29/23 18:54, Arnaldo Carvalho de Melo wrote:
> Em Tue, Mar 28, 2023 at 09:21:49AM -0700, Ian Rogers escreveu:
>> On Tue, Mar 28, 2023 at 4:30 AM Kajol Jain  wrote:
>>>
>>> Commit 3c22ba524304 ("perf vendor events powerpc: Update POWER9 events")
>>> added and updated power9 pmu json events. However some of the json
>>> events which are part of other.json and pipeline.json files,
>>> contains UTF-8 characters in their brief description.
>>> Having UTF-8 character could brakes the perf build on some distros.
>>
>> nit: s/brakes/break/
> 
> I'll fix that later, thanks.
>  
>>> Fix this issue by removing the UTF-8 characters from other.json and
>>> pipeline.json files.
>>>
>>> Result without the fix patch:
> 
> [perfbuilder@five ~]$ cat dm.log/summary 
>    1    23.25 ubuntu:18.04-x-powerpc    : Ok   powerpc-linux-gnu-gcc 
> (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 
>    2    24.56 ubuntu:18.04-x-powerpc64  : Ok   powerpc64-linux-gnu-gcc 
> (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 
>    3    25.06 ubuntu:18.04-x-powerpc64el: Ok   powerpc64le-linux-gnu-gcc 
> (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 
> BUILD_TARBALL_HEAD=9da5ab1d38cd17fb47cbe5a1f95bca63e6ca9796
> 

Thanks Ian and Arnaldo for reviewing it.

Thanks,
Kajol Jain

>>> [command]# file -i pmu-events/arch/powerpc/power9/*
>>> pmu-events/arch/powerpc/power9/cache.json:  application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/floating-point.json: application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/frontend.json:   application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/marked.json: application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/memory.json: application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/metrics.json:application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/nest_metrics.json:   application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/other.json:  application/json; 
>>> charset=utf-8
>>> pmu-events/arch/powerpc/power9/pipeline.json:   application/json; 
>>> charset=utf-8
>>> pmu-events/arch/powerpc/power9/pmc.json:application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/translation.json:application/json; 
>>> charset=us-ascii
>>>
>>> Result with the fix patch:
>>>
>>> [command]# file -i pmu-events/arch/powerpc/power9/*
>>> pmu-events/arch/powerpc/power9/cache.json:  application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/floating-point.json: application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/frontend.json:   application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/marked.json: application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/memory.json: application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/metrics.json:application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/nest_metrics.json:   application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/other.json:  application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/pipeline.json:   application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/pmc.json:application/json; 
>>> charset=us-ascii
>>> pmu-events/arch/powerpc/power9/translation.json:application/json; 
>>> charset=us-ascii
>>>
>>> Fixes: 3c22ba524304 ("perf vendor events powerpc: Update POWER9 events")
>>> Reported-by: Arnaldo Carvalho de Melo 
>>> Link: https://lore.kernel.org/lkml/zbxp77deq7ikt...@kernel.org/
>>> Signed-off-by: Kajol Jain 
>>
>> Acked-by: Ian Rogers 
>>
>> Thanks,
>> Ian
>>
>>> ---
>>>  tools/perf/pmu-events/arch/powerpc/power9/other.json| 4 ++--
>>>  tools/perf/pmu-events/arch/powerpc/power9/pipeline.json | 2 +-
>>>  2 files changed, 3 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json 
>>> b/tools/perf/pmu-events/arch/powerpc/power9/other.json
>>> index 3f69422c21f9..f10bd554521a 100644
>>> --- a/tools/perf/pmu-events/arch/powerpc/power9/other.json
>>> +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json
>>> @@ -1417,7 +1417,7 @@
>>>{
>>>  "EventCode": "0x45054",
>>>  "EventName": "PM_FMA_CMPL",
>>> -"BriefDescription": "two flops operation completed (fmadd, fnmadd, 
>>> fmsub, fnmsub) Scalar instructions only. "
>>> +"BriefDescription": "two flops operation completed (fmadd, fnmadd, 
>>> fmsub, fnmsub) Scalar instructions only."
>>>},
>>>{
>>>  "EventCode": "0x201E8",
>>> @@ -2017,7 +2017,7 @@
>>>{
>>>  "EventCode": "0xC0BC",
>>>  "EventName": "PM_LSU_FLUSH_OTHER",
>>> -"BriefDescription": "Other LSU flushes including: Sync (sync ack from 
>>> L2 caused search of LRQ for oldest snooped load, This will either signal a 
>>> Precise Flush of the oldest snooped loa 

[PATCH v2 1/2] KVM: PPC: Permit SRR1 flags in more injected interrupt types

2023-03-30 Thread Nicholas Piggin
The prefix architecture in ISA v3.1 introduces a prefixed bit in SRR1
for many types of synchronous interrupts which is set when the interrupt
is caused by a prefixed instruction.

This requires KVM to be able to set this bit when injecting interrupts
into a guest. Plumb through the SRR1 "flags" argument to the core_queue
APIs where it's missing for this. For now they are set to 0, which is
no change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/kvm_ppc.h | 27 ++
 arch/powerpc/kvm/book3s.c  | 32 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  8 +++
 arch/powerpc/kvm/book3s_hv.c   |  4 ++--
 arch/powerpc/kvm/book3s_hv_nested.c|  4 ++--
 arch/powerpc/kvm/book3s_pr.c   |  4 ++--
 arch/powerpc/kvm/booke.c   | 13 +++
 arch/powerpc/kvm/emulate_loadstore.c   |  6 ++---
 arch/powerpc/kvm/powerpc.c |  3 ++-
 9 files changed, 57 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 52e7964e4301..bc57d058ad5b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -128,25 +128,34 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong 
flags);
+
+extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu,
+   ulong srr1_flags);
 extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
-extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu,
+ ulong srr1_flags);
+extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu,
+   ulong srr1_flags);
+extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu,
+ ulong srr1_flags);
+extern void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu,
+ ulong srr1_flags);
 extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
 extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, ulong 
dear_flags,
+extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
+   ulong dear_flags,
ulong esr_flags);
 extern void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
-  ulong dear_flags,
-  ulong esr_flags);
+  ulong srr1_flags,
+  ulong dar,
+  ulong dsisr);
 extern void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
-  ulong esr_flags);
+  ulong srr1_flags);
+
 extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 85d6a60f4db5..686d8d9eda3e 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -188,10 +188,10 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, 
unsigned int vec)
 }
 EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
 
-void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
+void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong srr1_flags)
 {
/* might as well deliver this straight away */
-   kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, flags);
+   kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, 
srr1_flags);
 }
 EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
 
@@ -201,29 +201,29 @@ void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL(kvmppc_core_queue_syscall);
 
-void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong srr1_flags)
 {
/* might as well deliver this straight away */
-   kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
+   kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, 

[PATCH v2 2/2] KVM: PPC: Book3S HV: Set SRR1[PREFIX] bit on injected interrupts

2023-03-30 Thread Nicholas Piggin
Pass the hypervisor (H)SRR1[PREFIX] indication through to synchronous
interrupts injected into the guest.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 13 +
 arch/powerpc/kvm/book3s_hv.c   | 27 +-
 arch/powerpc/kvm/book3s_hv_nested.c|  9 ++---
 arch/powerpc/kvm/emulate_loadstore.c   |  6 +++---
 arch/powerpc/kvm/powerpc.c |  3 ++-
 5 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 215a6b5ba104..461307b89c3a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -954,7 +954,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
if (dsisr & DSISR_BADACCESS) {
/* Reflect to the guest as DSI */
pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
-   kvmppc_core_queue_data_storage(vcpu, 0, ea, dsisr);
+   kvmppc_core_queue_data_storage(vcpu,
+   kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+   ea, dsisr);
return RESUME_GUEST;
}
 
@@ -979,7 +981,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
 * Bad address in guest page table tree, or other
 * unusual error - reflect it to the guest as DSI.
 */
-   kvmppc_core_queue_data_storage(vcpu, 0, ea, dsisr);
+   kvmppc_core_queue_data_storage(vcpu,
+   kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+   ea, dsisr);
return RESUME_GUEST;
}
return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
@@ -988,8 +992,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
if (memslot->flags & KVM_MEM_READONLY) {
if (writing) {
/* give the guest a DSI */
-   kvmppc_core_queue_data_storage(vcpu, 0, ea,
-   DSISR_ISSTORE | DSISR_PROTFAULT);
+   kvmppc_core_queue_data_storage(vcpu,
+   kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+   ea, DSISR_ISSTORE | DSISR_PROTFAULT);
return RESUME_GUEST;
}
kvm_ro = true;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 16ea0ffb7976..c973bf556fb3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1428,7 +1428,8 @@ static int kvmppc_emulate_debug_inst(struct kvm_vcpu 
*vcpu)
vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu);
return RESUME_HOST;
} else {
-   kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+   kvmppc_core_queue_program(vcpu, SRR1_PROGILL |
+   (kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
return RESUME_GUEST;
}
 }
@@ -1632,7 +1633,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 * so that it knows that the machine check occurred.
 */
if (!vcpu->kvm->arch.fwnmi_enabled) {
-   ulong flags = vcpu->arch.shregs.msr & 0x083c;
+   ulong flags = (vcpu->arch.shregs.msr & 0x083c) |
+   (kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
kvmppc_core_queue_machine_check(vcpu, flags);
r = RESUME_GUEST;
break;
@@ -1661,7 +1663,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 * as a result of a hypervisor emulation interrupt
 * (e40) getting turned into a 700 by BML RTAS.
 */
-   flags = vcpu->arch.shregs.msr & 0x1full;
+   flags = (vcpu->arch.shregs.msr & 0x1full) |
+   (kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
kvmppc_core_queue_program(vcpu, flags);
r = RESUME_GUEST;
break;
@@ -1741,7 +1744,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
}
 
if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | 
DSISR_PROTFAULT))) {
-   kvmppc_core_queue_data_storage(vcpu, 0,
+   kvmppc_core_queue_data_storage(vcpu,
+   kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
r = RESUME_GUEST;
break;
@@ -1759,7 +1763,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
} else if (err == -1 || err == -2) {
   

[PATCH v2 0/2] KVM: PPC: Book3S HV: Injected interrupt SRR1

2023-03-30 Thread Nicholas Piggin
I missed this in my earlier review and testing, but I think we need
these in the prefix instruction enablement series before the final patch
that enables HFSCR[PREFIX] for guests.

Thanks,
Nick

Nicholas Piggin (2):
  KVM: PPC: Permit SRR1 flags in more injected interrupt types
  KVM: PPC: Book3S HV: Set SRR1[PREFIX] bit on injected interrupts

 arch/powerpc/include/asm/kvm_ppc.h | 27 ++
 arch/powerpc/kvm/book3s.c  | 32 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 13 +++
 arch/powerpc/kvm/book3s_hv.c   | 23 --
 arch/powerpc/kvm/book3s_hv_nested.c|  9 +---
 arch/powerpc/kvm/book3s_pr.c   |  4 ++--
 arch/powerpc/kvm/booke.c   | 13 +++
 arch/powerpc/kvm/emulate_loadstore.c   |  6 ++---
 arch/powerpc/kvm/powerpc.c |  4 +++-
 9 files changed, 81 insertions(+), 50 deletions(-)

-- 
2.37.2



Re: [PATCH 18/21] ARM: drop SMP support for ARM11MPCore

2023-03-30 Thread Arnd Bergmann
On Thu, Mar 30, 2023, at 09:48, Neil Armstrong wrote:
> On 27/03/2023 14:13, Arnd Bergmann wrote:
>> From: Arnd Bergmann 
>> 
>> The cache management operations for noncoherent DMA on ARMv6 work
>> in two different ways:
>> 
>>   * When CONFIG_DMA_CACHE_RWFO is set, speculative prefetches on in-flight
>> DMA buffers lead to data corruption when the prefetched data is written
>> back on top of data from the device.
>> 
>>   * When CONFIG_DMA_CACHE_RWFO is disabled, a cache flush on one CPU
>> is not seen by the other core(s), leading to inconsistent contents
>> across the system.
>> 
>> As a consequence, neither configuration is actually safe to use in a
>> general-purpose kernel that is used on both MPCore systems and ARM1176
>> with prefetching enabled.
>> 
>> We could add further workarounds to make the behavior more dynamic based
>> on the system, but realistically, there are close to zero remaining
>> users on any ARM11MPCore anyway, and nobody seems too interested in it,
>> compared to the more popular ARM1176 used in BCM2835 and AST2500.
>> 
>> The Oxnas platform has some minimal support in OpenWRT, but most of the
>> drivers and dts files never made it into the mainline kernel, while the
>> Arm Versatile/Realview platform mainly serves as a reference system but
>> is not necessary to be kept working once all other ARM11MPCore are gone.
>
> Acked-by: Neil Armstrong 
>
> It's sad but it's the reality, there's no chance full OXNAS support will
> ever come upstream and no real work has been done for years.
>
> I think OXNAS support can be programmed for removal for next release,
> it would need significant work to rework current support to make it acceptable
> before trying to upstream missing bits anyway.

Ok, thanks for your reply!

To clarify, do you think we should plan for removal after the next
stable release (6.3, removed in 6.4), or after the next LTS
release (probably 6.6, removed in 6.7)? As far as I understand,
the next OpenWRT release (23.x) will be based on linux-5.15,
and the one after that (24.x) would likely still use 6.1, unless
they skip an LTS kernel.

 Arnd


Re: [PATCH] powerpc: don't try to copy ppc for task with NULL pt_regs

2023-03-30 Thread Christophe Leroy


Le 28/03/2023 à 13:47, Michael Ellerman a écrit :
> "Nicholas Piggin"  writes:
>> On Mon Mar 27, 2023 at 8:26 PM AEST, Christophe Leroy wrote:
> ...
>>>
>>> Now that thread.regs doesn't change anymore at each interrupt, it would
>>> probably be worth dropping it and falling back to task_pt_regs() as
>>> defined on most architectures.
>>> Whether a thread is a kernel or user thread can for instance be
>>> determined from the PF_KTHREAD flag, so thread.regs is not needed for that.
>>
>> That would be nice if we can define regs that way, I agree. We should
>> look into doing that.
> 
> Yeah it's on the long-list of things that need cleaning up.
> 
> I think there's some complication in working out which sites are OK to
> use/give-out the value in pt_regs that's potentially a dummy value, vs
> cases that actually want to check PF_KTHREAD and do something different.
> But that's just my hunch I haven't looked through all the sites.
> 
> The thread.regs = NULL for kernel threads goes back to arch/ppc, about
> 2002 by the looks:
> 
>
> https://github.com/mpe/linux-fullhistory/commit/2a8e186c384c0c911f91cd12367658eabdc820d8#diff-939b705cff722ee75595fad30d56bb1175dfdce49a69adb4d5656f354be076c6
> 
> There's no change log of course :)
> 
> Still maybe it doesn't matter why it was originally done that way, if we
> can do it differently now.
> 

I have the feeling that our logic is broken after commit 5bd2e97c868a 
("fork: Generalize PF_IO_WORKER handling")

Christophe


Re: [PATCH 18/21] ARM: drop SMP support for ARM11MPCore

2023-03-30 Thread Linus Walleij
On Mon, Mar 27, 2023 at 2:16 PM Arnd Bergmann  wrote:

> From: Arnd Bergmann 
>
> The cache management operations for noncoherent DMA on ARMv6 work
> in two different ways:
>
>  * When CONFIG_DMA_CACHE_RWFO is set, speculative prefetches on in-flight
>DMA buffers lead to data corruption when the prefetched data is written
>back on top of data from the device.
>
>  * When CONFIG_DMA_CACHE_RWFO is disabled, a cache flush on one CPU
>is not seen by the other core(s), leading to inconsistent contents
>across the system.
>
> As a consequence, neither configuration is actually safe to use in a
> general-purpose kernel that is used on both MPCore systems and ARM1176
> with prefetching enabled.
>
> We could add further workarounds to make the behavior more dynamic based
> on the system, but realistically, there are close to zero remaining
> users on any ARM11MPCore anyway, and nobody seems too interested in it,
> compared to the more popular ARM1176 used in BCM2835 and AST2500.
>
> The Oxnas platform has some minimal support in OpenWRT, but most of the
> drivers and dts files never made it into the mainline kernel, while the
> Arm Versatile/Realview platform mainly serves as a reference system but
> is not necessary to be kept working once all other ARM11MPCore are gone.
>
> Take the easy way out here and drop support for multiprocessing on
> ARMv6, along with the CONFIG_DMA_CACHE_RWFO option and the cache
> management implementation for it. This also helps with other ARMv6
> issues, but for the moment leaves the ability to build a kernel that
> can run on both ARMv7 SMP and single-processor ARMv6, which we probably
> want to stop supporting as well, but not as part of this series.
>
> Cc: Neil Armstrong 
> Cc: Daniel Golle 
> Cc: Linus Walleij 
> Cc: linux-ox...@groups.io
> Signed-off-by: Arnd Bergmann 

Yeah, we discussed this earlier, let's just drop it. Not worth the effort.
Acked-by: Linus Walleij 

Yours,
Linus Walleij


Re: [PATCH 08/21] riscv: dma-mapping: only invalidate after DMA, not flush

2023-03-30 Thread Arnd Bergmann
On Wed, Mar 29, 2023, at 22:48, Conor Dooley wrote:
> On Mon, Mar 27, 2023 at 02:13:04PM +0200, Arnd Bergmann wrote:
>> From: Arnd Bergmann 
>> 
>> No other architecture intentionally writes back dirty cache lines into
>> a buffer that a device has just finished writing into. If the cache is
>> clean, this has no effect at all, but
>
>> if a cacheline in the buffer has
>> actually been written by the CPU,  there is a drive bug that is likely
>> made worse by overwriting that buffer.
>
> So does this need a
> Fixes: 1631ba1259d6 ("riscv: Add support for non-coherent devices using 
> zicbom extension")
> then, even if the cacheline really should not have been touched by the
> CPU?
> Also, minor typo, s/drive/driver/.

done

> In the thread we had that sparked this, I went digging for the source of
> the flushes, and it came from a review comment:
> https://lore.kernel.org/linux-riscv/342e3c12-ebb0-badf-7d4c-c444a2b84...@sholland.org/

Ah, so the comment that led to it was 

"For arch_sync_dma_for_cpu(DMA_BIDIRECTIONAL), we expect the CPU to have
written to the buffer, so this should flush, not invalidate."

which sounds like Samuel just misunderstood what "bidirectional"
means: the comment implies that both the cpu and the device access
the buffer before arch_sync_dma_for_cpu(DMA_BIDIRECTIONAL), but
this is not allowed. Instead, the point is that the device may both
read and write the buffer, requiring that we must do a writeback
at arch_sync_dma_for_device(DMA_BIDIRECTIONAL) and an invalidate
at arch_sync_dma_for_cpu(DMA_BIDIRECTIONAL).

The comment about arch_sync_dma_for_device(DMA_FROM_DEVICE) (in the
same email) seems equally confused. It's of course easy to
misunderstand these, and many others have gotten confused in
similar ways before.

> But *surely* if no other arch needs to do that, then we are safe to also
> not do it... Your logic seems right by me at least, especially given the
> lack of flushes elsewhere.

Right, I remove the extra writeback from powerpc, parisc and microblaze
for the same reason. Those appear to only be there because they used the
same function for _for_device() as for _for_cpu(), not because someone
thought they were required.

> Reviewed-by: Conor Dooley 

Thanks!

 Arnd


[PATCH v2 05/12] powerpc/dexcr: Support userspace ROP protection

2023-03-30 Thread Benjamin Gray
The ISA 3.1B hashst and hashchk instructions use a per-cpu SPR HASHKEYR
to hold a key used in the hash calculation. This key should be different
for each process to make it harder for a malicious process to recreate
valid hash values for a victim process.

Add support for storing a per-thread hash key, and setting/clearing
HASHKEYR appropriately.

Signed-off-by: Benjamin Gray 

---

v1: * Guard HASHKEYR update behind change check
* HASHKEYR reset moved earlier to patch 2
---
 arch/powerpc/include/asm/processor.h |  1 +
 arch/powerpc/kernel/process.c| 17 +
 2 files changed, 18 insertions(+)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index bad64d6a5d36..666d4e9804a8 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -264,6 +264,7 @@ struct thread_struct {
unsigned long   mmcr3;
unsigned long   sier2;
unsigned long   sier3;
+   unsigned long   hashkeyr;
 
 #endif
 };
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a7f9f3f85e20..a966adb5447f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1182,6 +1182,9 @@ static inline void save_sprs(struct thread_struct *t)
 */
t->tar = mfspr(SPRN_TAR);
}
+
+   if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
+   t->hashkeyr = mfspr(SPRN_HASHKEYR);
 #endif
 }
 
@@ -1260,6 +1263,10 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
old_thread->tidr != new_thread->tidr)
mtspr(SPRN_TIDR, new_thread->tidr);
+
+   if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) &&
+   old_thread->hashkeyr != new_thread->hashkeyr)
+   mtspr(SPRN_HASHKEYR, new_thread->hashkeyr);
 #endif
 
 }
@@ -1844,6 +1851,10 @@ int copy_thread(struct task_struct *p, const struct 
kernel_clone_args *args)
childregs->ppr = DEFAULT_PPR;
 
p->thread.tidr = 0;
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+   if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
+   p->thread.hashkeyr = current->thread.hashkeyr;
 #endif
/*
 * Run with the current AMR value of the kernel
@@ -1972,6 +1983,12 @@ void start_thread(struct pt_regs *regs, unsigned long 
start, unsigned long sp)
current->thread.tm_tfiar = 0;
current->thread.load_tm = 0;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_PPC_BOOK3S_64
+   if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) {
+   current->thread.hashkeyr = get_random_long();
+   mtspr(SPRN_HASHKEYR, current->thread.hashkeyr);
+   }
+#endif /* CONFIG_PPC_BOOK3S_64 */
 }
 EXPORT_SYMBOL(start_thread);
 
-- 
2.39.2



[PATCH v2 11/12] selftests/powerpc/dexcr: Add hashst/hashchk test

2023-03-30 Thread Benjamin Gray
Test the kernel DEXCR[NPHIE] interface and hashchk exception handling.

Introduces with it a DEXCR utils library for common DEXCR operations.

Volatile is used to prevent the compiler optimising away the signal
tests.

Signed-off-by: Benjamin Gray 

---
v1: * Clean up dexcr makefile
* Include kernel headers in CFLAGS
* Use numeric literals for hashst/hashchk to support older
  toolchains
* A lot of other refactoring
---
 tools/testing/selftests/powerpc/Makefile  |   1 +
 .../selftests/powerpc/dexcr/.gitignore|   1 +
 .../testing/selftests/powerpc/dexcr/Makefile  |   7 +
 tools/testing/selftests/powerpc/dexcr/dexcr.c | 132 ++
 tools/testing/selftests/powerpc/dexcr/dexcr.h |  49 
 .../selftests/powerpc/dexcr/hashchk_test.c| 227 ++
 tools/testing/selftests/powerpc/include/reg.h |   4 +
 .../testing/selftests/powerpc/include/utils.h |   4 +
 tools/testing/selftests/powerpc/utils.c   |  24 ++
 9 files changed, 449 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/dexcr/.gitignore
 create mode 100644 tools/testing/selftests/powerpc/dexcr/Makefile
 create mode 100644 tools/testing/selftests/powerpc/dexcr/dexcr.c
 create mode 100644 tools/testing/selftests/powerpc/dexcr/dexcr.h
 create mode 100644 tools/testing/selftests/powerpc/dexcr/hashchk_test.c

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 6ba95cd19e42..00dbd000ee01 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -17,6 +17,7 @@ SUB_DIRS = alignment  \
   benchmarks   \
   cache_shape  \
   copyloops\
+  dexcr\
   dscr \
   mm   \
   nx-gzip  \
diff --git a/tools/testing/selftests/powerpc/dexcr/.gitignore 
b/tools/testing/selftests/powerpc/dexcr/.gitignore
new file mode 100644
index 000000000000..d12e4560aca9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/.gitignore
@@ -0,0 +1 @@
+hashchk_test
diff --git a/tools/testing/selftests/powerpc/dexcr/Makefile 
b/tools/testing/selftests/powerpc/dexcr/Makefile
new file mode 100644
index 000000000000..16c8b489948a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/Makefile
@@ -0,0 +1,7 @@
+TEST_GEN_PROGS := hashchk_test
+
+include ../../lib.mk
+
+$(OUTPUT)/hashchk_test: CFLAGS += -fno-pie $(call cc-option,-mno-rop-protect)
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ./dexcr.c
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.c 
b/tools/testing/selftests/powerpc/dexcr/dexcr.c
new file mode 100644
index 000000000000..65ec5347de98
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "dexcr.h"
+#include "reg.h"
+#include "utils.h"
+
+static jmp_buf generic_signal_jump_buf;
+
+static void generic_signal_handler(int signum, siginfo_t *info, void *context)
+{
+   longjmp(generic_signal_jump_buf, 0);
+}
+
+bool dexcr_exists(void)
+{
+   struct sigaction old;
+   volatile bool exists;
+
+   old = push_signal_handler(SIGILL, generic_signal_handler);
+   if (setjmp(generic_signal_jump_buf))
+   goto out;
+
+   /*
+* If the SPR is not recognised by the hardware it triggers
+* a hypervisor emulation interrupt. If the kernel does not
+* recognise/try to emulate it, we receive a SIGILL signal.
+*
+* If we do not receive a signal, assume we have the SPR or the
+* kernel is trying to emulate it correctly.
+*/
+   exists = false;
+   mfspr(SPRN_DEXCR_RO);
+   exists = true;
+
+out:
+   pop_signal_handler(SIGILL, old);
+   return exists;
+}
+
+/*
+ * Just test if a bad hashchk triggers a signal, without checking
+ * for support or if the NPHIE aspect is enabled.
+ */
+bool hashchk_triggers(void)
+{
+   struct sigaction old;
+   volatile bool triggers;
+
+   old = push_signal_handler(SIGILL, generic_signal_handler);
+   if (setjmp(generic_signal_jump_buf))
+   goto out;
+
+   triggers = true;
+   do_bad_hashchk();
+   triggers = false;
+
+out:
+   pop_signal_handler(SIGILL, old);
+   return triggers;
+}
+
+unsigned int get_dexcr(enum dexcr_source source)
+{
+   switch (source) {
+   case DEXCR:
+   return mfspr(SPRN_DEXCR_RO);
+   case HDEXCR:
+   return mfspr(SPRN_HDEXCR_RO);
+   case EFFECTIVE:
+   return mfspr(SPRN_DEXCR_RO) | mfspr(SPRN_HDEXCR_RO);
+   default:
+   FAIL_IF_EXIT_MSG(true, "bad enum dexcr_source");
+   }
+}
+
+void await_child_success(pid_t pid)
+{
+   int wstatus;
+
+   FAIL_IF_EXIT_MSG(pid == -1, "fork failed");
+   FAIL_IF_EXIT_MSG(waitpid(pid, &wstatus, 0) == -1,