[PATCH 2/2] powerpc/64: Fix msr_check_and_set/clear MSR[EE] race

2022-10-03 Thread Nicholas Piggin
irq soft-masking means that when Linux irqs are disabled, the MSR[EE]
value can change from 1 to 0 asynchronously: if a masked interrupt of
the PACA_IRQ_MUST_HARD_MASK variety fires while irqs are disabled,
the masked handler will return with MSR[EE]=0.

This means a sequence like mtmsr(mfmsr() | MSR_FP) is racy if it can
be called with local irqs disabled, unless a hard_irq_disable has been
done.

Reported-by: Sachin Sant 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/hw_irq.h | 24 
 arch/powerpc/kernel/process.c |  4 ++--
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index 26ede09c521d..db139deb4466 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -459,6 +459,30 @@ static inline void irq_soft_mask_regs_set_state(struct 
pt_regs *regs, unsigned l
 }
 #endif /* CONFIG_PPC64 */
 
+static inline unsigned long mtmsr_isync_irqsafe(unsigned long msr)
+{
+#ifdef CONFIG_PPC64
+   if (arch_irqs_disabled()) {
+   /*
+* With soft-masking, MSR[EE] can change from 1 to 0
+* asynchronously when irqs are disabled, and we don't want to
+* set MSR[EE] back to 1 here if that has happened. A race-free
+* way to do this is ensure EE is already 0. Another way it
+* could be done is with a RESTART_TABLE handler, but that's
+* probably overkill here.
+*/
+   msr &= ~MSR_EE;
+   mtmsr_isync(msr);
+   irq_soft_mask_set(IRQS_ALL_DISABLED);
+   local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+   } else
+#endif
+   mtmsr_isync(msr);
+
+   return msr;
+}
+
+
 #define ARCH_IRQ_INIT_FLAGSIRQ_NOREQUEST
 
 #endif  /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 0fbda89cd1bb..37df0428e4fb 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -127,7 +127,7 @@ unsigned long notrace msr_check_and_set(unsigned long bits)
newmsr |= MSR_VSX;
 
if (oldmsr != newmsr)
-   mtmsr_isync(newmsr);
+   newmsr = mtmsr_isync_irqsafe(newmsr);
 
return newmsr;
 }
@@ -145,7 +145,7 @@ void notrace __msr_check_and_clear(unsigned long bits)
newmsr &= ~MSR_VSX;
 
if (oldmsr != newmsr)
-   mtmsr_isync(newmsr);
+   mtmsr_isync_irqsafe(newmsr);
 }
 EXPORT_SYMBOL(__msr_check_and_clear);
 
-- 
2.37.2



[PATCH 1/2] powerpc/64s/interrupt: Change must-hard-mask interrupt check from BUG to WARN

2022-10-03 Thread Nicholas Piggin
The condition caught by this newly added assertion is generally harmless
and gets fixed up naturally, but it does indicate a problem with MSR
manipulation somewhere.

Fixes: c39fb71a54f0 ("powerpc/64s/interrupt: masked handler debug check for 
previous hard disable")
Reported-by: Sachin Sant 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index dafa275f18bc..d5d35f3a824e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -2797,12 +2797,16 @@ masked_interrupt:
 #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
/*
 * Ensure there was no previous MUST_HARD_MASK interrupt or
-* HARD_DIS setting.
+* HARD_DIS setting. If this does fire, the interrupt is still
+* masked and MSR[EE] will be cleared on return, so no need to
+* panic, but somebody probably enabled MSR[EE] under
+* PACA_IRQ_HARD_DIS, mtmsr(mfmsr() | MSR_x) being a common
+* cause.
 */
lbz r9,PACAIRQHAPPENED(r13)
andi.   r9,r9,(PACA_IRQ_MUST_HARD_MASK|PACA_IRQ_HARD_DIS)
 0: tdnei   r9,0
-   EMIT_BUG_ENTRY 0b,__FILE__,__LINE__,0
+   EMIT_WARN_ENTRY 0b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
 #endif
lbz r9,PACAIRQHAPPENED(r13)
or  r9,r9,r10
-- 
2.37.2



Re: [RFC PATCH 2/2] powerpc: nop trap instruction after WARN_ONCE fires

2022-10-03 Thread Nicholas Piggin
On Sat Sep 24, 2022 at 2:47 AM AEST, Christophe Leroy wrote:
>
>
> Le 23/09/2022 à 17:41, Nicholas Piggin a écrit :
> > WARN_ONCE and similar are often used in frequently executed code, and
> > should not crash the system. The program check interrupt caused by
> > WARN_ON_ONCE can be a significant overhead even when nothing is being
> > printed. This can cause performance to become unacceptable, having the
> > same effective impact to the user as a BUG_ON().
> > 
> > Avoid this overhead by patching the trap with a nop instruction after a
> > "once" trap fires. Conditional warnings that return a result must have
> > equivalent compare and branch instructions after the trap, so when it is
> > nopped the statement will behave the same way. It's possible the asm
> > goto should be removed entirely and this comparison just done in C now.
>
> You mean, just like PPC32 ? (Since db87a7199229 ("powerpc/bug: Remove 
> specific powerpc BUG_ON() and WARN_ON() on PPC32"))
>
> But I'm having a hard time with your change.
>
> You change only WARN_ON()
> But WARN_ON_ONCE() calls __WARN_FLAGS()
> And WARN_ONCE() calls WARN() via DO_ONCE_LITE_IF()
>
> So I don't see any ..._ONCE something going with WARN_ON().
>
> Am I missing something ?

Hmm, no I must have missed something. I guess it is the EMIT_WARN_ENTRY
in asm which is the main problem I've seen. Although we could remove the
DO_ONCE_LITE_IF code generation from our WARN_ON_ONCE as well if we did
this patching.

Thanks,
Nick



Re: [PATCH v2] i2c-pasemi: PASemi I2C controller IRQ enablement

2022-10-03 Thread Michael Ellerman
"Sven Peter"  writes:
> On Sun, Oct 2, 2022, at 16:07, Arminder Singh wrote:
>> Hi,
>>
>>>  #define REG_MTXFIFO0x00
>>>  #define REG_MRXFIFO0x04
>>>  #define REG_SMSTA  0x14
>>> +#define REG_IMASK   0x18
>>
>>> This doesn't seem to be aligned correctly, this file seems to use a tab
>>> to separate the register name and the offset and you used spaces here.
>>
>>> @@ -15,7 +16,11 @@ struct pasemi_smbus {
>>> struct i2c_adapter   adapter;
>>> void __iomem*ioaddr;
>>> unsigned int clk_div;
>>> -   int  hw_rev;
>>> +   int  hw_rev;
>>> +   int  use_irq;
>>> +   struct completionirq_completion;
>>
>>> This doesn't seem to be aligned correctly and the hw_rev line
>>> doesn't have to be changed.
>>
>> I'm sorry for the alignment issues in the patch, I genuinely didn't notice
>> them as from the perspective of my primary editor (Visual Studio Code)
>> the entries were aligned. I just saw them when opening the files in
>> nano.
>
> No worries, it's just a small nit and quickly fixed after all! :)
>
>>
>> Does fixing the alignment issues and the commit description justify a v3
>> of the patch or should the minor fixes go out as a "resend"? Just not sure
>> in this particular case as the fixes seem to be very minor ones.
>
> I'd send a v3. I've only used resend when e.g. my previous mail provider
> messed up and silently converted all my outgoing mails to HTML.

If you've modified the patches then it's not a "resend":

  
https://www.kernel.org/doc/html/latest/process/submitting-patches.html#don-t-get-discouraged-or-impatient

So yeah send a v3 in this case.

cheers


linux-next: manual merge of the powerpc tree with the kbuild tree

2022-10-03 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the powerpc tree got a conflict in:

  arch/powerpc/Makefile

between commit:

  ce697ccee1a8 ("kbuild: remove head-y syntax")

from the kbuild tree and commit:

  dfc3095cec27 ("powerpc: Remove CONFIG_FSL_BOOKE")

from the powerpc tree.

I fixed it up (I used the former version of this file and added the
following merge fix patch) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

From: Stephen Rothwell 
Date: Tue, 4 Oct 2022 09:13:46 +1100
Subject: [PATCH] powerpc: fix up for "kbuild: remove head-y syntax"

Signed-off-by: Stephen Rothwell 
---
 scripts/head-object-list.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt
index dd2ba2eda636..e8219d65c3b7 100644
--- a/scripts/head-object-list.txt
+++ b/scripts/head-object-list.txt
@@ -34,7 +34,7 @@ arch/powerpc/kernel/head_44x.o
 arch/powerpc/kernel/head_64.o
 arch/powerpc/kernel/head_8xx.o
 arch/powerpc/kernel/head_book3s_32.o
-arch/powerpc/kernel/head_fsl_booke.o
+arch/powerpc/kernel/head_85xx.o
 arch/powerpc/kernel/entry_64.o
 arch/powerpc/kernel/fpu.o
 arch/powerpc/kernel/vector.o
-- 
2.35.1

-- 
Cheers,
Stephen Rothwell


pgpc_loXHWhAe.pgp
Description: OpenPGP digital signature


linux-next: manual merge of the powerpc tree with the kbuild tree

2022-10-03 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the powerpc tree got a conflict in:

  arch/powerpc/kernel/Makefile

between commit:

  321648455061 ("kbuild: use obj-y instead extra-y for objects placed at the 
head")

from the kbuild tree and commit:

  dfc3095cec27 ("powerpc: Remove CONFIG_FSL_BOOKE")

from the powerpc tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc arch/powerpc/kernel/Makefile
index ad3decb9f20b,1f121c188805..
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@@ -118,12 -116,12 +116,12 @@@ obj-$(CONFIG_PPC_E500)  += cpu_setup_e5
  obj-$(CONFIG_PPC_DOORBELL)+= dbell.o
  obj-$(CONFIG_JUMP_LABEL)  += jump_label.o
  
 -extra-$(CONFIG_PPC64) := head_64.o
 -extra-$(CONFIG_PPC_BOOK3S_32) := head_book3s_32.o
 -extra-$(CONFIG_40x)   := head_40x.o
 -extra-$(CONFIG_44x)   := head_44x.o
 -extra-$(CONFIG_PPC_85xx)  := head_85xx.o
 -extra-$(CONFIG_PPC_8xx)   := head_8xx.o
 +obj-$(CONFIG_PPC64)   += head_64.o
 +obj-$(CONFIG_PPC_BOOK3S_32)   += head_book3s_32.o
 +obj-$(CONFIG_40x) += head_40x.o
 +obj-$(CONFIG_44x) += head_44x.o
- obj-$(CONFIG_FSL_BOOKE)   += head_fsl_booke.o
++obj-$(CONFIG_PPC_85xx):= head_85xx.o
 +obj-$(CONFIG_PPC_8xx) += head_8xx.o
  extra-y   += vmlinux.lds
  
  obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o


pgp8sZdcwfUbm.pgp
Description: OpenPGP digital signature


Re: [PATCH] tools/perf: Fix aggr_printout to display cpu field irrespective of core value

2022-10-03 Thread Ian Rogers
On Mon, Oct 3, 2022 at 7:03 AM atrajeev  wrote:
>
> On 2022-10-02 05:17, Ian Rogers wrote:
> > On Thu, Sep 29, 2022 at 5:56 AM James Clark 
> > wrote:
> >>
> >>
> >>
> >> On 29/09/2022 09:49, Athira Rajeev wrote:
> >> >
> >> >
> >> >> On 28-Sep-2022, at 9:05 PM, James Clark  wrote:
> >> >>
> >> >>
> >> >>
> >> >
> >> > Hi James,
> >> >
> >> > Thanks for looking at the patch and sharing review comments.
> >> >
> >> >> On 13/09/2022 12:57, Athira Rajeev wrote:
> >> >>> perf stat includes option to specify aggr_mode to display
> >> >>> per-socket, per-core, per-die, per-node counter details.
> >> >>> Also there is option -A ( AGGR_NONE, -no-aggr ), where the
> >> >>> counter values are displayed for each cpu along with "CPU"
> >> >>> value in one field of the output.
> >> >>>
> >> >>> Each of the aggregate mode uses the information fetched
> >> >>> from "/sys/devices/system/cpu/cpuX/topology" like core_id,
> >> >>
> >> >> I thought that this wouldn't apply to the cpu field because cpu is
> >> >> basically interchangeable as an index in cpumap, rather than anything
> >> >> being read from the topology file.
> >> >
> >> > The cpu value is filled in this function:
> >> >
> >> > Function : aggr_cpu_id__cpu
> >> > Code: util/cpumap.c
> >> >
> >> >>
> >> >>> physical_package_id. Utility functions in "cpumap.c" fetches
> >> >>> this information and populates the socket id, core id, cpu etc.
> >> >>> If the platform does not expose the topology information,
> >> >>> these values will be set to -1. Example, in case of powerpc,
> >> >>> details like physical_package_id is restricted to be exposed
> >> >>> in pSeries platform. So id.socket, id.core, id.cpu all will
> >> >>> be set as -1.
> >> >>>
> >> >>> In case of displaying socket or die value, there is no check
> >> >>> done in the "aggr_printout" function to see if it points to
> >> >>> valid socket id or die. But for displaying "cpu" value, there
> >> >>> is a check for "if (id.core > -1)". In case of powerpc pSeries
> >> >>> where detail like physical_package_id is restricted to be
> >> >>> exposed, id.core will be set to -1. Hence the column or field
> >> >>> itself for CPU won't be displayed in the output.
> >> >>>
> >> >>> Result for per-socket:
> >> >>>
> >> >>> <<>>
> >> >>> perf stat -e branches --per-socket -a true
> >> >>>
> >> >>> Performance counter stats for 'system wide':
> >> >>>
> >> >>> S-1  32416,851  branches
> >> >>> <<>>
> >> >>>
> >> >>> Here S has -1 in above result. But with -A option which also
> >> >>> expects CPU in one column in the result, below is observed.
> >> >>>
> >> >>> <<>>
> >> >>> /bin/perf stat -e instructions -A -a true
> >> >>>
> >> >>> Performance counter stats for 'system wide':
> >> >>>
> >> >>>47,146  instructions
> >> >>>45,226  instructions
> >> >>>43,354  instructions
> >> >>>45,184  instructions
> >> >>> <<>>
> >> >>>
> >> >>> If the cpu id value is pointing to -1 also, it makes sense
> >> >>> to display the column in the output to replicate the behaviour
> >> >>> or to be in precedence with other aggr options(like per-socket,
> >> >>> per-core). Remove the check "id.core" so that CPU field gets
> >> >>> displayed in the output.
> >> >>
> >> >> Why would you want to print -1 out? Seems like the if statement was a
> >> >> good one to me, otherwise the output looks a bit broken to users. Are
> >> >> the other aggregation modes even working if -1 is set for socket and
> >> >> die? Maybe we need to not print -1 in those cases or exit earlier with a
> >> >> failure.
> >> >>
> >> >> The -1 value has a specific internal meaning which is "to not
> >> >> aggregate". It doesn't mean "not set".
> >> >
> >> > Currently, this check is done only for printing cpu value.
> >> > For socket/die/core values, this check is not done. Pasting an
> >> > example snippet from a powerpc system ( specifically from pseries 
> >> > platform where
> >> > the value is set to -1 )
> >> >
> >> > ./perf stat --per-core -a -C 1 true
> >> >
> >> >  Performance counter stats for 'system wide':
> >> >
> >> > S-1-D-1-C-1  1   1.06 msec cpu-clock 
> >> >#1.018 CPUs utilized
> >> > S-1-D-1-C-1  1  2  context-switches  
> >> >#1.879 K/sec
> >> > S-1-D-1-C-1  1  0  cpu-migrations
> >> >#0.000 /sec
> >> >
> >> > Here though the value is -1, we are displaying it. Where as in case of 
> >> > cpu, the first column will be
> >> > empty since we do a check before printing.
> >> >
> >> > Example:
> >> >
> >> > ./perf stat --per-core -A -C 1 true
> >> >
> >> >  Performance counter stats for 'CPU(s) 1':
> >> >
> >> >   0.88 msec cpu-clock#1.022 CPUs 
> >> > utilized
> >> >  2  context-switches
> >> >  0  cpu-migrations
> >> >
> >> >
> >> > Not sure, whether 

Re: [PATCH net-next v6 1/9] dt-bindings: net: Expand pcs-handle to an array

2022-10-03 Thread Rob Herring
On Fri, 30 Sep 2022 16:09:25 -0400, Sean Anderson wrote:
> This allows multiple phandles to be specified for pcs-handle, such as
> when multiple PCSs are present for a single MAC. To differentiate
> between them, also add a pcs-handle-names property.
> 
> Signed-off-by: Sean Anderson 
> ---
> This was previously submitted as [1]. I expect to update this series
> more, so I have moved it here. Changes from that version include:
> - Add maxItems to existing bindings
> - Add a dependency from pcs-names to pcs-handle.
> 
> [1] 
> https://lore.kernel.org/netdev/20220711160519.741990-3-sean.ander...@seco.com/
> 
> Changes in v6:
> - Remove unnecessary $ref from renesas,rzn1-a5psw
> - Remove unnecessary type from pcs-handle-names
> - Add maxItems to pcs-handle
> 
> Changes in v4:
> - Use pcs-handle-names instead of pcs-names, as discussed
> 
> Changes in v3:
> - New
> 
>  .../bindings/net/dsa/renesas,rzn1-a5psw.yaml  |  2 +-
>  .../devicetree/bindings/net/ethernet-controller.yaml  | 11 ++-
>  .../devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml   |  2 +-
>  3 files changed, 12 insertions(+), 3 deletions(-)
> 

Reviewed-by: Rob Herring 


Re: [PATCH v6 4/8] phy: fsl: Add Lynx 10G SerDes driver

2022-10-03 Thread Sean Anderson
Hi Vinod/Ioana,

Ioana, last time you commented [1], you said you were interested in testing
this out on some other SoCs. Did you ever end up doing that? If you've had
a chance to look at this driver more closely, do you have any suggestions for
dynamic reconfiguration?

[1] 
https://lore.kernel.org/netdev/gv1pr04mb9055f41ad598f85648b54ee2e0...@gv1pr04mb9055.eurprd04.prod.outlook.com/

On 9/26/22 11:35 AM, Sean Anderson wrote:
> 
> 
> On 9/24/22 2:54 AM, Vinod Koul wrote:
>> On 20-09-22, 16:23, Sean Anderson wrote:
>>> This adds support for the Lynx 10G "SerDes" devices found on various NXP
>>> QorIQ SoCs. There may be up to four SerDes devices on each SoC, each
>>> supporting up to eight lanes. Protocol support for each SerDes is highly
>>> heterogeneous, with each SoC typically having a totally different
>>> selection of supported protocols for each lane. Additionally, the SerDes
>>> devices on each SoC also have differing support. One SerDes will
>>> typically support Ethernet on most lanes, while the other will typically
>>> support PCIe on most lanes.
>>> 
>>> There is wide hardware support for this SerDes. It is present on QorIQ
>>> T-Series and Layerscape processors. Because each SoC typically has
>>> specific instructions and exceptions for its SerDes, I have limited the
>>> initial scope of this module to just the LS1046A and LS1088A.
>>> Additionally, I have only added support for Ethernet protocols. There is
>>> not a great need for dynamic reconfiguration for other protocols (except
>>> perhaps for M.2 cards), so support for them may never be added.
>>> 
>>> Nevertheless, I have tried to provide an obvious path for adding support
>>> for other SoCs as well as other protocols. SATA just needs support for
>>> configuring LNmSSCR0. PCIe may need to configure the equalization
>>> registers. It also uses multiple lanes. I have tried to write the driver
>>> with multi-lane support in mind, so there should not need to be any
>>> large changes. Although there are 6 protocols supported, I have only
>>> tested SGMII and XFI. The rest have been implemented as described in
>>> the datasheet. Most of these protocols should work "as-is", but
>>> 10GBASE-KR will need PCS support for link training.
>>> 
>>> The PLLs are modeled as clocks proper. This lets us take advantage of
>>> the existing clock infrastructure. I have not given the same treatment
>>> to the per-lane clocks because they need to be programmed in-concert
>>> with the rest of the lane settings. One tricky thing is that the VCO
>>> (PLL) rate exceeds 2^32 (maxing out at around 5GHz). This will be a
>>> problem on 32-bit platforms, since clock rates are stored as unsigned
>>> longs. To work around this, the pll clock rate is generally treated in
>>> units of kHz.
>>> 
>>> The PLLs are configured rather interestingly. Instead of the usual direct
>>> programming of the appropriate divisors, the input and output clock rates
>>> are selected directly. Generally, the only restriction is that the input
>>> and output must be integer multiples of each other. This suggests some kind
>>> of internal look-up table. The datasheets generally list out the supported
>>> combinations explicitly, and not all input/output combinations are
>>> documented. I'm not sure if this is due to lack of support, or due to an
>>> oversight. If this becomes an issue, then some combinations can be
>>> blacklisted (or whitelisted). This may also be necessary for other SoCs
>>> which have more stringent clock requirements.
>>> 
>>> The general API call list for this PHY is documented under the driver-api
>>> docs. I think this is rather standard, except that most drivers configure
>>> the mode (protocol) at xlate-time. Unlike some other phys where e.g. PCIe
>>> x4 will use 4 separate phys all configured for PCIe, this driver uses one
>>> phy configured to use 4 lanes. This is because while the individual lanes
>>> may be configured individually, the protocol selection acts on all lanes at
>>> once. Additionally, the order which lanes should be configured in is
>>> specified by the datasheet.  To coordinate this, lanes are reserved in
>>> phy_init, and released in phy_exit.
>>> 
>>> This driver was written with reference to the LS1046A reference manual.
>>> However, it was informed by reference manuals for all processors with
>>> mEMACs, especially the T4240 (which appears to have a "maxed-out"
>>> configuration). The earlier P-Series processors appear to be similar, but
>>> have a different overall register layout (using "banks" instead of
>>> separate SerDes). Perhaps those use a "5G Lynx SerDes."
>>> 
>>> Signed-off-by: Sean Anderson 
>>> ---
>>> 
>>> Changes in v6:
>>> - Update MAINTAINERS to include new files
>>> - Include bitfield.h and slab.h to allow compilation on non-arm64
>>>   arches.
>>> - Depend on COMMON_CLK and either layerscape/ppc
>>> 
>>> Changes in v5:
>>> - Remove references to PHY_INTERFACE_MODE_1000BASEKX to allow this
>>>   series to be applied 

Re: [PATCH v3 1/7] ASoC: dt-bindings: fsl_rpmsg: Add a property to assign the rpmsg channel

2022-10-03 Thread Rob Herring
On Fri, 30 Sep 2022 14:44:35 +0800, Chancel Liu wrote:
> Add a string property to assign the rpmsg channel this sound card sits
> on. This property can be omitted if there is only one sound card and it
> sits on "rpmsg-audio-channel".
> 
> Signed-off-by: Chancel Liu 
> ---
>  .../devicetree/bindings/sound/fsl,rpmsg.yaml  | 36 +--
>  1 file changed, 34 insertions(+), 2 deletions(-)
> 

Acked-by: Rob Herring 


[PATCH] powerpc: Implement slightly better 64-bit LE non-VMX memory copy

2022-10-03 Thread Joel Stanley
From: Paul Mackerras 

At present, on 64-bit little-endian machines, we have the choice of
either a dumb loop that does one byte per iteration, or an optimized
loop using VMX instructions.  On microwatt, we don't have VMX, so
we are stuck with the dumb loop, which is very slow.

This makes the dumb loop a little less dumb.  It now does 16 bytes
per iteration, using 'ld' and 'std' instructions.  If the number of
bytes to copy is not a multiple of 16, the one-byte-per-iteration
loop is used for the last 1--15 bytes.

Signed-off-by: Paul Mackerras 
Signed-off-by: Joel Stanley 
---
 arch/powerpc/lib/memcpy_64.S | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 016c91e958d8..bed7eb327b25 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -18,7 +18,7 @@
 _GLOBAL_TOC_KASAN(memcpy)
 BEGIN_FTR_SECTION
 #ifdef __LITTLE_ENDIAN__
-   cmpdi   cr7,r5,0
+   clrldi  r6,r5,60
 #else
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination 
pointer for return value */
 #endif
@@ -29,13 +29,24 @@ FTR_SECTION_ELSE
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 #ifdef __LITTLE_ENDIAN__
/* dumb little-endian memcpy that will get replaced at runtime */
-   addi r9,r3,-1
-   addi r4,r4,-1
-   beqlr cr7
-   mtctr r5
-1: lbzu r10,1(r4)
-   stbu r10,1(r9)
-   bdnz 1b
+   addir9,r3,-8
+   addir4,r4,-8
+   srdi.   r0,r5,4
+   beq 2f
+   mtctr   r0
+3: ld  r10,8(r4)
+   std r10,8(r9)
+   ldu r10,16(r4)
+   stdur10,16(r9)
+   bdnz3b
+2: cmpwi   r6,0
+   beqlr
+   addir9,r9,7
+   addir4,r4,7
+   mtctr   r6
+1: lbzur10,1(r4)
+   stbur10,1(r9)
+   bdnz1b
blr
 #else
PPC_MTOCRF(0x01,r5)
-- 
2.35.1



[PATCH 5.19 002/101] tools/perf: Fix out of bound access to cpu mask array

2022-10-03 Thread Greg Kroah-Hartman
From: Athira Rajeev 

[ Upstream commit cbd7bfc7fd99acdde58ec2b0bce990158fba1654 ]

The cpu mask init code in the "record__mmap_cpu_mask_init" function accesses
the "bits" array that is part of "struct mmap_cpu_mask".  The size of this
array is the value from cpu__max_cpu().cpu.  This array is used to contain
the cpumask value for each cpu. While setting the bit for each cpu, it calls
the "set_bit" function, which accesses an index in the "bits" array.

If the value passed to the -C command line option is greater than the
number of CPUs present in the system, set_bit could access an array
member that is outside the bounds of the array. This is because currently,
there is no boundary check for the CPU. This will result in a seg fault:

<<>>
  ./perf record -C 12341234 ls
  Perf can support 2048 CPUs. Consider raising MAX_NR_CPUS
  Segmentation fault (core dumped)
<<>>

Debugging with gdb, points to function flow as below:

<<>>
  set_bit
  record__mmap_cpu_mask_init
  record__init_thread_default_masks
  record__init_thread_masks
  cmd_record
<<>>

Fix this by adding boundary check for the array.

After the patch:

<<>>
./perf record -C 12341234 ls
  Perf can support 2048 CPUs. Consider raising MAX_NR_CPUS
  Failed to initialize parallel data streaming masks
<<>>

With this fix, if -C is given a non-existent CPU, perf
record will fail with:

<<>>
  ./perf record -C 50 ls
  Failed to initialize parallel data streaming masks
<<>>

Reported-by: Nageswara R Sastry 
Signed-off-by: Athira Jajeev 
Tested-by: Arnaldo Carvalho de Melo 
Tested-by: Nageswara R Sastry 
Cc: Jiri Olsa 
Cc: Kajol Jain 
Cc: Madhavan Srinivasan 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Link: 
https://lore.kernel.org/r/20220905141929.7171-2-atraj...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo 
Stable-dep-of: ca76d7d2812b ("perf record: Fix cpu mask bit setting for mixed 
mmaps")
Signed-off-by: Sasha Levin 
---
 tools/perf/builtin-record.c | 26 --
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 68c878b4e5e4..708880a1c83c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -3335,16 +3335,22 @@ static struct option __record_options[] = {
 
 struct option *record_options = __record_options;
 
-static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct 
perf_cpu_map *cpus)
+static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct 
perf_cpu_map *cpus)
 {
struct perf_cpu cpu;
int idx;
 
if (cpu_map__is_dummy(cpus))
-   return;
+   return 0;
 
-   perf_cpu_map__for_each_cpu(cpu, idx, cpus)
+   perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+   /* Return ENODEV is input cpu is greater than max cpu */
+   if ((unsigned long)cpu.cpu > mask->nbits)
+   return -ENODEV;
set_bit(cpu.cpu, mask->bits);
+   }
+
+   return 0;
 }
 
 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const 
char *mask_spec)
@@ -3356,7 +3362,9 @@ static int record__mmap_cpu_mask_init_spec(struct 
mmap_cpu_mask *mask, const cha
return -ENOMEM;
 
bitmap_zero(mask->bits, mask->nbits);
-   record__mmap_cpu_mask_init(mask, cpus);
+   if (record__mmap_cpu_mask_init(mask, cpus))
+   return -ENODEV;
+
perf_cpu_map__put(cpus);
 
return 0;
@@ -3438,7 +3446,12 @@ static int record__init_thread_masks_spec(struct record 
*rec, struct perf_cpu_ma
pr_err("Failed to allocate CPUs mask\n");
return ret;
}
-   record__mmap_cpu_mask_init(_mask, cpus);
+
+   ret = record__mmap_cpu_mask_init(_mask, cpus);
+   if (ret) {
+   pr_err("Failed to init cpu mask\n");
+   goto out_free_cpu_mask;
+   }
 
ret = record__thread_mask_alloc(_mask, cpu__max_cpu().cpu);
if (ret) {
@@ -3679,7 +3692,8 @@ static int record__init_thread_default_masks(struct 
record *rec, struct perf_cpu
if (ret)
return ret;
 
-   record__mmap_cpu_mask_init(>thread_masks->maps, cpus);
+   if (record__mmap_cpu_mask_init(>thread_masks->maps, cpus))
+   return -ENODEV;
 
rec->nr_threads = 1;
 
-- 
2.35.1