[PATCH] powerpc/prom: move the device tree to the right space
If the device tree has been allocated memory, it will be in the memblock
reserved space. It is then within a valid memory region and will be
mapped by the kernel, so it does not need to be moved.

Signed-off-by: Youlin Song
---
 arch/powerpc/kernel/prom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9a4797d1d40d..ef5f93e7d7f2 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -121,7 +121,7 @@ static void __init move_device_tree(void)
 	size = fdt_totalsize(initial_boot_params);
 
 	if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||
-	    !memblock_is_memory(start + size - 1) ||
+	    (!memblock_is_memory(start + size - 1) && !memblock_is_reserved(start + size - 1)) ||
 	    overlaps_crashkernel(start, size) || overlaps_initrd(start, size)) {
 		p = memblock_alloc_raw(size, PAGE_SIZE);
 		if (!p)
-- 
2.25.1
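As a sketch of the logic change (hypothetical stand-ins, not kernel code): the patch widens the "FDT is already in a usable location" test so that a memblock-reserved placement no longer forces a copy:

```c
#include <assert.h>
#include <stdbool.h>

/* Hypothetical model of the placement check in move_device_tree().
 * The flattened device tree only needs to be copied when its last byte
 * is neither in declared memory nor in a memblock reserved region (the
 * memory_limit/crashkernel/initrd conditions are omitted here). */
struct region { bool in_memory; bool in_reserved; };

/* Old check: a reserved-but-not-declared placement forces a copy. */
static bool must_move_old(struct region fdt_end)
{
    return !fdt_end.in_memory;
}

/* Patched check: reserved memory is also a valid, mapped location. */
static bool must_move_new(struct region fdt_end)
{
    return !fdt_end.in_memory && !fdt_end.in_reserved;
}
```

With the old check, an FDT sitting in a reserved region would be copied even though it is already mapped; with the new check it stays put.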
[PATCH] powerpc/prom: Mark identical_pvr_fixup as __init
If identical_pvr_fixup() is not inlined, there are two modpost warnings:

WARNING: modpost: vmlinux.o(.text+0x54e8): Section mismatch in reference
from the function identical_pvr_fixup() to the function
.init.text:of_get_flat_dt_prop()
The function identical_pvr_fixup() references
the function __init of_get_flat_dt_prop().
This is often because identical_pvr_fixup lacks a __init
annotation or the annotation of of_get_flat_dt_prop is wrong.

WARNING: modpost: vmlinux.o(.text+0x551c): Section mismatch in reference
from the function identical_pvr_fixup() to the function
.init.text:identify_cpu()
The function identical_pvr_fixup() references
the function __init identify_cpu().
This is often because identical_pvr_fixup lacks a __init
annotation or the annotation of identify_cpu is wrong.

identical_pvr_fixup() calls two functions marked as __init and is only
called by a function marked as __init, so it should be marked as __init
as well. At the same time, remove the inline keyword as it is not
necessary to inline this function. The compiler is still free to do so
if it feels it is worthwhile since commit 889b3c1245de ("compiler:
remove CONFIG_OPTIMIZE_INLINING entirely").

Fixes: 14b3d926a22b ("[POWERPC] 4xx: update 440EP(x)/440GR(x) identical PVR issue workaround")
Link: https://github.com/ClangBuiltLinux/linux/issues/1316
Signed-off-by: Nathan Chancellor
---
 arch/powerpc/kernel/prom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9a4797d1d40d..a8b2d6bfc1ca 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -267,7 +267,7 @@ static struct feature_property {
 };
 
 #if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
-static inline void identical_pvr_fixup(unsigned long node)
+static __init void identical_pvr_fixup(unsigned long node)
 {
 	unsigned int pvr;
 	const char *model = of_get_flat_dt_prop(node, "model", NULL);

base-commit: 5c88a17e15795226b56d83f579cbb9b7a4864f79
-- 
2.31.0.rc0.75.gec125d1bc1
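For readers unfamiliar with section mismatches, here is a rough userspace analogue (the section name and helper functions are made up, and real kernel semantics — discarding .init.text after boot — are only simulated by the comment): placing caller and callee in the same dedicated section is the moral equivalent of giving both the __init annotation.

```c
#include <assert.h>
#include <string.h>

/* Userspace analogue of the kernel's __init annotation (hypothetical
 * section name): place a function in a dedicated linker section. In the
 * kernel, .init.text is discarded after boot, so modpost warns when a
 * function living in regular .text calls into .init.text -- the
 * "section mismatch" quoted in the report above. */
#define my_init __attribute__((section(".my_init.text")))

/* Plays the role of an __init-only helper such as of_get_flat_dt_prop(). */
static my_init int parse_boot_param(const char *s)
{
    return strcmp(s, "quiet") == 0;
}

/* Correct pattern: the caller carries the same annotation, so caller
 * and callee end up in the discardable section together and no
 * cross-section reference remains. */
static my_init int setup(const char *cmdline)
{
    return parse_boot_param(cmdline);
}
```

If `setup()` were left in the default text section while `parse_boot_param()` stayed in the custom one, a kernel-style section checker would flag exactly the kind of reference modpost complains about.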
[PATCH] powerpc/fadump: Mark fadump_calculate_reserve_size as __init
If fadump_calculate_reserve_size() is not inlined, there is a modpost warning:

WARNING: modpost: vmlinux.o(.text+0x5196c): Section mismatch in reference
from the function fadump_calculate_reserve_size() to the function
.init.text:parse_crashkernel()
The function fadump_calculate_reserve_size() references
the function __init parse_crashkernel().
This is often because fadump_calculate_reserve_size lacks a __init
annotation or the annotation of parse_crashkernel is wrong.

fadump_calculate_reserve_size() calls parse_crashkernel(), which is
marked as __init, and fadump_calculate_reserve_size() is called from
within fadump_reserve_mem(), which is also marked as __init.

Mark fadump_calculate_reserve_size() as __init to fix the section
mismatch. Additionally, remove the inline keyword as it is not
necessary to inline this function; the compiler is still free to do so
if it feels it is worthwhile since commit 889b3c1245de ("compiler:
remove CONFIG_OPTIMIZE_INLINING entirely").

Fixes: 11550dc0a00b ("powerpc/fadump: reuse crashkernel parameter for fadump memory reservation")
Link: https://github.com/ClangBuiltLinux/linux/issues/1300
Signed-off-by: Nathan Chancellor
---

Send while streaming at https://www.twitch.tv/nathanchance :P

 arch/powerpc/kernel/fadump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8482739d42f3..eddf362caedc 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -292,7 +292,7 @@ static void fadump_show_config(void)
  * that is required for a kernel to boot successfully.
  *
  */
-static inline u64 fadump_calculate_reserve_size(void)
+static __init u64 fadump_calculate_reserve_size(void)
 {
 	u64 base, size, bootmem_min;
 	int ret;

base-commit: 5c88a17e15795226b56d83f579cbb9b7a4864f79
-- 
2.31.0.rc0.75.gec125d1bc1
Re: [PATCH v2 0/7] Improve boot command line handling
+Will D

On Tue, Mar 2, 2021 at 11:36 AM Daniel Walker wrote:
>
> On Tue, Mar 02, 2021 at 05:25:16PM +, Christophe Leroy wrote:
> > The purpose of this series is to improve and enhance the
> > handling of kernel boot arguments.
> >
> > It is first focussed on powerpc but also extends the capability
> > for other arches.
> >
> > This is based on suggestion from Daniel Walker
> >
>
> I don't see a point in your changes at this time. My changes are much more
> mature, and your changes don't really make improvements.

Not really a helpful comment. What we merge here will be from whomever
is persistent and timely in their efforts. But please, work together on
a common solution.

This one meets my requirements of moving the kconfig and code out of
the arches, supports prepend/append, and is up to date.

Rob
[powerpc:fixes-test] BUILD SUCCESS 5c88a17e15795226b56d83f579cbb9b7a4864f79
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test
branch HEAD: 5c88a17e15795226b56d83f579cbb9b7a4864f79  powerpc/sstep: Fix VSX instruction emulation

elapsed time: 723m

configs tested: 104
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm       defconfig
arm64     allyesconfig
arm64     defconfig
arm       allyesconfig
arm       allmodconfig
nds32     defconfig
arm       ep93xx_defconfig
arm       am200epdkit_defconfig
powerpc   warp_defconfig
powerpc   allmodconfig
m68k      stmark2_defconfig
powerpc   pq2fads_defconfig
arm       mvebu_v7_defconfig
arm       colibri_pxa270_defconfig
sh        se7705_defconfig
i386      alldefconfig
arm       sama5_defconfig
arm       iop32x_defconfig
s390      zfcpdump_defconfig
sh        rts7751r2d1_defconfig
sparc     defconfig
c6x       alldefconfig
powerpc   walnut_defconfig
powerpc   sbc8548_defconfig
powerpc   currituck_defconfig
m68k      mvme147_defconfig
nios2     alldefconfig
arm       mainstone_defconfig
powerpc   xes_mpc85xx_defconfig
powerpc   ppc64_defconfig
arm       pxa168_defconfig
riscv     rv32_defconfig
arm       socfpga_defconfig
openrisc  or1klitex_defconfig
powerpc   cm5200_defconfig
xtensa    audio_kc705_defconfig
sh        sh7757lcr_defconfig
ia64      allmodconfig
ia64      defconfig
ia64      allyesconfig
m68k      allmodconfig
m68k      defconfig
m68k      allyesconfig
nios2     allyesconfig
csky      defconfig
alpha     defconfig
alpha     allyesconfig
xtensa    allyesconfig
h8300     allyesconfig
arc       defconfig
sh        allmodconfig
nios2     defconfig
arc       allyesconfig
nds32     allnoconfig
c6x       allyesconfig
parisc    defconfig
s390      allyesconfig
s390      allmodconfig
parisc    allyesconfig
s390      defconfig
i386      allyesconfig
sparc     allyesconfig
i386      tinyconfig
i386      defconfig
mips      allyesconfig
mips      allmodconfig
powerpc   allyesconfig
powerpc   allnoconfig
x86_64    randconfig-a006-20210302
x86_64    randconfig-a001-20210302
x86_64    randconfig-a004-20210302
x86_64    randconfig-a002-20210302
x86_64    randconfig-a005-20210302
x86_64    randconfig-a003-20210302
i386      randconfig-a005-20210302
i386      randconfig-a003-20210302
i386      randconfig-a002-20210302
i386      randconfig-a004-20210302
i386      randconfig-a006-20210302
i386      randconfig-a001-20210302
i386      randconfig-a016-20210302
i386      randconfig-a012-20210302
i386      randconfig-a014-20210302
i386      randconfig-a013-20210302
i386      randconfig-a011-20210302
i386      randconfig-a015-20210302
riscv     nommu_k210_defconfig
riscv     allyesconfig
riscv     nommu_virt_defconfig
riscv     allnoconfig
riscv     defconfig
riscv     allmodconfig
x86_64    allyesconfig
x86_64    rhel-7.6-kselftests
x86_64    defconfig
x86_64    rhel-8.3
x86_64    rhel-8.3-kbuiltin
x86_64    kexec

clang tested configs:
x86_64    randconfig-a013-20210302
x86_64    randconfig-a016-20210302
[PATCH v5 5/5] ibmvfc: reinitialize sub-CRQs and perform channel enquiry after LPM
A live partition migration (LPM) results in a CRQ disconnect similar to
a hard reset. In the LPM case the hypervisor mostly preserves the CRQ
transport such that it simply needs to be re-enabled. However, the
capabilities may have changed, such as fewer channels or no channels at
all. Further, it is possible that there may be sub-CRQ support but no
channel support.

The CRQ re-enable path currently doesn't take any of this into
consideration. For simplicity, release and reinitialize sub-CRQs during
re-enable, and set do_enquiry and using_channels with the appropriate
values to trigger channel renegotiation.

Fixes: 3034ebe26389 ("ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ Channels")
Signed-off-by: Tyrel Datwyler
Reviewed-by: Brian King
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index ef03fa559433..1e2ea21713ad 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -903,6 +903,9 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
 {
 	int rc = 0;
 	struct vio_dev *vdev = to_vio_dev(vhost->dev);
+	unsigned long flags;
+
+	ibmvfc_release_sub_crqs(vhost);
 
 	/* Re-enable the CRQ */
 	do {
@@ -914,6 +917,15 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
 	if (rc)
 		dev_err(vhost->dev, "Error enabling adapter (rc=%d)\n", rc);
 
+	spin_lock_irqsave(vhost->host->host_lock, flags);
+	spin_lock(vhost->crq.q_lock);
+	vhost->do_enquiry = 1;
+	vhost->using_channels = 0;
+	spin_unlock(vhost->crq.q_lock);
+	spin_unlock_irqrestore(vhost->host->host_lock, flags);
+
+	ibmvfc_init_sub_crqs(vhost);
+
 	return rc;
 }
-- 
2.27.0
[PATCH v5 4/5] ibmvfc: store return code of H_FREE_SUB_CRQ during cleanup
The H_FREE_SUB_CRQ hypercall can return a retry delay return code that
indicates the call needs to be retried after a specific amount of time
delay. The error path to free a sub-CRQ in case of a failure during
channel registration fails to capture the return code of
H_FREE_SUB_CRQ, which will result in the delay loop being skipped in
the case of a retry delay return code.

Store the return code result of the H_FREE_SUB_CRQ call such that the
return code check in the delay loop evaluates a meaningful value. Also,
use rtas_busy_delay() to check the rc value and delay for the
appropriate amount of time.

Fixes: 39e461fddff0 ("ibmvfc: map/request irq and register Sub-CRQ interrupt handler")
Signed-off-by: Tyrel Datwyler
Reviewed-by: Brian King
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 1d9f961715ca..ef03fa559433 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include <asm/rtas.h>
 #include
 #include
 #include
@@ -5670,8 +5671,8 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
 
 irq_failed:
 	do {
-		plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
-	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
+		rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
+	} while (rtas_busy_delay(rc));
 reg_failed:
 	ibmvfc_free_queue(vhost, scrq);
 	LEAVE;
-- 
2.27.0
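The bug class is easy to reproduce outside the kernel. Below is a sketch (all names are hypothetical stand-ins, not the driver or PAPR APIs) showing how a do/while retry loop degenerates to a single iteration when the body drops the return code the loop condition tests:

```c
#include <assert.h>
#include <stdbool.h>

/* Hypothetical stand-ins for the hypervisor call and its busy code. */
#define H_SUCCESS 0
#define H_BUSY    1

static int calls_remaining;

/* Pretend hcall: returns H_BUSY until the resource is finally freed. */
static long fake_free_sub_crq(void)
{
    return (--calls_remaining > 0) ? H_BUSY : H_SUCCESS;
}

static bool busy(long rc) { return rc == H_BUSY; }

/* Buggy shape: rc is never assigned inside the loop, so the
 * "while (busy(rc))" test reads a stale value and the retry loop runs
 * exactly once no matter what the call returned. */
static int free_buggy(void)
{
    long rc = H_SUCCESS;       /* stale value the condition will see */
    int iterations = 0;
    do {
        fake_free_sub_crq();   /* return code dropped, as in the bug */
        iterations++;
    } while (busy(rc));
    return iterations;
}

/* Fixed shape, as in the patch: store the return code so the busy
 * test evaluates something meaningful. */
static int free_fixed(void)
{
    long rc;
    int iterations = 0;
    do {
        rc = fake_free_sub_crq();
        iterations++;
    } while (busy(rc));
    return iterations;
}
```

With three busy responses pending, the buggy loop gives up after one call while the fixed loop retries until the hypothetical hcall succeeds.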
[PATCH v5 1/5] ibmvfc: simplify handling of sub-CRQ initialization
If ibmvfc_init_sub_crqs() fails, ibmvfc_probe() simply parrots a
registration failure reported elsewhere, and further,
vhost->scsi_scrqs.scrqs == NULL is indication enough to the driver that
it has no sub-CRQs available. The mq_enabled check can also be moved
into ibmvfc_init_sub_crqs() such that each caller doesn't have to gate
the call with a mq_enabled check. Finally, in the case of sub-CRQ setup
failure, do_enquiry can be turned off to put the driver into
single-queue fallback mode.

The aforementioned changes also simplify the next patch in the series,
which fixes a hard reset issue, by tying the sub-CRQ setup failure and
do_enquiry logic into ibmvfc_init_sub_crqs().

Signed-off-by: Tyrel Datwyler
Reviewed-by: Brian King
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 7097028d4cb6..384960036f8b 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -5705,17 +5705,21 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index)
 	LEAVE;
 }
 
-static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
+static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
 {
 	int i, j;
 
 	ENTER;
+	if (!vhost->mq_enabled)
+		return;
+
 	vhost->scsi_scrqs.scrqs = kcalloc(nr_scsi_hw_queues,
 					  sizeof(*vhost->scsi_scrqs.scrqs),
 					  GFP_KERNEL);
-	if (!vhost->scsi_scrqs.scrqs)
-		return -1;
+	if (!vhost->scsi_scrqs.scrqs) {
+		vhost->do_enquiry = 0;
+		return;
+	}
 
 	for (i = 0; i < nr_scsi_hw_queues; i++) {
 		if (ibmvfc_register_scsi_channel(vhost, i)) {
@@ -5724,13 +5728,12 @@ static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
 			kfree(vhost->scsi_scrqs.scrqs);
 			vhost->scsi_scrqs.scrqs = NULL;
 			vhost->scsi_scrqs.active_queues = 0;
-			LEAVE;
-			return -1;
+			vhost->do_enquiry = 0;
+			break;
 		}
 	}
 
 	LEAVE;
-	return 0;
 }
 
 static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost)
@@ -5997,11 +6000,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 		goto remove_shost;
 	}
 
-	if (vhost->mq_enabled) {
-		rc = ibmvfc_init_sub_crqs(vhost);
-		if (rc)
-			dev_warn(dev, "Failed to allocate Sub-CRQs. rc=%d\n", rc);
-	}
+	ibmvfc_init_sub_crqs(vhost);
 
 	if (shost_to_fc_host(shost)->rqst_q)
 		blk_queue_max_segments(shost_to_fc_host(shost)->rqst_q, 1);
-- 
2.27.0
[PATCH v5 0/5] ibmvfc: hard reset fixes
This series contains a minor simplification of ibmvfc_init_sub_crqs()
followed by a couple of fixes for sub-CRQ handling which affect hard
reset of the client/host adapter CRQ pair.

Changes in v5:
* Patches 2-5: corrected upstream commit ids for Fixes: tags

Changes in v4:
* Patch 2: dropped Reviewed-by tag and moved sub-crq init to after locked region
* Patch 5: moved sub-crq init to after locked region

Changes in v3:
* Patch 1 & 5: moved ibmvfc_init_sub_crqs out of locked path

Changes in v2:
* added Reviewed-by tags for patches 1-3
* Patch 4: use rtas_busy_delay to test rc and delay correct amount of time
* Patch 5: (new) similar fix for LPM case where CRQ pair needs re-enablement

Tyrel Datwyler (5):
  powerpc/pseries: extract host bridge from pci_bus prior to bus removal
  ibmvfc: simplify handling of sub-CRQ initialization
  ibmvfc: fix invalid sub-CRQ handles after hard reset
  ibmvfc: treat H_CLOSED as success during sub-CRQ registration
  ibmvfc: store return code of H_FREE_SUB_CRQ during cleanup

 arch/powerpc/platforms/pseries/pci_dlpar.c |  4 +-
 drivers/scsi/ibmvscsi/ibmvfc.c             | 49 ++++++++++++-----------
 2 files changed, 26 insertions(+), 27 deletions(-)

-- 
2.27.0
[PATCH v5 2/5] ibmvfc: fix invalid sub-CRQ handles after hard reset
A hard reset results in a complete transport disconnect such that the
CRQ connection with the partner VIOS is broken. This has the side
effect of also invalidating the associated sub-CRQs. The current code
assumes that the sub-CRQs are preserved, resulting in a protocol
violation after trying to reconnect them with the VIOS. This introduces
an infinite loop such that the VIOS forces a disconnect after each
subsequent attempt to re-register with invalid handles.

Avoid the aforementioned issue by releasing the sub-CRQs prior to CRQ
disconnect, and driving a reinitialization of the sub-CRQs once a new
CRQ is registered with the hypervisor.

Fixes: 3034ebe26389 ("ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ Channels")
Signed-off-by: Tyrel Datwyler
Reviewed-by: Brian King
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 384960036f8b..d34e1a4f74d9 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -158,6 +158,9 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *);
 static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *);
 static void ibmvfc_tgt_move_login(struct ibmvfc_target *);
 
+static void ibmvfc_release_sub_crqs(struct ibmvfc_host *);
+static void ibmvfc_init_sub_crqs(struct ibmvfc_host *);
+
 static const char *unknown_error = "unknown error";
 
 static long h_reg_sub_crq(unsigned long unit_address, unsigned long ioba,
@@ -926,8 +929,8 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
 	unsigned long flags;
 	struct vio_dev *vdev = to_vio_dev(vhost->dev);
 	struct ibmvfc_queue *crq = &vhost->crq;
-	struct ibmvfc_queue *scrq;
-	int i;
+
+	ibmvfc_release_sub_crqs(vhost);
 
 	/* Close the CRQ */
 	do {
@@ -947,16 +950,6 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
 	memset(crq->msgs.crq, 0, PAGE_SIZE);
 	crq->cur = 0;
 
-	if (vhost->scsi_scrqs.scrqs) {
-		for (i = 0; i < nr_scsi_hw_queues; i++) {
-			scrq = &vhost->scsi_scrqs.scrqs[i];
-			spin_lock(scrq->q_lock);
-			memset(scrq->msgs.scrq, 0, PAGE_SIZE);
-			scrq->cur = 0;
-			spin_unlock(scrq->q_lock);
-		}
-	}
-
 	/* And re-open it again */
 	rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
 				crq->msg_token, PAGE_SIZE);
@@ -966,9 +959,12 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
 		dev_warn(vhost->dev, "Partner adapter not ready\n");
 	else if (rc != 0)
 		dev_warn(vhost->dev, "Couldn't register crq (rc=%d)\n", rc);
+
 	spin_unlock(vhost->crq.q_lock);
 	spin_unlock_irqrestore(vhost->host->host_lock, flags);
 
+	ibmvfc_init_sub_crqs(vhost);
+
 	return rc;
 }
 
@@ -5692,6 +5688,7 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index)
 
 	free_irq(scrq->irq, scrq);
 	irq_dispose_mapping(scrq->irq);
+	scrq->irq = 0;
 
 	do {
 		rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address,
-- 
2.27.0
[PATCH v5 3/5] ibmvfc: treat H_CLOSED as success during sub-CRQ registration
A non-zero return code for H_REG_SUB_CRQ is currently treated as a
failure, resulting in failing sub-CRQ setup. The case of H_CLOSED
should not be treated as a failure. This return code translates to a
successful sub-CRQ registration by the hypervisor, and is meant to
communicate back that there is currently no partner VIOS CRQ connection
established as of yet. This is a common occurrence during a disconnect
where the client adapter can possibly come back up prior to the partner
adapter.

For a non-zero return code from H_REG_SUB_CRQ, treat H_CLOSED as
success so that sub-CRQs are successfully set up.

Fixes: 3034ebe26389 ("ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ Channels")
Signed-off-by: Tyrel Datwyler
Reviewed-by: Brian King
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index d34e1a4f74d9..1d9f961715ca 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -5636,7 +5636,8 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
 
 	rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE,
 			   &scrq->cookie, &scrq->hw_irq);
-	if (rc) {
+	/* H_CLOSED indicates successful register, but no CRQ partner */
+	if (rc && rc != H_CLOSED) {
 		dev_warn(dev, "Error registering sub-crq: %d\n", rc);
 		if (rc == H_PARAMETER)
 			dev_warn_once(dev, "Firmware may not support MQ\n");
-- 
2.27.0
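A minimal sketch of the return-code classification change (the numeric values below are hypothetical stand-ins for the PAPR hcall codes, chosen only for illustration):

```c
#include <assert.h>
#include <stdbool.h>

/* Hypothetical numeric values standing in for PAPR hcall return codes. */
#define H_SUCCESS   0
#define H_CLOSED    2
#define H_PARAMETER 4

/* Before the fix: any non-zero rc failed sub-CRQ registration. */
static bool reg_ok_old(long rc)
{
    return rc == H_SUCCESS;
}

/* After the fix: H_CLOSED means the sub-CRQ was registered but the
 * partner VIOS connection is simply not up yet, so it also counts
 * as success. Genuine errors such as H_PARAMETER still fail. */
static bool reg_ok_new(long rc)
{
    return rc == H_SUCCESS || rc == H_CLOSED;
}
```

The behavioral change is confined to the H_CLOSED case; every other non-zero code still takes the warning/error path.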
Re: linux-next: build failure after merge of the powerpc-fixes tree
Uwe Kleine-König writes:
> Hello,
>
> On 3/2/21 3:09 AM, Michael Ellerman wrote:
>> Stephen Rothwell writes:
>>> Hi all,
>>>
>>> After merging the powerpc-fixes tree, today's linux-next build (powerpc
>>> allyesconfig) failed like this:
>>>
>>> drivers/net/ethernet/ibm/ibmvnic.c:5399:13: error: conflicting types for 'ibmvnic_remove'
>>>  5399 | static void ibmvnic_remove(struct vio_dev *dev)
>>>       |             ^~
>>> drivers/net/ethernet/ibm/ibmvnic.c:81:12: note: previous declaration of 'ibmvnic_remove' was here
>>>    81 | static int ibmvnic_remove(struct vio_dev *);
>>>       |            ^~
>>>
>>> Caused by commit
>>>
>>>   1bdd1e6f9320 ("vio: make remove callback return void")
>>
>> Gah, is IBMVNIC in any of our defconfigs?! ... no it's not.
>
> Would you accept a patch to add the driver to one of the defconfigs as
> an excuse for the build breakage I created?

Thanks, but I already sent a patch adding it.

We should really have these drivers enabled in our defconfig, so that's
on us.

cheers
Re: [PATCH v2 30/37] KVM: PPC: Book3S HV: Implement radix prefetch workaround by disabling MMU
Nicholas Piggin writes:
> Rather than partition the guest PID space and catch and flush a rogue
> guest, instead work around this issue by ensuring the MMU is always
> disabled in HV mode while the guest MMU context is switched in.
>
> This may be a bit less efficient, but it is a lot less complicated and
> allows the P9 path to trivially implement the workaround too. Newer CPUs
> are not subject to this issue.
>
> Signed-off-by: Nicholas Piggin
> ---
>  arch/powerpc/include/asm/mmu_context.h   |  6
>  arch/powerpc/kvm/book3s_hv.c             | 10 --
>  arch/powerpc/kvm/book3s_hv_interrupt.c   | 14 ++--
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 34 --
>  arch/powerpc/mm/book3s64/radix_pgtable.c | 27 +-
>  arch/powerpc/mm/book3s64/radix_tlb.c     | 46
>  arch/powerpc/mm/mmu_context.c            |  4 +--
>  7 files changed, 28 insertions(+), 113 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index 652ce85f9410..bb5c7e5e142e 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -122,12 +122,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
>  }
>  #endif
>
> -#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
> -extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
> -#else
> -static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
> -#endif
> -
>  extern void switch_cop(struct mm_struct *next);
>  extern int use_cop(unsigned long acop, struct mm_struct *mm);
>  extern void drop_cop(unsigned long acop, struct mm_struct *mm);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index ad16331c3370..c3064075f1d7 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -806,6 +806,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
>  		/* KVM does not support mflags=2 (AIL=2) */
>  		if (mflags != 0 && mflags != 3)
>  			return H_UNSUPPORTED_FLAG_START;
> +		/* Prefetch bug */
> +		if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
> +		    kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
> +			return H_UNSUPPORTED_FLAG_START;

So does this mean that if the host has the prefetch bug, all of its
guests will run with AIL=0 all the time? And what we're avoiding here
is a guest setting AIL=3, which would (since there's no HAIL) cause
hypervisor interrupts to be taken with the MMU on, is that it?

Do we need to add this verification to kvmppc_set_lpcr as well? QEMU
could in theory call the KVM_SET_ONE_REG ioctl and set AIL to any
value.

>  		return H_TOO_HARD;
>  	default:
>  		return H_TOO_HARD;
> @@ -4286,8 +4290,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
>  	 * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
>  	 * path, which also handles hash and dependent threads mode.
>  	 */
> -	if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
> -	    !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
> +	if (kvm->arch.threads_indep && kvm_is_radix(kvm))
>  		r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
>  					  vcpu->arch.vcore->lpcr);
>  	else
> @@ -4914,6 +4917,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
>  	if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
>  		pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
>  		kvm->arch.threads_indep = true;
> +	} else if (!indep_threads_mode && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
> +		pr_warn("KVM: Ignoring indep_threads_mode=N on pre-DD2.2 POWER9\n");
> +		kvm->arch.threads_indep = true;
>  	} else {
>  		kvm->arch.threads_indep = indep_threads_mode;
>  	}
> diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
> index b93d861d8538..9784da3f8565 100644
> --- a/arch/powerpc/kvm/book3s_hv_interrupt.c
> +++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
> @@ -223,6 +223,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
>
>  	mtspr(SPRN_AMOR, ~0UL);
>
> +	if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
> +		__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
> +
>  	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
>
>  	/*
> @@ -231,7 +234,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
>  	 */
>  	mtspr(SPRN_HDEC, hdec);
>
> -	__mtmsrd(0, 1);		/* clear RI */
> +	if
Re: [PATCH] ibmvnic: Fix possibly uninitialized old_num_tx_queues variable warning.
Michal Suchanek [msucha...@suse.de] wrote:
> GCC 7.5 reports:
> ../drivers/net/ethernet/ibm/ibmvnic.c: In function 'ibmvnic_reset_init':
> ../drivers/net/ethernet/ibm/ibmvnic.c:5373:51: warning: 'old_num_tx_queues' may be used uninitialized in this function [-Wmaybe-uninitialized]
> ../drivers/net/ethernet/ibm/ibmvnic.c:5373:6: warning: 'old_num_rx_queues' may be used uninitialized in this function [-Wmaybe-uninitialized]
>
> The variable is initialized only if(reset) and used only if(reset &&
> something) so this is a false positive. However, there is no reason to
> not initialize the variables unconditionally avoiding the warning.

Yeah, it's a false positive, but initializing doesn't hurt.

> Fixes: 635e442f4a48 ("ibmvnic: merge ibmvnic_reset_init and ibmvnic_init")
> Signed-off-by: Michal Suchanek

Reviewed-by: Sukadev Bhattiprolu
Re: Build regressions/improvements in v5.12-rc1
Hi Alex,

On Tue, Mar 2, 2021 at 8:30 PM Alex Deucher wrote:
> On Mon, Mar 1, 2021 at 9:21 AM Geert Uytterhoeven wrote:
> > On Mon, 1 Mar 2021, Geert Uytterhoeven wrote:
> > > Below is the list of build error/warning regressions/improvements in
> > > v5.12-rc1[1] compared to v5.11[2].
> > >
> > > Summarized:
> > >   - build errors: +2/-0
> > >
> > > [1] http://kisskb.ellerman.id.au/kisskb/branch/linus/head/fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8/ (all 192 configs)
> > > [2] http://kisskb.ellerman.id.au/kisskb/branch/linus/head/f40ddce88593482919761f74910f42f4b84c004b/ (all 192 configs)
> > >
> > >
> > > *** ERRORS ***
> > >
> > > 2 error regressions:
> > >   + /kisskb/src/drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c: error: implicit declaration of function 'disable_kernel_vsx' [-Werror=implicit-function-declaration]: => 674:2
> > >   + /kisskb/src/drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c: error: implicit declaration of function 'enable_kernel_vsx' [-Werror=implicit-function-declaration]: => 638:2
> >
> > powerpc-gcc4.9/ppc64_book3e_allmodconfig
> >
> > This was fixed in v5.11-rc1, but reappeared in v5.12-rc1?
>
> Do you know what fixed in for 5.11? I guess for PPC64 we depend on
> CONFIG_VSX?

Looking at the kisskb build logs for v5.11*, it seems compilation never
got to drivers/gpu/drm/ due to internal compiler errors that weren't
caught by my scripts. So the errors listed above were not really fixed.

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker.
But when I'm talking to journalists I just say "programmer" or something
like that.
				-- Linus Torvalds
[PATCH] ibmvnic: Fix possibly uninitialized old_num_tx_queues variable warning.
GCC 7.5 reports:
../drivers/net/ethernet/ibm/ibmvnic.c: In function 'ibmvnic_reset_init':
../drivers/net/ethernet/ibm/ibmvnic.c:5373:51: warning: 'old_num_tx_queues' may be used uninitialized in this function [-Wmaybe-uninitialized]
../drivers/net/ethernet/ibm/ibmvnic.c:5373:6: warning: 'old_num_rx_queues' may be used uninitialized in this function [-Wmaybe-uninitialized]

The variable is initialized only if(reset) and used only if(reset &&
something) so this is a false positive. However, there is no reason to
not initialize the variables unconditionally avoiding the warning.

Fixes: 635e442f4a48 ("ibmvnic: merge ibmvnic_reset_init and ibmvnic_init")
Signed-off-by: Michal Suchanek
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 118a4bd3f877..3bad762083c5 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -5219,16 +5219,14 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
 {
 	struct device *dev = &adapter->vdev->dev;
 	unsigned long timeout = msecs_to_jiffies(2);
-	u64 old_num_rx_queues, old_num_tx_queues;
+	u64 old_num_rx_queues = adapter->req_rx_queues;
+	u64 old_num_tx_queues = adapter->req_tx_queues;
 	int rc;
 
 	adapter->from_passive_init = false;
 
-	if (reset) {
-		old_num_rx_queues = adapter->req_rx_queues;
-		old_num_tx_queues = adapter->req_tx_queues;
+	if (reset)
 		reinit_completion(&adapter->init_done);
-	}
 
 	adapter->init_done_rc = 0;
 	rc = ibmvnic_send_crq_init(adapter);
-- 
2.26.2
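The warning pattern is easy to reproduce in a few lines. This is a sketch with made-up names, not the driver code: the variable is written only under one branch of a condition and read only under a conjunction with the same condition, which is safe at runtime but opaque to older GCC's flow analysis.

```c
#include <assert.h>
#include <stdbool.h>

/* Minimal shape of the pattern GCC 7.5 warns about: 'old_queues' is
 * written only under "if (reset)" and read only under "reset && ...",
 * so it can never actually be read uninitialized, but the compiler
 * cannot always prove that and emits -Wmaybe-uninitialized. */
static int guarded(bool reset)
{
    int old_queues;          /* may trigger the false-positive warning */
    int queues = 4;

    if (reset)
        old_queues = queues;

    queues = 8;              /* stands in for renegotiated queue counts */

    /* short-circuit guarantees old_queues is read only when it was set */
    if (reset && old_queues != queues)
        return 1;            /* "queue count changed" */
    return 0;
}

/* The fix from the patch: initialize unconditionally. The behaviour is
 * identical, and the warning disappears. */
static int unconditional(bool reset)
{
    int queues = 4;
    int old_queues = queues; /* always initialized */

    queues = 8;

    if (reset && old_queues != queues)
        return 1;
    return 0;
}
```

Both versions return the same results for every input, which is why the patch can describe the change as warning suppression rather than a behavioral fix.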
Re: [PATCH v2 6/7] cmdline: Gives architectures opportunity to use generically defined boot cmdline manipulation
Hi Christophe,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on robh/for-next linus/master v5.12-rc1 next-20210302]
[cannot apply to mpe/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christophe-Leroy/Improve-boot-command-line-handling/20210303-014039
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: sh-randconfig-s031-20210302 (attached as .config)
compiler: sh4-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-241-geaceeafa-dirty
        # https://github.com/0day-ci/linux/commit/edc3f8320d1dcb21a71e4cfdb26a3d2b64215c30
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christophe-Leroy/Improve-boot-command-line-handling/20210303-014039
        git checkout edc3f8320d1dcb21a71e4cfdb26a3d2b64215c30
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=sh

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot

All errors (new ones prefixed by >>):

   arch/sh/Kconfig:760:warning: choice value used outside its choice group
>> init/Kconfig:142:error: recursive dependency detected!
   init/Kconfig:142: choice contains symbol CMDLINE
   init/Kconfig:132: symbol CMDLINE depends on CMDLINE_EXTEND
   init/Kconfig:155: symbol CMDLINE_EXTEND is part of choice
   For a resolution refer to Documentation/kbuild/kconfig-language.rst
   subsection "Kconfig recursive dependency limitations"

vim +142 init/Kconfig

   103
   104	config BROKEN
   105		bool
   106
   107	config BROKEN_ON_SMP
   108		bool
   109		depends on BROKEN || !SMP
   110		default y
   111
   112	config INIT_ENV_ARG_LIMIT
   113		int
   114		default 32 if !UML
   115		default 128 if UML
   116		help
   117		  Maximum of each of the number of arguments and environment
   118		  variables passed to init from the kernel command line.
   119
   120	config HAVE_CMDLINE
   121		bool
   122
   123	config CMDLINE_BOOL
   124		bool "Default bootloader kernel arguments"
   125		depends on HAVE_CMDLINE
   126		help
   127		  On some platforms, there is currently no way for the boot loader to
   128		  pass arguments to the kernel. For these platforms, you can supply
   129		  some command-line options at build time by entering them here. In
   130		  most cases you will need to specify the root device here.
   131
   132	config CMDLINE
   133		string "Initial kernel command string"
   134		depends on CMDLINE_BOOL
   135		default DEFAULT_CMDLINE
   136		help
   137		  On some platforms, there is currently no way for the boot loader to
   138		  pass arguments to the kernel. For these platforms, you can supply
   139		  some command-line options at build time by entering them here. In
   140		  most cases you will need to specify the root device here.
   141
 > 142	choice
   143		prompt "Kernel command line type" if CMDLINE != ""
   144		default CMDLINE_FROM_BOOTLOADER
   145		help
   146		  Selects the way you want to use the default kernel arguments.
   147

--
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
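For context, the cycle the robot reports can be reproduced with a minimal, entirely hypothetical Kconfig fragment: the choice's visibility depends on CMDLINE, while a back-edge (here an invented `default ... if CMDLINE_EXTEND`) makes CMDLINE depend on a member of that same choice.

```
config CMDLINE
	string "Initial kernel command string"
	default "" if CMDLINE_EXTEND	# hypothetical back-edge into the choice

choice
	prompt "Kernel command line type" if CMDLINE != ""	# choice depends on CMDLINE
	default CMDLINE_FROM_BOOTLOADER

config CMDLINE_FROM_BOOTLOADER
	bool "Use bootloader kernel arguments"

config CMDLINE_EXTEND
	bool "Extend bootloader kernel arguments"

endchoice
```

Kconfig rejects this because evaluating the choice requires CMDLINE, which in turn requires CMDLINE_EXTEND, which is only defined by evaluating the choice; see the "Kconfig recursive dependency limitations" subsection the error message points to.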
Re: Build regressions/improvements in v5.12-rc1
On Mon, Mar 1, 2021 at 9:21 AM Geert Uytterhoeven wrote:
>
> On Mon, 1 Mar 2021, Geert Uytterhoeven wrote:
> > Below is the list of build error/warning regressions/improvements in
> > v5.12-rc1[1] compared to v5.11[2].
> >
> > Summarized:
> >   - build errors: +2/-0
>
> > [1] http://kisskb.ellerman.id.au/kisskb/branch/linus/head/fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8/ (all 192 configs)
> > [2] http://kisskb.ellerman.id.au/kisskb/branch/linus/head/f40ddce88593482919761f74910f42f4b84c004b/ (all 192 configs)
>
> > *** ERRORS ***
> >
> > 2 error regressions:
> >   + /kisskb/src/drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c: error: implicit declaration of function 'disable_kernel_vsx' [-Werror=implicit-function-declaration]: => 674:2
> >   + /kisskb/src/drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c: error: implicit declaration of function 'enable_kernel_vsx' [-Werror=implicit-function-declaration]: => 638:2
> >
> > powerpc-gcc4.9/ppc64_book3e_allmodconfig
>
> This was fixed in v5.11-rc1, but reappeared in v5.12-rc1?

Do you know what fixed it for 5.11? I guess for PPC64 we depend on
CONFIG_VSX?

Alex

> Gr{oetje,eeting}s,
>
> Geert
>
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org
>
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
>     -- Linus Torvalds
> ___
> amd-gfx mailing list
> amd-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 40/44] tty: hvc, drop unneeded forward declarations
On 3/1/21 10:22 PM, Jiri Slaby wrote: > Forward declarations make the code larger and rewrites harder. Harder as > they are often omitted from global changes. Remove forward declarations > which are not really needed, i.e. the definition of the function is > before its first use. > > Signed-off-by: Jiri Slaby > Cc: linuxppc-dev@lists.ozlabs.org Reviewed-by: Tyrel Datwyler > --- > drivers/tty/hvc/hvcs.c | 25 - > 1 file changed, 25 deletions(-) > > diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c > index c90848919644..0b89d878a108 100644 > --- a/drivers/tty/hvc/hvcs.c > +++ b/drivers/tty/hvc/hvcs.c > @@ -290,36 +290,11 @@ static LIST_HEAD(hvcs_structs); > static DEFINE_SPINLOCK(hvcs_structs_lock); > static DEFINE_MUTEX(hvcs_init_mutex); > > -static void hvcs_unthrottle(struct tty_struct *tty); > -static void hvcs_throttle(struct tty_struct *tty); > -static irqreturn_t hvcs_handle_interrupt(int irq, void *dev_instance); > - > -static int hvcs_write(struct tty_struct *tty, > - const unsigned char *buf, int count); > -static int hvcs_write_room(struct tty_struct *tty); > -static int hvcs_chars_in_buffer(struct tty_struct *tty); > - > -static int hvcs_has_pi(struct hvcs_struct *hvcsd); > -static void hvcs_set_pi(struct hvcs_partner_info *pi, > - struct hvcs_struct *hvcsd); > static int hvcs_get_pi(struct hvcs_struct *hvcsd); > static int hvcs_rescan_devices_list(void); > > -static int hvcs_partner_connect(struct hvcs_struct *hvcsd); > static void hvcs_partner_free(struct hvcs_struct *hvcsd); > > -static int hvcs_enable_device(struct hvcs_struct *hvcsd, > - uint32_t unit_address, unsigned int irq, struct vio_dev *dev); > - > -static int hvcs_open(struct tty_struct *tty, struct file *filp); > -static void hvcs_close(struct tty_struct *tty, struct file *filp); > -static void hvcs_hangup(struct tty_struct * tty); > - > -static int hvcs_probe(struct vio_dev *dev, > - const struct vio_device_id *id); > -static int hvcs_remove(struct vio_dev *dev); > -static int 
__init hvcs_module_init(void); > -static void __exit hvcs_module_exit(void); > static int hvcs_initialize(void); > > #define HVCS_SCHED_READ 0x0001 >
Re: [RFC PATCH v1] powerpc: Enable KFENCE for PPC32
On Tue, Mar 02, 2021 at 10:40:03PM +1100, Michael Ellerman wrote: > >> -- Change the unwinder, if it's possible for ppc32. > > > > I don't think it is possible. > > I think this actually is the solution. > > It seems the good architectures have all added support for > arch_stack_walk(), and we have not. I have no idea what arch_stack_walk does, but some background info: PowerPC functions that do save the LR (== the return address), and/or that set up a new stack frame, do not do this at the start of the function necessarily (it is a lot faster to postpone this, even if you always have to do it). So, in a leaf function it isn't always known if this has been done (in all callers further up it is always done, of course). If you have DWARF unwind info all is fine of course, but you do not have that in the kernel. > So I think it's probably on us to update to that new API. Or at least > update our save_stack_trace() to fabricate an entry using the NIP, as it > seems that's what callers expect. This sounds very expensive? If it is only a debug feature that won't be used in production that does not matter, but it worries me. Segher
Re: [PATCH next v3 12/15] printk: introduce a kmsg_dump iterator
On Tue 2021-03-02 14:20:51, John Ogness wrote: > On 2021-03-01, Petr Mladek wrote: > >> diff --git a/arch/powerpc/kernel/nvram_64.c > >> b/arch/powerpc/kernel/nvram_64.c > >> index 532f22637783..5a64b24a91c2 100644 > >> --- a/arch/powerpc/kernel/nvram_64.c > >> +++ b/arch/powerpc/kernel/nvram_64.c > >> @@ -681,13 +680,14 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, > >>return; > >> > >>if (big_oops_buf) { > >> - kmsg_dump_get_buffer(dumper, false, > >> + kmsg_dump_rewind(); > > > > It would be nice to get rid of the kmsg_dump_rewind() calls > > in all callers. > > > > A solution might be to create the following in include/linux/kmsg_dump.h > > > > Then we could do the following at the beginning of both > > kmsg_dump_get_buffer() and kmsg_dump_get_line(): > > > > u64 clear_seq = latched_seq_read_nolock(_seq); > > > > if (iter->cur_seq < clear_seq) > > cur_seq = clear_seq; > > I suppose we need to add this part anyway, if we want to enforce that > records before @clear_seq are not to be available for dumpers. Yup. > > It might be better to avoid the infinite loop. We could do the following: > > > > static void check_and_set_iter(struct kmsg_dump_iter) > > { > > if (iter->cur_seq == 0 && iter->next_seq == U64_MAX) { > > kmsg_dump_rewind(iter); > > } > > > > and call this at the beginning of both kmsg_dump_get_buffer() > > and kmsg_dump_get_line() > > > > What do you think? > > On a technical level, it does not make any difference. It is pure > cosmetic. Yup. > Personally, I prefer the rewind directly before the kmsg_dump_get calls > because it puts the initializer directly next to the user. > > As an example to illustrate my view, I prefer: > > for (i = 0; i < n; i++) > ...; > > instead of: > > int i = 0; > > ... > > for (; i < n; i++) > ...; > > Also, I do not really like the special use of 0/U64_MAX to identify > special actions of the kmsg_dump_get functions. Fair enough. > > Note that I do not resist on it. 
But it might make the API easier to > > use from my POV. > > Since you do not resist, I will keep the API the same for v4. But I will > add the @clear_seq check to the kmsg_dump_get functions. Go for it. Best Regards, Petr
Re: [PATCH v2 0/7] Improve boot command line handling
Le 02/03/2021 à 18:35, Daniel Walker a écrit :
> On Tue, Mar 02, 2021 at 05:25:16PM +, Christophe Leroy wrote:
>> The purpose of this series is to improve and enhance the
>> handling of kernel boot arguments.
>>
>> It is first focussed on powerpc but also extends the capability
>> for other arches.
>>
>> This is based on suggestion from Daniel Walker
>
> I don't see a point in your changes at this time. My changes are much
> more mature, and your changes don't really make improvements.

Cool, I'm eager to see them.

Christophe
Re: [PATCH v2 0/7] Improve boot command line handling
On Tue, Mar 02, 2021 at 05:25:16PM +, Christophe Leroy wrote:
> The purpose of this series is to improve and enhance the
> handling of kernel boot arguments.
>
> It is first focussed on powerpc but also extends the capability
> for other arches.
>
> This is based on suggestion from Daniel Walker

I don't see a point in your changes at this time. My changes are much
more mature, and your changes don't really make improvements.

Daniel
[PATCH v2 7/7] powerpc: use generic CMDLINE manipulations
This patch moves powerpc to the centrally defined CMDLINE options.

Signed-off-by: Christophe Leroy
---
 arch/powerpc/Kconfig | 43 +++
 1 file changed, 3 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0ab406f14513..0e1736a2a621 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -195,6 +195,7 @@ config PPC
 	select HAVE_CBPF_JIT			if !PPC64
 	select HAVE_STACKPROTECTOR		if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
 	select HAVE_STACKPROTECTOR		if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
+	select HAVE_CMDLINE
 	select HAVE_CONTEXT_TRACKING		if PPC64
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_STACKOVERFLOW
@@ -886,47 +887,9 @@ config PPC_DENORMALISATION
 	  Add support for handling denormalisation of single precision
 	  values. Useful for bare metal only. If unsure say Y here.
 
-config CMDLINE
-	string "Initial kernel command string"
+config DEFAULT_CMDLINE
+	string
 	default ""
-	help
-	  On some platforms, there is currently no way for the boot loader to
-	  pass arguments to the kernel. For these platforms, you can supply
-	  some command-line options at build time by entering them here. In
-	  most cases you will need to specify the root device here.
-
-choice
-	prompt "Kernel command line type" if CMDLINE != ""
-	default CMDLINE_FROM_BOOTLOADER
-
-config CMDLINE_FROM_BOOTLOADER
-	bool "Use bootloader kernel arguments if available"
-	help
-	  Uses the command-line options passed by the boot loader. If
-	  the boot loader doesn't provide any, the default kernel command
-	  string provided in CMDLINE will be used.
-
-config CMDLINE_EXTEND
-	bool "Extend bootloader kernel arguments"
-	help
-	  The command-line arguments provided by the boot loader will be
-	  appended to the default kernel command string.
-
-config CMDLINE_PREPEND
-	bool "Prepend bootloader kernel arguments"
-	help
-	  The default kernel command string will be prepend to the
-	  command-line arguments provided during boot.
-
-config CMDLINE_FORCE
-	bool "Always use the default kernel command string"
-	help
-	  Always use the default kernel command string, even if the boot
-	  loader passes other arguments to the kernel.
-	  This is useful if you cannot or don't want to change the
-	  command-line options your boot loader passes to the kernel.
-
-endchoice
 
 config EXTRA_TARGETS
 	string "Additional default image types"
-- 
2.25.0
[PATCH v2 6/7] cmdline: Gives architectures opportunity to use generically defined boot cmdline manipulation
Most architectures have similar boot command line manipulation options.
This patch adds their definition in init/Kconfig, gated by
CONFIG_HAVE_CMDLINE, which architectures can select in order to use them.

In order to use this, a few architectures will have to change their
CONFIG options:
- riscv has to replace CMDLINE_FALLBACK by CMDLINE_FROM_BOOTLOADER
- architectures using CONFIG_CMDLINE_OVERRIDE or CONFIG_CMDLINE_OVERWRITE
  have to replace them by CONFIG_CMDLINE_FORCE.

Architectures also have to define CONFIG_DEFAULT_CMDLINE.

Signed-off-by: Christophe Leroy
---
 init/Kconfig | 56 
 1 file changed, 56 insertions(+)

diff --git a/init/Kconfig b/init/Kconfig
index 22946fe5ded9..a0f2ad9467df 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -117,6 +117,62 @@ config INIT_ENV_ARG_LIMIT
 	  Maximum of each of the number of arguments and environment
 	  variables passed to init from the kernel command line.
 
+config HAVE_CMDLINE
+	bool
+
+config CMDLINE_BOOL
+	bool "Default bootloader kernel arguments"
+	depends on HAVE_CMDLINE
+	help
+	  On some platforms, there is currently no way for the boot loader to
+	  pass arguments to the kernel. For these platforms, you can supply
+	  some command-line options at build time by entering them here. In
+	  most cases you will need to specify the root device here.
+
+config CMDLINE
+	string "Initial kernel command string"
+	depends on CMDLINE_BOOL
+	default DEFAULT_CMDLINE
+	help
+	  On some platforms, there is currently no way for the boot loader to
+	  pass arguments to the kernel. For these platforms, you can supply
+	  some command-line options at build time by entering them here. In
+	  most cases you will need to specify the root device here.
+
+choice
+	prompt "Kernel command line type" if CMDLINE != ""
+	default CMDLINE_FROM_BOOTLOADER
+	help
+	  Selects the way you want to use the default kernel arguments.
+
+config CMDLINE_FROM_BOOTLOADER
+	bool "Use bootloader kernel arguments if available"
+	help
+	  Uses the command-line options passed by the boot loader. If
+	  the boot loader doesn't provide any, the default kernel command
+	  string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+	bool "Extend bootloader kernel arguments"
+	help
+	  The default kernel command string will be appended to the
+	  command-line arguments provided during boot.
+
+config CMDLINE_PREPEND
+	bool "Prepend bootloader kernel arguments"
+	help
+	  The default kernel command string will be prepended to the
+	  command-line arguments provided during boot.
+
+config CMDLINE_FORCE
+	bool "Always use the default kernel command string"
+	help
+	  Always use the default kernel command string, even if the boot
+	  loader passes other arguments to the kernel.
+	  This is useful if you cannot or don't want to change the
+	  command-line options your boot loader passes to the kernel.
+endchoice
+
 config COMPILE_TEST
 	bool "Compile also drivers which will not load"
 	depends on !UML && !S390
-- 
2.25.0
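Taken together, the opt-in steps described in this patch would look roughly like the following in a converted architecture's Kconfig (a hypothetical fragment for an imaginary arch named MYARCH; only HAVE_CMDLINE and DEFAULT_CMDLINE come from this series):

```kconfig
# Hypothetical fragment — not part of the patch.
config MYARCH
	def_bool y
	select HAVE_CMDLINE

# The arch supplies the default string that the generic CMDLINE
# option in init/Kconfig picks up via "default DEFAULT_CMDLINE".
config DEFAULT_CMDLINE
	string
	default "console=ttyS0 root=/dev/ram0"
```

The CMDLINE_BOOL/CMDLINE prompts and the FROM_BOOTLOADER/EXTEND/PREPEND/FORCE choice then come entirely from init/Kconfig, so the per-arch Kconfig no longer duplicates them.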
[PATCH v2 5/7] powerpc: add capability to prepend default command line
This patch activates the capability to prepend default arguments to the
command line.

Signed-off-by: Christophe Leroy
---
 arch/powerpc/Kconfig | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 386ae12d8523..0ab406f14513 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -912,6 +912,12 @@ config CMDLINE_EXTEND
 	  The command-line arguments provided by the boot loader will be
 	  appended to the default kernel command string.
 
+config CMDLINE_PREPEND
+	bool "Prepend bootloader kernel arguments"
+	help
+	  The default kernel command string will be prepended to the
+	  command-line arguments provided during boot.
+
 config CMDLINE_FORCE
 	bool "Always use the default kernel command string"
 	help
-- 
2.25.0
[PATCH v2 3/7] powerpc: convert to generic builtin command line
This updates the powerpc code to use the new cmdline building function. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/prom_init.c | 35 + 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ccf77b985c8f..24157e526f80 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -152,7 +153,7 @@ static struct prom_t __prombss prom; static unsigned long __prombss prom_entry; static char __prombss of_stdout_device[256]; -static char __prombss prom_scratch[256]; +static char __prombss prom_scratch[COMMAND_LINE_SIZE]; static unsigned long __prombss dt_header_start; static unsigned long __prombss dt_struct_start, dt_struct_end; @@ -304,26 +305,6 @@ static char __init *prom_strstr(const char *s1, const char *s2) return NULL; } -static size_t __init prom_strlcat(char *dest, const char *src, size_t count) -{ - size_t dsize = prom_strlen(dest); - size_t len = prom_strlen(src); - size_t res = dsize + len; - - /* This would be a bug */ - if (dsize >= count) - return count; - - dest += dsize; - count -= dsize; - if (len >= count) - len = count-1; - memcpy(dest, src, len); - dest[len] = 0; - return res; - -} - #ifdef CONFIG_PPC_PSERIES static int __init prom_strtobool(const char *s, bool *res) { @@ -768,19 +749,13 @@ static unsigned long prom_memparse(const char *ptr, const char **retptr) static void __init early_cmdline_parse(void) { const char *opt; - - char *p; int l = 0; - prom_cmd_line[0] = 0; - p = prom_cmd_line; - if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && (long)prom.chosen > 0) - l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1); + l = prom_getprop(prom.chosen, "bootargs", prom_scratch, +COMMAND_LINE_SIZE - 1); - if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0') - prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE, -sizeof(prom_cmd_line)); + 
cmdline_build(prom_cmd_line, l > 0 ? prom_scratch : NULL, sizeof(prom_scratch)); prom_printf("command line: %s\n", prom_cmd_line); -- 2.25.0
[PATCH v2 4/7] cmdline: Add capability to prepend the command line
This patch adds an option to prepend text to the command line instead of
appending it.

Signed-off-by: Christophe Leroy
---
 include/linux/cmdline.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/cmdline.h b/include/linux/cmdline.h
index ae3610bb0ee2..144346051e01 100644
--- a/include/linux/cmdline.h
+++ b/include/linux/cmdline.h
@@ -31,7 +31,7 @@ static __always_inline size_t cmdline_strlcat(char *dest, const char *src, size_
 }
 
 /*
- * This function will append a builtin command line to the command
+ * This function will append or prepend a builtin command line to the command
  * line provided by the bootloader. Kconfig options can be used to alter
  * the behavior of this builtin command line.
  * @dest: The destination of the final appended/prepended string.
@@ -50,6 +50,9 @@ static __always_inline void cmdline_build(char *dest, const char *src, size_t le
 		cmdline_strlcat(dest, CONFIG_CMDLINE, length);
 		return;
 	}
+
+	if (IS_ENABLED(CONFIG_CMDLINE_PREPEND) && sizeof(CONFIG_CMDLINE) > 1)
+		cmdline_strlcat(dest, CONFIG_CMDLINE " ", length);
 #endif
 	if (dest != src)
 		cmdline_strlcat(dest, src, length);
-- 
2.25.0
[PATCH v2 1/7] cmdline: Add generic function to build command line.
This code provides architectures with a way to build the command line
based on what is built into the kernel and what is handed over by the
bootloader, according to the selected compile-time options.

Signed-off-by: Christophe Leroy
---
 include/linux/cmdline.h | 62 +
 1 file changed, 62 insertions(+)
 create mode 100644 include/linux/cmdline.h

diff --git a/include/linux/cmdline.h b/include/linux/cmdline.h
new file mode 100644
index ..ae3610bb0ee2
--- /dev/null
+++ b/include/linux/cmdline.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CMDLINE_H
+#define _LINUX_CMDLINE_H
+
+static __always_inline size_t cmdline_strlen(const char *s)
+{
+	const char *sc;
+
+	for (sc = s; *sc != '\0'; ++sc)
+		; /* nothing */
+	return sc - s;
+}
+
+static __always_inline size_t cmdline_strlcat(char *dest, const char *src, size_t count)
+{
+	size_t dsize = cmdline_strlen(dest);
+	size_t len = cmdline_strlen(src);
+	size_t res = dsize + len;
+
+	/* This would be a bug */
+	if (dsize >= count)
+		return count;
+
+	dest += dsize;
+	count -= dsize;
+	if (len >= count)
+		len = count - 1;
+	memcpy(dest, src, len);
+	dest[len] = 0;
+	return res;
+}
+
+/*
+ * This function will append a builtin command line to the command
+ * line provided by the bootloader. Kconfig options can be used to alter
+ * the behavior of this builtin command line.
+ * @dest: The destination of the final appended/prepended string.
+ * @src: The starting string or NULL if there isn't one. Must not equal dest.
+ * @length: the length of dest buffer.
+ */
+static __always_inline void cmdline_build(char *dest, const char *src, size_t length)
+{
+	if (length <= 0)
+		return;
+
+	dest[0] = 0;
+
+#ifdef CONFIG_CMDLINE
+	if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !src || !src[0]) {
+		cmdline_strlcat(dest, CONFIG_CMDLINE, length);
+		return;
+	}
+#endif
+	if (dest != src)
+		cmdline_strlcat(dest, src, length);
+#ifdef CONFIG_CMDLINE
+	if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) && sizeof(CONFIG_CMDLINE) > 1)
+		cmdline_strlcat(dest, " " CONFIG_CMDLINE, length);
+#endif
+}
+
+#endif /* _LINUX_CMDLINE_H */
-- 
2.25.0
[PATCH v2 0/7] Improve boot command line handling
The purpose of this series is to improve and enhance the handling of kernel boot arguments. It is first focussed on powerpc but also extends the capability for other arches. This is based on suggestion from Daniel Walker Christophe Leroy (7): cmdline: Add generic function to build command line. drivers: of: use cmdline building function powerpc: convert to generic builtin command line cmdline: Add capability to prepend the command line powerpc: add capability to prepend default command line cmdline: Gives architectures opportunity to use generically defined boot cmdline manipulation powerpc: use generic CMDLINE manipulations arch/powerpc/Kconfig| 37 ++- arch/powerpc/kernel/prom_init.c | 35 +++--- drivers/of/fdt.c| 23 ++-- include/linux/cmdline.h | 65 + init/Kconfig| 56 5 files changed, 133 insertions(+), 83 deletions(-) create mode 100644 include/linux/cmdline.h -- 2.25.0
[PATCH v2 2/7] drivers: of: use cmdline building function
This patch uses the new cmdline building function to concatenate the of provided cmdline with built-in parts based on compile-time options. Signed-off-by: Christophe Leroy --- drivers/of/fdt.c | 23 --- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index dcc1dd96911a..cf2b95b8f298 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -25,6 +25,7 @@ #include #include #include +#include #include /* for COMMAND_LINE_SIZE */ #include @@ -1050,26 +1051,10 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, /* Retrieve command line */ p = of_get_flat_dt_prop(node, "bootargs", ); - if (p != NULL && l > 0) - strlcpy(data, p, min(l, COMMAND_LINE_SIZE)); + if (l <= 0) + p = NULL; - /* -* CONFIG_CMDLINE is meant to be a default in case nothing else -* managed to set the command line, unless CONFIG_CMDLINE_FORCE -* is set in which case we override whatever was found earlier. -*/ -#ifdef CONFIG_CMDLINE -#if defined(CONFIG_CMDLINE_EXTEND) - strlcat(data, " ", COMMAND_LINE_SIZE); - strlcat(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#elif defined(CONFIG_CMDLINE_FORCE) - strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#else - /* No arguments from boot loader, use kernel's cmdl*/ - if (!((char *)data)[0]) - strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif -#endif /* CONFIG_CMDLINE */ + cmdline_build(data, p, COMMAND_LINE_SIZE); pr_debug("Command line is: %s\n", (char *)data); -- 2.25.0
Re: [PATCH 0/2] Fix CMDLINE_EXTEND handling for FDT "bootargs"
On Mon, Mar 01, 2021 at 11:26:14AM -0600, Rob Herring wrote: > +PPC folks and Daniel W > > On Mon, Mar 1, 2021 at 8:42 AM Will Deacon wrote: > > > > On Mon, Mar 01, 2021 at 08:19:32AM -0600, Rob Herring wrote: > > > On Thu, Feb 25, 2021 at 6:59 AM Will Deacon wrote: > > > > We recently [1] enabled support for CMDLINE_EXTEND on arm64, however > > > > when I started looking at replacing Android's out-of-tree > > > > implementation [2] > > > > > > Did anyone go read the common, reworked version of all this I > > > referenced that supports prepend and append. Here it is again[1]. > > > Maybe I should have been more assertive there and said 'extend' is > > > ambiguous. > > > > I tried reading that, but (a) most of the series is not in the mailing list > > archives and (b) the patch that _is_ doesn't touch CMDLINE_EXTEND at all. > > Right now the code in mainline does the opposite of what it's documented to > > do. > > Actually, there is a newer version I found: > > https://lore.kernel.org/linuxppc-dev/1551469472-53043-1-git-send-email-danie...@cisco.com/ > https://lore.kernel.org/linuxppc-dev/1551469472-53043-2-git-send-email-danie...@cisco.com/ > https://lore.kernel.org/linuxppc-dev/1551469472-53043-3-git-send-email-danie...@cisco.com/ > > (Once again, there's some weird threading going on) > I'm happy to work with anyone to resubmit the changes. We currently use the changes in Cisco, and we have used them for many years. I was planning to update and resubmit since someone has recently inquired about why it wasn't upstream. Daniel
Re: [RFC PATCH v1] powerpc: Enable KFENCE for PPC32
On Tue, 2 Mar 2021 at 12:21, Christophe Leroy wrote: [...] > >> Booting with 'no_hash_pointers" I get the following. Does it helps ? > >> > >> [ 16.837198] > >> == > >> [ 16.848521] BUG: KFENCE: invalid read in > >> finish_task_switch.isra.0+0x54/0x23c > >> [ 16.848521] > >> [ 16.857158] Invalid read at 0xdf98800a: > >> [ 16.861004] finish_task_switch.isra.0+0x54/0x23c > >> [ 16.865731] kunit_try_run_case+0x5c/0xd0 > >> [ 16.869780] kunit_generic_run_threadfn_adapter+0x24/0x30 > >> [ 16.875199] kthread+0x15c/0x174 > >> [ 16.878460] ret_from_kernel_thread+0x14/0x1c > >> [ 16.882847] > >> [ 16.884351] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: GB > >> 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty #4674 > >> [ 16.895908] NIP: c016eb8c LR: c02f50dc CTR: c016eb38 > >> [ 16.900963] REGS: e2449d90 TRAP: 0301 Tainted: GB > >> (5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty) > >> [ 16.911386] MSR: 9032 CR: 2204 XER: > >> [ 16.918153] DAR: df98800a DSISR: 2000 > >> [ 16.918153] GPR00: c02f50dc e2449e50 c1140d00 e100dd24 c084b13c > >> 0008 c084b32b c016eb38 > >> [ 16.918153] GPR08: c085 df988000 c0d1 e2449eb0 22000288 > >> [ 16.936695] NIP [c016eb8c] test_invalid_access+0x54/0x108 > >> [ 16.942125] LR [c02f50dc] kunit_try_run_case+0x5c/0xd0 > >> [ 16.947292] Call Trace: > >> [ 16.949746] [e2449e50] [c005a5ec] finish_task_switch.isra.0+0x54/0x23c > >> (unreliable) > > > > The "(unreliable)" might be a clue that it's related to ppc32 stack > > unwinding. Any ppc expert know what this is about? 
> > > >> [ 16.957443] [e2449eb0] [c02f50dc] kunit_try_run_case+0x5c/0xd0 > >> [ 16.963319] [e2449ed0] [c02f63ec] > >> kunit_generic_run_threadfn_adapter+0x24/0x30 > >> [ 16.970574] [e2449ef0] [c004e710] kthread+0x15c/0x174 > >> [ 16.975670] [e2449f30] [c001317c] ret_from_kernel_thread+0x14/0x1c > >> [ 16.981896] Instruction dump: > >> [ 16.984879] 8129d608 38e7eb38 81020280 911f004c 3900 995f0024 > >> 907f0028 90ff001c > >> [ 16.992710] 3949000a 915f0020 3d40c0d1 3d00c085 <8929000a> 3908adb0 > >> 812a4b98 3d40c02f > >> [ 17.000711] > >> == > >> [ 17.008223] # test_invalid_access: EXPECTATION FAILED at > >> mm/kfence/kfence_test.c:636 > >> [ 17.008223] Expected report_matches() to be true, but is > >> false > >> [ 17.023243] not ok 21 - test_invalid_access > > > > On a fault in test_invalid_access, KFENCE prints the stack trace based > > on the information in pt_regs. So we do not think there's anything we > > can do to improve stack printing pe-se. > > stack printing, probably not. Would be good anyway to mark the last level > [unreliable] as the ppc does. We use stack_trace_save_regs() + stack_trace_print(). > IIUC, on ppc the address in the stack frame of the caller is written by the > caller. In most tests, > there is some function call being done before the fault, for instance > test_kmalloc_aligned_oob_read() does a call to kunit_do_assertion which > populates the address of the > call in the stack. However this is fragile. Interesting, this might explain it. > This works for function calls because in order to call a subfunction, a > function has to set up a > stack frame in order to same the value in the Link Register, which contains > the address of the > function's parent and that will be clobbered by the sub-function call. > > However, it cannot be done by exceptions, because exceptions can happen in a > function that has no > stack frame (because that function has no need to call a subfunction and > doesn't need to same > anything on the stack). 
If the exception handler was writting the caller's > address in the stack > frame, it would in fact write it in the parent's frame, leading to a mess. > > But in fact the information is in pt_regs, it is in regs->nip so KFENCE > should be able to use that > instead of the stack. Perhaps stack_trace_save_regs() needs fixing for ppc32? Although that seems to use arch_stack_walk(). > > What's confusing is that it's only this test, and none of the others. > > Given that, it might be code-gen related, which results in some subtle > > issue with stack unwinding. There are a few things to try, if you feel > > like it: > > > > -- Change the unwinder, if it's possible for ppc32. > > I don't think it is possible. > > > > > -- Add code to test_invalid_access(), to get the compiler to emit > > different code. E.g. add a bunch (unnecessary) function calls, or add > > barriers, etc. > > The following does the trick > > diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c > index 4acf4251ee04..22550676cd1f 100644 > --- a/mm/kfence/kfence_test.c > +++ b/mm/kfence/kfence_test.c > @@ -631,8 +631,11 @@ static void test_invalid_access(struct kunit *test) > .addr = &__kfence_pool[10], >
Re: [PATCH v19 00/13] Carry forward IMA measurement log on kexec on ARM64
On 3/2/21 7:06 AM, Rob Herring wrote: On Sun, Feb 21, 2021 at 11:49 AM Lakshmi Ramasubramanian wrote: On kexec file load Integrity Measurement Architecture (IMA) subsystem may verify the IMA signature of the kernel and initramfs, and measure it. The command line parameters passed to the kernel in the kexec call may also be measured by IMA. A remote attestation service can verify a TPM quote based on the TPM event log, the IMA measurement list, and the TPM PCR data. This can be achieved only if the IMA measurement log is carried over from the current kernel to the next kernel across the kexec call. powerpc already supports carrying forward the IMA measurement log on kexec. This patch set adds support for carrying forward the IMA measurement log on kexec on ARM64. This patch set moves the platform independent code defined for powerpc such that it can be reused for other platforms as well. A chosen node "linux,ima-kexec-buffer" is added to the DTB for ARM64 to hold the address and the size of the memory reserved to carry the IMA measurement log. This patch set has been tested for ARM64 platform using QEMU. I would like help from the community for testing this change on powerpc. Thanks. This patch set is based on commit f31e3386a4e9 ("ima: Free IMA measurement buffer after kexec syscall") in https://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git "ima-kexec-fixes" branch. [...] 
Lakshmi Ramasubramanian (10): kexec: Move ELF fields to struct kimage arm64: Use ELF fields defined in 'struct kimage' powerpc: Use ELF fields defined in 'struct kimage' x86: Use ELF fields defined in 'struct kimage' powerpc: Move ima buffer fields to struct kimage powerpc: Enable passing IMA log to next kernel on kexec powerpc: Move arch independent ima kexec functions to drivers/of/kexec.c kexec: Use fdt_appendprop_addrrange() to add ima buffer to FDT powerpc: Delete unused function delete_fdt_mem_rsv() arm64: Enable passing IMA log to next kernel on kexec Rob Herring (3): of: Add a common kexec FDT setup function arm64: Use common of_kexec_alloc_and_setup_fdt() powerpc: Use common of_kexec_alloc_and_setup_fdt() arch/arm64/Kconfig | 1 + arch/arm64/include/asm/kexec.h | 4 - arch/arm64/kernel/machine_kexec_file.c | 194 +-- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/ima.h | 30 -- arch/powerpc/include/asm/kexec.h | 14 +- arch/powerpc/kexec/Makefile| 7 - arch/powerpc/kexec/elf_64.c| 30 +- arch/powerpc/kexec/file_load.c | 183 +- arch/powerpc/kexec/file_load_64.c | 21 +- arch/powerpc/kexec/ima.c | 219 arch/x86/include/asm/kexec.h | 5 - arch/x86/kernel/crash.c| 14 +- arch/x86/kernel/kexec-bzimage64.c | 2 +- arch/x86/kernel/machine_kexec_64.c | 4 +- drivers/of/Makefile| 6 + drivers/of/kexec.c | 458 + include/linux/kexec.h | 8 + include/linux/of.h | 7 + security/integrity/ima/ima.h | 4 - security/integrity/ima/ima_kexec.c | 9 +- 21 files changed, 539 insertions(+), 683 deletions(-) delete mode 100644 arch/powerpc/include/asm/ima.h delete mode 100644 arch/powerpc/kexec/ima.c create mode 100644 drivers/of/kexec.c I fixed up the Fixes tags and applied for 5.13. Thanks a lot Rob. -lakshmi
Re: [PATCH 0/2] Fix CMDLINE_EXTEND handling for FDT "bootargs"
Le 02/03/2021 à 15:56, Rob Herring a écrit : On Mon, Mar 1, 2021 at 11:45 AM Christophe Leroy wrote: Le 01/03/2021 à 18:26, Rob Herring a écrit : +PPC folks and Daniel W On Mon, Mar 1, 2021 at 8:42 AM Will Deacon wrote: On Mon, Mar 01, 2021 at 08:19:32AM -0600, Rob Herring wrote: On Thu, Feb 25, 2021 at 6:59 AM Will Deacon wrote: We recently [1] enabled support for CMDLINE_EXTEND on arm64, however when I started looking at replacing Android's out-of-tree implementation [2] Did anyone go read the common, reworked version of all this I referenced that supports prepend and append. Here it is again[1]. Maybe I should have been more assertive there and said 'extend' is ambiguous. I tried reading that, but (a) most of the series is not in the mailing list archives and (b) the patch that _is_ doesn't touch CMDLINE_EXTEND at all. Right now the code in mainline does the opposite of what it's documented to do. Actually, there is a newer version I found: https://lore.kernel.org/linuxppc-dev/1551469472-53043-1-git-send-email-danie...@cisco.com/ https://lore.kernel.org/linuxppc-dev/1551469472-53043-2-git-send-email-danie...@cisco.com/ https://lore.kernel.org/linuxppc-dev/1551469472-53043-3-git-send-email-danie...@cisco.com/ This was seen as too intrusive into powerpc. It looked like the main issue was string functions for KASAN? This is one issue, yes. As far as being too complex, I think that will be needed if you look at all architectures and non-DT cases. As far as I remember, I couldn't understand why we absolutely need to define the command line string in the common part of the code, leading to being obliged to use macros in order to allow the architecture to specify in which section it wants the string. Why not leave the definition of the string to the architecture and just declare it in the common code, allowing the architecture to put it where it suits it, reducing opacity and allowing use of standard static inline functions instead of ugly macros?
I proposed an alternative at https://patchwork.ozlabs.org/project/linuxppc-dev/cover/cover.1554195798.git.christophe.le...@c-s.fr/ but never got any feedback. Didn't go to a list I subscribe to. In particular, if it had gone to DT list and into PW you would have gotten a reply from me. Sorry for that. Original series from Daniel (https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20190319232448.45964-2-danie...@cisco.com/) was sent only to the linuxppc-dev list, and Michael suggested to also send it to the linux-arch list, and I also always copy linux-kernel. If there is new interest for that functionality, I can try and rebase my series. Christophe
Re: [PATCH v19 00/13] Carry forward IMA measurement log on kexec on ARM64
On Sun, Feb 21, 2021 at 11:49 AM Lakshmi Ramasubramanian wrote: > > On kexec file load Integrity Measurement Architecture (IMA) subsystem > may verify the IMA signature of the kernel and initramfs, and measure > it. The command line parameters passed to the kernel in the kexec call > may also be measured by IMA. A remote attestation service can verify > a TPM quote based on the TPM event log, the IMA measurement list, and > the TPM PCR data. This can be achieved only if the IMA measurement log > is carried over from the current kernel to the next kernel across > the kexec call. > > powerpc already supports carrying forward the IMA measurement log on > kexec. This patch set adds support for carrying forward the IMA > measurement log on kexec on ARM64. > > This patch set moves the platform independent code defined for powerpc > such that it can be reused for other platforms as well. A chosen node > "linux,ima-kexec-buffer" is added to the DTB for ARM64 to hold > the address and the size of the memory reserved to carry > the IMA measurement log. > > This patch set has been tested for ARM64 platform using QEMU. > I would like help from the community for testing this change on powerpc. > Thanks. > > This patch set is based on > commit f31e3386a4e9 ("ima: Free IMA measurement buffer after kexec syscall") > in https://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git > "ima-kexec-fixes" branch. > > Changelog: > > v19 > - Moved ELF related fields from "struct kimage_arch" for x86, arm64, > and powerpc architectures to "struct kimage". > > v18 > - Added a parameter to of_kexec_alloc_and_setup_fdt() for the caller > to specify additional space needed for the FDT buffer > - Renamed arm64 and powerpc ELF buffer address field in > "struct kimage_arch" to elf_load_addr to match x86_64 name. 
> - Removed of_ima_add_kexec_buffer() and instead directly set > ima_buffer_addr and ima_buffer_size in ima_add_kexec_buffer() > - Moved FDT_EXTRA_SPACE definition from include/linux/of.h to > drivers/of/kexec.c > > v17 > - Renamed of_kexec_setup_new_fdt() to of_kexec_alloc_and_setup_fdt(), > and moved memory allocation for the new FDT to this function. > > v16 > - Defined functions to allocate and free buffer for FDT for powerpc > and arm64. > - Moved ima_buffer_addr and ima_buffer_size fields from > "struct kimage_arch" in powerpc to "struct kimage" > v15 > - Included Rob's patches in the patch set, and rebased > the changes to "next-integrity" branch. > - Allocate memory for DTB, on arm64, using kmalloc() instead of > vmalloc() to keep it consistent with powerpc implementation. > - Call of_kexec_setup_new_fdt() from setup_new_fdt_ppc64() and > remove setup_new_fdt() in the same patch to keep it bisect safe. > > v14 > - Select CONFIG_HAVE_IMA_KEXEC for CONFIG_KEXEC_FILE, for powerpc > and arm64, if CONFIG_IMA is enabled. > - Use IS_ENABLED() macro instead of "#ifdef" in remove_ima_buffer(), > ima_get_kexec_buffer(), and ima_free_kexec_buffer(). > - Call of_kexec_setup_new_fdt() from setup_new_fdt_ppc64() and > remove setup_new_fdt() in "arch/powerpc/kexec/file_load.c". > > v13 > - Moved the arch independent functions to drivers/of/kexec.c > and then refactored the code. > - Moved arch_ima_add_kexec_buffer() to > security/integrity/ima/ima_kexec.c > > v12 > - Use fdt_appendprop_addrrange() in setup_ima_buffer() > to setup the IMA measurement list property in > the device tree. > - Moved architecture independent functions from > "arch/powerpc/kexec/ima.c" to "drivers/of/kexec." > - Deleted "arch/powerpc/kexec/ima.c" and > "arch/powerpc/include/asm/ima.h". 
> > v11 > - Rebased the changes on the kexec code refactoring done by > Rob Herring in his "dt/kexec" branch > - Removed "extern" keyword in function declarations > - Removed unnecessary header files included in C files > - Updated patch descriptions per Thiago's comments > > v10 > - Moved delete_fdt_mem_rsv(), remove_ima_buffer(), > get_ima_kexec_buffer, and get_root_addr_size_cells() > to drivers/of/kexec.c > - Moved arch_ima_add_kexec_buffer() to > security/integrity/ima/ima_kexec.c > - Conditionally define IMA buffer fields in struct kimage_arch > > v9 > - Moved delete_fdt_mem_rsv() to drivers/of/kexec_fdt.c > - Defined a new function get_ima_kexec_buffer() in > drivers/of/ima_kexec.c to replace do_get_kexec_buffer() > - Changed remove_ima_kexec_buffer() to the original function name > remove_ima_buffer() > - Moved remove_ima_buffer() to drivers/of/ima_kexec.c > - Moved ima_get_kexec_buffer() and ima_free_kexec_buffer() > to security/integrity/ima/ima_kexec.c > > v8: > - Moved remove_ima_kexec_buffer(), do_get_kexec_buffer(), and > delete_fdt_mem_rsv() to drivers/of/fdt.c > - Moved ima_dump_measurement_list() and ima_add_kexec_buffer() > back to
Re: [PATCH v2 28/37] KVM: PPC: Book3S HV P9: Add helpers for OS SPR handling
Nicholas Piggin writes:

> This is a first step to wrapping supervisor and user SPR saving and
> loading up into helpers, which will then be called independently in
> bare metal and nested HV cases in order to optimise SPR access.
>
> Signed-off-by: Nicholas Piggin
> ---
> +/* vcpu guest regs must already be saved */
> +static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
> +		struct p9_host_os_sprs *host_os_sprs)
> +{
> +	mtspr(SPRN_PSPB, 0);
> +	mtspr(SPRN_WORT, 0);
> +	mtspr(SPRN_UAMOR, 0);
> +	mtspr(SPRN_PSPB, 0);

Not your fault, but PSPB is set twice here.

> +
> +	mtspr(SPRN_DSCR, host_os_sprs->dscr);
> +	mtspr(SPRN_TIDR, host_os_sprs->tidr);
> +	mtspr(SPRN_IAMR, host_os_sprs->iamr);
> +
> +	if (host_os_sprs->amr != vcpu->arch.amr)
> +		mtspr(SPRN_AMR, host_os_sprs->amr);
> +
> +	if (host_os_sprs->fscr != vcpu->arch.fscr)
> +		mtspr(SPRN_FSCR, host_os_sprs->fscr);
> +}
> +
> @@ -3605,34 +3666,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>  	vcpu->arch.dec_expires = dec + tb;
>  	vcpu->cpu = -1;
>  	vcpu->arch.thread_cpu = -1;
> -	vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
> -
> -	vcpu->arch.iamr = mfspr(SPRN_IAMR);
> -	vcpu->arch.pspb = mfspr(SPRN_PSPB);
> -	vcpu->arch.fscr = mfspr(SPRN_FSCR);
> -	vcpu->arch.tar = mfspr(SPRN_TAR);
> -	vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
> -	vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
> -	vcpu->arch.bescr = mfspr(SPRN_BESCR);
> -	vcpu->arch.wort = mfspr(SPRN_WORT);
> -	vcpu->arch.tid = mfspr(SPRN_TIDR);
> -	vcpu->arch.amr = mfspr(SPRN_AMR);
> -	vcpu->arch.uamor = mfspr(SPRN_UAMOR);
> -	vcpu->arch.dscr = mfspr(SPRN_DSCR);
> -
> -	mtspr(SPRN_PSPB, 0);
> -	mtspr(SPRN_WORT, 0);
> -	mtspr(SPRN_UAMOR, 0);
> -	mtspr(SPRN_DSCR, host_dscr);
> -	mtspr(SPRN_TIDR, host_tidr);
> -	mtspr(SPRN_IAMR, host_iamr);
> -	mtspr(SPRN_PSPB, 0);
>
> -	if (host_amr != vcpu->arch.amr)
> -		mtspr(SPRN_AMR, host_amr);
> +	restore_p9_host_os_sprs(vcpu, &host_os_sprs);
>
> -	if (host_fscr != vcpu->arch.fscr)
> -		mtspr(SPRN_FSCR, host_fscr);
> +
> 	store_spr_state(vcpu);

store_spr_state should come first, right? We want to save the guest state before restoring the host state.

>
>  	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
>  	store_fp_state(&vcpu->arch.fp);
Re: [PATCH 0/2] Fix CMDLINE_EXTEND handling for FDT "bootargs"
On Mon, Mar 1, 2021 at 11:45 AM Christophe Leroy wrote: > > > > Le 01/03/2021 à 18:26, Rob Herring a écrit : > > +PPC folks and Daniel W > > > > On Mon, Mar 1, 2021 at 8:42 AM Will Deacon wrote: > >> > >> On Mon, Mar 01, 2021 at 08:19:32AM -0600, Rob Herring wrote: > >>> On Thu, Feb 25, 2021 at 6:59 AM Will Deacon wrote: > We recently [1] enabled support for CMDLINE_EXTEND on arm64, however > when I started looking at replacing Android's out-of-tree implementation > [2] > >>> > >>> Did anyone go read the common, reworked version of all this I > >>> referenced that supports prepend and append. Here it is again[1]. > >>> Maybe I should have been more assertive there and said 'extend' is > >>> ambiguous. > >> > >> I tried reading that, but (a) most of the series is not in the mailing list > >> archives and (b) the patch that _is_ doesn't touch CMDLINE_EXTEND at all. > >> Right now the code in mainline does the opposite of what it's documented to > >> do. > > > > Actually, there is a newer version I found: > > > > https://lore.kernel.org/linuxppc-dev/1551469472-53043-1-git-send-email-danie...@cisco.com/ > > https://lore.kernel.org/linuxppc-dev/1551469472-53043-2-git-send-email-danie...@cisco.com/ > > https://lore.kernel.org/linuxppc-dev/1551469472-53043-3-git-send-email-danie...@cisco.com/ > > This was seen as too much intrusive into powerpc. It looked like the main issue was string functions for KASAN? As far as being too complex, I think that will be needed if you look at all architectures and non-DT cases. > I proposed an alternative at > https://patchwork.ozlabs.org/project/linuxppc-dev/cover/cover.1554195798.git.christophe.le...@c-s.fr/ > but > never got any feedback. Didn't go to a list I subscribe to. In particular, if it had gone to DT list and into PW you would have gotten a reply from me. Rob
Re: [RFC PATCH v1] powerpc: Enable KFENCE for PPC32
On Tue, 2 Mar 2021 at 10:27, Christophe Leroy wrote: > Le 02/03/2021 à 10:21, Alexander Potapenko a écrit : > >> [ 14.998426] BUG: KFENCE: invalid read in > >> finish_task_switch.isra.0+0x54/0x23c > >> [ 14.998426] > >> [ 15.007061] Invalid read at 0x(ptrval): > >> [ 15.010906] finish_task_switch.isra.0+0x54/0x23c > >> [ 15.015633] kunit_try_run_case+0x5c/0xd0 > >> [ 15.019682] kunit_generic_run_threadfn_adapter+0x24/0x30 > >> [ 15.025099] kthread+0x15c/0x174 > >> [ 15.028359] ret_from_kernel_thread+0x14/0x1c > >> [ 15.032747] > >> [ 15.034251] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: GB > >> 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty #4674 > >> [ 15.045811] > >> == > >> [ 15.053324] # test_invalid_access: EXPECTATION FAILED at > >> mm/kfence/kfence_test.c:636 > >> [ 15.053324] Expected report_matches() to be true, but is > >> false > >> [ 15.068359] not ok 21 - test_invalid_access > > > > The test expects the function name to be test_invalid_access, i. e. > > the first line should be "BUG: KFENCE: invalid read in > > test_invalid_access". > > The error reporting function unwinds the stack, skips a couple of > > "uninteresting" frames > > (https://elixir.bootlin.com/linux/v5.12-rc1/source/mm/kfence/report.c#L43) > > and uses the first "interesting" one frame to print the report header > > (https://elixir.bootlin.com/linux/v5.12-rc1/source/mm/kfence/report.c#L226). > > > > It's strange that test_invalid_access is missing altogether from the > > stack trace - is that expected? > > Can you try printing the whole stacktrace without skipping any frames > > to see if that function is there? > > > > Booting with 'no_hash_pointers" I get the following. Does it helps ? 
> > [ 16.837198] > == > [ 16.848521] BUG: KFENCE: invalid read in > finish_task_switch.isra.0+0x54/0x23c > [ 16.848521] > [ 16.857158] Invalid read at 0xdf98800a: > [ 16.861004] finish_task_switch.isra.0+0x54/0x23c > [ 16.865731] kunit_try_run_case+0x5c/0xd0 > [ 16.869780] kunit_generic_run_threadfn_adapter+0x24/0x30 > [ 16.875199] kthread+0x15c/0x174 > [ 16.878460] ret_from_kernel_thread+0x14/0x1c > [ 16.882847] > [ 16.884351] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: GB > 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty #4674 > [ 16.895908] NIP: c016eb8c LR: c02f50dc CTR: c016eb38 > [ 16.900963] REGS: e2449d90 TRAP: 0301 Tainted: GB > (5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty) > [ 16.911386] MSR: 9032 CR: 2204 XER: > [ 16.918153] DAR: df98800a DSISR: 2000 > [ 16.918153] GPR00: c02f50dc e2449e50 c1140d00 e100dd24 c084b13c 0008 > c084b32b c016eb38 > [ 16.918153] GPR08: c085 df988000 c0d1 e2449eb0 22000288 > [ 16.936695] NIP [c016eb8c] test_invalid_access+0x54/0x108 > [ 16.942125] LR [c02f50dc] kunit_try_run_case+0x5c/0xd0 > [ 16.947292] Call Trace: > [ 16.949746] [e2449e50] [c005a5ec] finish_task_switch.isra.0+0x54/0x23c > (unreliable) The "(unreliable)" might be a clue that it's related to ppc32 stack unwinding. Any ppc expert know what this is about? 
> [ 16.957443] [e2449eb0] [c02f50dc] kunit_try_run_case+0x5c/0xd0
> [ 16.963319] [e2449ed0] [c02f63ec] kunit_generic_run_threadfn_adapter+0x24/0x30
> [ 16.970574] [e2449ef0] [c004e710] kthread+0x15c/0x174
> [ 16.975670] [e2449f30] [c001317c] ret_from_kernel_thread+0x14/0x1c
> [ 16.981896] Instruction dump:
> [ 16.984879] 8129d608 38e7eb38 81020280 911f004c 3900 995f0024 907f0028 90ff001c
> [ 16.992710] 3949000a 915f0020 3d40c0d1 3d00c085 <8929000a> 3908adb0 812a4b98 3d40c02f
> [ 17.000711] ==
> [ 17.008223] # test_invalid_access: EXPECTATION FAILED at mm/kfence/kfence_test.c:636
> [ 17.008223] Expected report_matches() to be true, but is false
> [ 17.023243] not ok 21 - test_invalid_access

On a fault in test_invalid_access, KFENCE prints the stack trace based on the information in pt_regs. So we do not think there's anything we can do to improve stack printing per se. What's confusing is that it's only this test, and none of the others. Given that, it might be code-gen related, which results in some subtle issue with stack unwinding.

There are a few things to try, if you feel like it:

-- Change the unwinder, if it's possible for ppc32.

-- Add code to test_invalid_access(), to get the compiler to emit different code. E.g. add a bunch of (unnecessary) function calls, or add barriers, etc.

-- Play with compiler options. We already pass -fno-optimize-sibling-calls for kfence_test.o to avoid tail-call optimizations that'd hide stack trace entries. But perhaps there's something ppc-specific we missed?

Well, the good thing is that KFENCE detects the bad access just fine. Since, according to the test,
Re: [PATCH v2 24/37] KVM: PPC: Book3S HV P9: inline kvmhv_load_hv_regs_and_go into __kvmhv_vcpu_entry_p9
Nicholas Piggin writes: > Now the initial C implementation is done, inline more HV code to make > rearranging things easier. > > And rename __kvmhv_vcpu_entry_p9 to drop the leading underscores as it's > now C, and is now a more complete vcpu entry. > > Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas > --- > arch/powerpc/include/asm/kvm_book3s_64.h | 2 +- > arch/powerpc/kvm/book3s_hv.c | 181 +-- > arch/powerpc/kvm/book3s_hv_interrupt.c | 168 - > 3 files changed, 169 insertions(+), 182 deletions(-) > > diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h > b/arch/powerpc/include/asm/kvm_book3s_64.h > index c214bcffb441..eaf3a562bf1e 100644 > --- a/arch/powerpc/include/asm/kvm_book3s_64.h > +++ b/arch/powerpc/include/asm/kvm_book3s_64.h > @@ -153,7 +153,7 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu > *vcpu) > return radix; > } > > -int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu); > +int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long > lpcr); > > #define KVM_DEFAULT_HPT_ORDER24 /* 16MB HPT by default */ > #endif > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 28a2761515e3..f99503acdda5 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -3442,183 +3442,6 @@ static noinline void kvmppc_run_core(struct > kvmppc_vcore *vc) > trace_kvmppc_run_core(vc, 1); > } > > -static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu > *vcpu, u64 lpcr) > -{ > - struct kvmppc_vcore *vc = vcpu->arch.vcore; > - struct kvm_nested_guest *nested = vcpu->arch.nested; > - u32 lpid; > - > - lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; > - > - mtspr(SPRN_LPID, lpid); > - mtspr(SPRN_LPCR, lpcr); > - mtspr(SPRN_PID, vcpu->arch.pid); > - isync(); > - > - /* TLBIEL must have LPIDR set, so set guest LPID before flushing. 
*/ > - kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested); > -} > - > -static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid) > -{ > - mtspr(SPRN_PID, pid); > - mtspr(SPRN_LPID, kvm->arch.host_lpid); > - mtspr(SPRN_LPCR, kvm->arch.host_lpcr); > - isync(); > -} > - > -/* > - * Load up hypervisor-mode registers on P9. > - */ > -static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, > - unsigned long lpcr) > -{ > - struct kvm *kvm = vcpu->kvm; > - struct kvmppc_vcore *vc = vcpu->arch.vcore; > - s64 hdec; > - u64 tb, purr, spurr; > - int trap; > - unsigned long host_hfscr = mfspr(SPRN_HFSCR); > - unsigned long host_ciabr = mfspr(SPRN_CIABR); > - unsigned long host_dawr0 = mfspr(SPRN_DAWR0); > - unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0); > - unsigned long host_psscr = mfspr(SPRN_PSSCR); > - unsigned long host_pidr = mfspr(SPRN_PID); > - unsigned long host_dawr1 = 0; > - unsigned long host_dawrx1 = 0; > - > - if (cpu_has_feature(CPU_FTR_DAWR1)) { > - host_dawr1 = mfspr(SPRN_DAWR1); > - host_dawrx1 = mfspr(SPRN_DAWRX1); > - } > - > - tb = mftb(); > - hdec = time_limit - tb; > - if (hdec < 0) > - return BOOK3S_INTERRUPT_HV_DECREMENTER; > - > - if (vc->tb_offset) { > - u64 new_tb = tb + vc->tb_offset; > - mtspr(SPRN_TBU40, new_tb); > - tb = mftb(); > - if ((tb & 0xff) < (new_tb & 0xff)) > - mtspr(SPRN_TBU40, new_tb + 0x100); > - vc->tb_offset_applied = vc->tb_offset; > - } > - > - if (vc->pcr) > - mtspr(SPRN_PCR, vc->pcr | PCR_MASK); > - mtspr(SPRN_DPDES, vc->dpdes); > - mtspr(SPRN_VTB, vc->vtb); > - > - local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR); > - local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR); > - mtspr(SPRN_PURR, vcpu->arch.purr); > - mtspr(SPRN_SPURR, vcpu->arch.spurr); > - > - if (dawr_enabled()) { > - mtspr(SPRN_DAWR0, vcpu->arch.dawr0); > - mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); > - if (cpu_has_feature(CPU_FTR_DAWR1)) { > - mtspr(SPRN_DAWR1, vcpu->arch.dawr1); > - mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); > - } > - } 
> - mtspr(SPRN_CIABR, vcpu->arch.ciabr); > - mtspr(SPRN_IC, vcpu->arch.ic); > - > - mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC | > - (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); > - > - mtspr(SPRN_HFSCR, vcpu->arch.hfscr); > - > - mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0); > - mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1); > - mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2); > - mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3); > - > - mtspr(SPRN_AMOR, ~0UL); > - > - switch_mmu_to_guest_radix(kvm, vcpu, lpcr); > - > - /* > - * P9
Re: [RFC PATCH v1] powerpc: Enable KFENCE for PPC32
> [ 14.998426] BUG: KFENCE: invalid read in > finish_task_switch.isra.0+0x54/0x23c > [ 14.998426] > [ 15.007061] Invalid read at 0x(ptrval): > [ 15.010906] finish_task_switch.isra.0+0x54/0x23c > [ 15.015633] kunit_try_run_case+0x5c/0xd0 > [ 15.019682] kunit_generic_run_threadfn_adapter+0x24/0x30 > [ 15.025099] kthread+0x15c/0x174 > [ 15.028359] ret_from_kernel_thread+0x14/0x1c > [ 15.032747] > [ 15.034251] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: GB > 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty #4674 > [ 15.045811] > == > [ 15.053324] # test_invalid_access: EXPECTATION FAILED at > mm/kfence/kfence_test.c:636 > [ 15.053324] Expected report_matches() to be true, but is false > [ 15.068359] not ok 21 - test_invalid_access The test expects the function name to be test_invalid_access, i. e. the first line should be "BUG: KFENCE: invalid read in test_invalid_access". The error reporting function unwinds the stack, skips a couple of "uninteresting" frames (https://elixir.bootlin.com/linux/v5.12-rc1/source/mm/kfence/report.c#L43) and uses the first "interesting" one frame to print the report header (https://elixir.bootlin.com/linux/v5.12-rc1/source/mm/kfence/report.c#L226). It's strange that test_invalid_access is missing altogether from the stack trace - is that expected? Can you try printing the whole stacktrace without skipping any frames to see if that function is there?
Re: [PATCH next v3 12/15] printk: introduce a kmsg_dump iterator
On 2021-03-01, Petr Mladek wrote: >> diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c >> index 532f22637783..5a64b24a91c2 100644 >> --- a/arch/powerpc/kernel/nvram_64.c >> +++ b/arch/powerpc/kernel/nvram_64.c >> @@ -72,8 +72,7 @@ static const char *nvram_os_partitions[] = { >> NULL >> }; >> >> -static void oops_to_nvram(struct kmsg_dumper *dumper, >> - enum kmsg_dump_reason reason); >> +static void oops_to_nvram(enum kmsg_dump_reason reason); >> >> static struct kmsg_dumper nvram_kmsg_dumper = { >> .dump = oops_to_nvram >> @@ -642,11 +641,11 @@ void __init nvram_init_oops_partition(int >> rtas_partition_exists) >> * that we think will compress sufficiently to fit in the lnx,oops-log >> * partition. If that's too much, go back and capture uncompressed text. >> */ >> -static void oops_to_nvram(struct kmsg_dumper *dumper, >> - enum kmsg_dump_reason reason) >> +static void oops_to_nvram(enum kmsg_dump_reason reason) >> { >> struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; >> static unsigned int oops_count = 0; >> +static struct kmsg_dump_iter iter; >> static bool panicking = false; >> static DEFINE_SPINLOCK(lock); >> unsigned long flags; >> @@ -681,13 +680,14 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, >> return; >> >> if (big_oops_buf) { >> -kmsg_dump_get_buffer(dumper, false, >> +kmsg_dump_rewind(); > > It would be nice to get rid of the kmsg_dump_rewind() calls > in all callers. > > A solution might be to create the following in include/linux/kmsg_dump.h > > #define KMSG_DUMP_ITER_INIT(iter) { \ > .cur_seq = 0, \ > .next_seq = U64_MAX,\ > } > > #define DEFINE_KMSG_DUMP_ITER(iter) \ > struct kmsg_dump_iter iter = KMSG_DUMP_ITER_INIT(iter) For this caller (arch/powerpc/kernel/nvram_64.c) and for (kernel/debug/kdb/kdb_main.c), kmsg_dump_rewind() is called twice within the dumper. So rewind will still be used there. 
> Then we could do the following at the beginning of both
> kmsg_dump_get_buffer() and kmsg_dump_get_line():
>
> 	u64 clear_seq = latched_seq_read_nolock(&clear_seq);
>
> 	if (iter->cur_seq < clear_seq)
> 		cur_seq = clear_seq;

I suppose we need to add this part anyway, if we want to enforce that records before @clear_seq are not to be available for dumpers.

> I am not completely sure about next_seq:
>
>    + kmsg_dump_get_buffer() will set it for the next call anyway.
>      It reads the blocks of messages from the newest.
>
>    + kmsg_dump_get_line() wants to read the entire buffer anyway.
>      But there is a small risk of an infinite loop when new messages
>      are printed when dumping each line.
>
> It might be better to avoid the infinite loop. We could do the following:
>
> 	static void check_and_set_iter(struct kmsg_dump_iter)
> 	{
> 		if (iter->cur_seq == 0 && iter->next_seq == U64_MAX) {
> 			kmsg_dump_rewind(iter);
> 	}
>
> and call this at the beginning of both kmsg_dump_get_buffer()
> and kmsg_dump_get_line()
>
> What do you think?

On a technical level, it does not make any difference. It is purely cosmetic.

Personally, I prefer the rewind directly before the kmsg_dump_get calls because it puts the initializer directly next to the user. As an example to illustrate my view, I prefer:

	for (i = 0; i < n; i++)
		...;

instead of:

	int i = 0;
	...
	for (; i < n; i++)
		...;

Also, I do not really like the special use of 0/U64_MAX to identify special actions of the kmsg_dump_get functions.

> Note that I do not resist on it. But it might make the API easier to
> use from my POV.

Since you do not resist, I will keep the API the same for v4. But I will add the @clear_seq check to the kmsg_dump_get functions.

John Ogness
Re: [RFC PATCH v1] powerpc: Enable KFENCE for PPC32
On Tue, 2 Mar 2021 at 09:37, Christophe Leroy wrote: > Add architecture specific implementation details for KFENCE and enable > KFENCE for the ppc32 architecture. In particular, this implements the > required interface in . Nice! > KFENCE requires that attributes for pages from its memory pool can > individually be set. Therefore, force the Read/Write linear map to be > mapped at page granularity. > > Unit tests succeed on all tests but one: > > [ 15.053324] # test_invalid_access: EXPECTATION FAILED at > mm/kfence/kfence_test.c:636 > [ 15.053324] Expected report_matches() to be true, but > is false > [ 15.068359] not ok 21 - test_invalid_access This is strange, given all the other tests passed. Do you mind sharing the full test log? Thanks, -- Marco
[PATCH] ASoC: fsl_xcvr: Use devm_platform_ioremap_resource_byname() to simplify code
In this function, devm_platform_ioremap_resource_byname() should be suitable to simplify code.

Signed-off-by: Tang Bin
---
 sound/soc/fsl/fsl_xcvr.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c
index 6dd0a5fcd455..5e8284db857b 100644
--- a/sound/soc/fsl/fsl_xcvr.c
+++ b/sound/soc/fsl/fsl_xcvr.c
@@ -1131,7 +1131,7 @@ static int fsl_xcvr_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct fsl_xcvr *xcvr;
-	struct resource *ram_res, *regs_res, *rx_res, *tx_res;
+	struct resource *rx_res, *tx_res;
 	void __iomem *regs;
 	int ret, irq;

@@ -1166,13 +1166,11 @@ static int fsl_xcvr_probe(struct platform_device *pdev)
 		return PTR_ERR(xcvr->pll_ipg_clk);
 	}

-	ram_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ram");
-	xcvr->ram_addr = devm_ioremap_resource(dev, ram_res);
+	xcvr->ram_addr = devm_platform_ioremap_resource_byname(pdev, "ram");
 	if (IS_ERR(xcvr->ram_addr))
 		return PTR_ERR(xcvr->ram_addr);

-	regs_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
-	regs = devm_ioremap_resource(dev, regs_res);
+	regs = devm_platform_ioremap_resource_byname(pdev, "regs");
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);

--
2.18.2
Re: [PATCH] perf bench numa: Fix the condition checks for max number of numa nodes
Em Fri, Feb 26, 2021 at 02:28:27PM +0530, Srikar Dronamraju escreveu: > * Athira Rajeev [2021-02-25 11:50:02]: > > > In systems having higher node numbers available like node > > 255, perf numa bench will fail with SIGABORT. > > > > <<>> > > perf: bench/numa.c:1416: init: Assertion `!(g->p.nr_nodes > 64 || > > g->p.nr_nodes < 0)' failed. > > Aborted (core dumped) > > <<>> > > > > Looks good to me. > > Reviewed-by: Srikar Dronamraju Thanks, applied. - Arnaldo
[PATCH AUTOSEL 4.4 3/8] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset
From: Athira Rajeev [ Upstream commit d137845c973147a22622cc76c7b0bc16f6206323 ] While sampling for marked events, currently we record the sample only if the SIAR valid bit of Sampled Instruction Event Register (SIER) is set. SIAR_VALID bit is used for fetching the instruction address from Sampled Instruction Address Register(SIAR). But there are some usecases, where the user is interested only in the PMU stats at each counter overflow and the exact IP of the overflow event is not required. Dropping SIAR invalid samples will fail to record some of the counter overflows in such cases. Example of such usecase is dumping the PMU stats (event counts) after some regular amount of instructions/events from the userspace (ex: via ptrace). Here counter overflow is indicated to userspace via signal handler, and captured by monitoring and enabling I/O signaling on the event file descriptor. In these cases, we expect to get sample/overflow indication after each specified sample_period. Perf event attribute will not have PERF_SAMPLE_IP set in the sample_type if exact IP of the overflow event is not requested. So while profiling if SAMPLE_IP is not set, just record the counter overflow irrespective of SIAR_VALID check. 
Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev [mpe: Reflow comment and if formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612516492-1428-1-git-send-email-atraj...@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/perf/core-book3s.c | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index e593e7f856ed..7a80e1cff6e2 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2008,7 +2008,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* +* If address is not requested in the sample via +* PERF_SAMPLE_IP, just record that sample irrespective +* of SIAR valid check. +*/ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x8000LL) @@ -2026,9 +2036,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. -- 2.30.1
[PATCH AUTOSEL 4.9 04/10] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset
From: Athira Rajeev [ Upstream commit d137845c973147a22622cc76c7b0bc16f6206323 ] While sampling for marked events, currently we record the sample only if the SIAR valid bit of Sampled Instruction Event Register (SIER) is set. SIAR_VALID bit is used for fetching the instruction address from Sampled Instruction Address Register(SIAR). But there are some usecases, where the user is interested only in the PMU stats at each counter overflow and the exact IP of the overflow event is not required. Dropping SIAR invalid samples will fail to record some of the counter overflows in such cases. Example of such usecase is dumping the PMU stats (event counts) after some regular amount of instructions/events from the userspace (ex: via ptrace). Here counter overflow is indicated to userspace via signal handler, and captured by monitoring and enabling I/O signaling on the event file descriptor. In these cases, we expect to get sample/overflow indication after each specified sample_period. Perf event attribute will not have PERF_SAMPLE_IP set in the sample_type if exact IP of the overflow event is not requested. So while profiling if SAMPLE_IP is not set, just record the counter overflow irrespective of SIAR_VALID check. 
Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev [mpe: Reflow comment and if formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612516492-1428-1-git-send-email-atraj...@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/perf/core-book3s.c | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 1f1ac446ace9..f2d8f35c181f 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2010,7 +2010,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* +* If address is not requested in the sample via +* PERF_SAMPLE_IP, just record that sample irrespective +* of SIAR valid check. +*/ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x8000LL) @@ -2028,9 +2038,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. -- 2.30.1
[PATCH AUTOSEL 4.14 05/13] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset
From: Athira Rajeev [ Upstream commit d137845c973147a22622cc76c7b0bc16f6206323 ] While sampling for marked events, currently we record the sample only if the SIAR valid bit of the Sampled Instruction Event Register (SIER) is set. The SIAR_VALID bit is used for fetching the instruction address from the Sampled Instruction Address Register (SIAR). But there are some use cases where the user is interested only in the PMU stats at each counter overflow and the exact IP of the overflow event is not required. Dropping SIAR-invalid samples will fail to record some of the counter overflows in such cases. An example of such a use case is dumping the PMU stats (event counts) after some regular amount of instructions/events from userspace (ex: via ptrace). Here counter overflow is indicated to userspace via a signal handler, and captured by monitoring and enabling I/O signaling on the event file descriptor. In these cases, we expect to get a sample/overflow indication after each specified sample_period. The perf event attribute will not have PERF_SAMPLE_IP set in the sample_type if the exact IP of the overflow event is not requested. So while profiling, if SAMPLE_IP is not set, just record the counter overflow irrespective of the SIAR_VALID check. 
Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev [mpe: Reflow comment and if formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612516492-1428-1-git-send-email-atraj...@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/perf/core-book3s.c | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 56f16c803590..2669847434b8 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2055,7 +2055,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* +* If address is not requested in the sample via +* PERF_SAMPLE_IP, just record that sample irrespective +* of SIAR valid check. +*/ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x8000LL) @@ -2073,9 +2083,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. -- 2.30.1
[PATCH AUTOSEL 4.14 04/13] powerpc: improve handling of unrecoverable system reset
From: Nicholas Piggin [ Upstream commit 11cb0a25f71818ca7ab4856548ecfd83c169aa4d ] If an unrecoverable system reset hits in process context, the system does not have to panic. Similar to machine check, call nmi_exit() before die(). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-26-npig...@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/traps.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0f1a888c04a8..05c1aabad01c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -360,8 +360,11 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); + } if (!nested) nmi_exit(); -- 2.30.1
[PATCH AUTOSEL 4.19 08/21] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset
From: Athira Rajeev [ Upstream commit d137845c973147a22622cc76c7b0bc16f6206323 ] While sampling for marked events, currently we record the sample only if the SIAR valid bit of the Sampled Instruction Event Register (SIER) is set. The SIAR_VALID bit is used for fetching the instruction address from the Sampled Instruction Address Register (SIAR). But there are some use cases where the user is interested only in the PMU stats at each counter overflow and the exact IP of the overflow event is not required. Dropping SIAR-invalid samples will fail to record some of the counter overflows in such cases. An example of such a use case is dumping the PMU stats (event counts) after some regular amount of instructions/events from userspace (ex: via ptrace). Here counter overflow is indicated to userspace via a signal handler, and captured by monitoring and enabling I/O signaling on the event file descriptor. In these cases, we expect to get a sample/overflow indication after each specified sample_period. The perf event attribute will not have PERF_SAMPLE_IP set in the sample_type if the exact IP of the overflow event is not requested. So while profiling, if SAMPLE_IP is not set, just record the counter overflow irrespective of the SIAR_VALID check. 
Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev [mpe: Reflow comment and if formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612516492-1428-1-git-send-email-atraj...@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/perf/core-book3s.c | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 70de13822828..091bdeaf02a3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2046,7 +2046,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* +* If address is not requested in the sample via +* PERF_SAMPLE_IP, just record that sample irrespective +* of SIAR valid check. +*/ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x8000LL) @@ -2064,9 +2074,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. -- 2.30.1
[PATCH AUTOSEL 4.19 06/21] powerpc/pci: Add ppc_md.discover_phbs()
From: Oliver O'Halloran [ Upstream commit 5537fcb319d016ce387f818dd774179bc03217f5 ] On many powerpc platforms the discovery and initialisation of pci_controllers (PHBs) happens inside of setup_arch(). This is very early in boot (pre-initcalls) and means that we're initialising the PHB long before many basic kernel services (slab allocator, debugfs, a real ioremap) are available. On PowerNV this causes an additional problem since we map the PHB registers with ioremap(). As of commit d538aadc2718 ("powerpc/ioremap: warn on early use of ioremap()") a warning is printed because we're using the "incorrect" API to set up an MMIO mapping in early boot. The kernel does provide early_ioremap(), but that is not intended to create long-lived MMIO mappings and a separate warning is printed by generic code if early_ioremap() mappings are "leaked." This is all fixable with dumb hacks like using early_ioremap() to set up the initial mapping then replacing it with a real ioremap later on in boot, but it does raise the question: Why the hell are we setting up the PHBs this early in boot? The old and wise claim it's due to "hysterical rasins." Aside from amused grapes there doesn't appear to be any real reason to maintain the current behaviour. Already most of the newer embedded platforms perform PHB discovery in an arch_initcall, and between the end of setup_arch() and the start of initcalls none of the generic kernel code does anything PCI related. On powerpc scanning PHBs occurs in a subsys_initcall, so it should be possible to move the PHB discovery to a core, postcore or arch initcall. This patch adds the ppc_md.discover_phbs hook and a core_initcall stub that calls it. The core_initcalls are the earliest to be called, so this will avoid any possible issues with dependencies between initcalls. 
This isn't just an academic issue either, since on pseries and PowerNV EEH init occurs in an arch_initcall and depends on the pci_controllers being available; similarly, the creation of pci_dns occurs at core_initcall_sync (i.e. between core and postcore initcalls). These problems need to be addressed separately. Reported-by: kernel test robot Signed-off-by: Oliver O'Halloran [mpe: Make discover_phbs() static] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-1-ooh...@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/machdep.h | 3 +++ arch/powerpc/kernel/pci-common.c | 10 ++ 2 files changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index a47de82fb8e2..bda87cbf106d 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -71,6 +71,9 @@ struct machdep_calls { int (*pcibios_root_bridge_prepare)(struct pci_host_bridge *bridge); + /* finds all the pci_controllers present at boot */ + void (*discover_phbs)(void); + /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 88e4f69a09e5..74628aca2bf1 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1671,3 +1671,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); + + +static int __init discover_phbs(void) +{ + if (ppc_md.discover_phbs) + ppc_md.discover_phbs(); + + return 0; +} +core_initcall(discover_phbs); -- 2.30.1
[PATCH AUTOSEL 4.19 07/21] powerpc: improve handling of unrecoverable system reset
From: Nicholas Piggin [ Upstream commit 11cb0a25f71818ca7ab4856548ecfd83c169aa4d ] If an unrecoverable system reset hits in process context, the system does not have to panic. Similar to machine check, call nmi_exit() before die(). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-26-npig...@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/traps.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1b2d84cb373b..2379c4bf3979 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -433,8 +433,11 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); + } if (!nested) nmi_exit(); -- 2.30.1
[PATCH AUTOSEL 5.4 14/33] powerpc/64: Fix stack trace not displaying final frame
From: Michael Ellerman [ Upstream commit e3de1e291fa58a1ab0f471a4b458eff2514e4b5f ] In commit bf13718bc57a ("powerpc: show registers when unwinding interrupt frames") we changed our stack dumping logic to show the full registers whenever we find an interrupt frame on the stack. However we didn't notice that on 64-bit this doesn't show the final frame, ie. the interrupt that brought us in from userspace, whereas on 32-bit it does. That is due to confusion about the size of that last frame. The code in show_stack() calls validate_sp(), passing it STACK_INT_FRAME_SIZE to check the sp is at least that far below the top of the stack. However on 64-bit that size is too large for the final frame, because it includes the red zone, but we don't allocate a red zone for the first frame. So add a new define that encodes the correct size for 32-bit and 64-bit, and use it in show_stack(). This results in the full trace being shown on 64-bit, eg: sysrq: Trigger a crash Kernel panic - not syncing: sysrq triggered crash CPU: 0 PID: 83 Comm: sh Not tainted 5.11.0-rc2-gcc-8.2.0-00188-g571abcb96b10-dirty #649 Call Trace: [ca1c3ac0] [c0897b70] dump_stack+0xc4/0x114 (unreliable) [ca1c3b00] [c014334c] panic+0x178/0x41c [ca1c3ba0] [c094e600] sysrq_handle_crash+0x40/0x50 [ca1c3c00] [c094ef98] __handle_sysrq+0xd8/0x210 [ca1c3ca0] [c094f820] write_sysrq_trigger+0x100/0x188 [ca1c3ce0] [c05559dc] proc_reg_write+0x10c/0x1b0 [ca1c3d10] [c0479950] vfs_write+0xf0/0x360 [ca1c3d60] [c0479d9c] ksys_write+0x7c/0x140 [ca1c3db0] [c002bf5c] system_call_exception+0x19c/0x2c0 [ca1c3e10] [c000d35c] system_call_common+0xec/0x278 --- interrupt: c00 at 0x7fff9fbab428 NIP: 7fff9fbab428 LR: 1000b724 CTR: REGS: ca1c3e80 TRAP: 0c00 Not tainted (5.11.0-rc2-gcc-8.2.0-00188-g571abcb96b10-dirty) MSR: 9280f033 CR: 22002884 XER: IRQMASK: 0 GPR00: 0004 7fffc3cb8960 7fff9fc59900 0001 GPR04: 2a4b32d0 0002 0063 0063 GPR08: 2a4b32d0 GPR12: 7fff9fcca9a0 GPR16: 100b8fd0 GPR20: 2a4b3485 100b8f90 GPR24: 2a4b0440 100e77b8 0020 
2a4b32d0 GPR28: 0001 0002 2a4b32d0 0001 NIP [7fff9fbab428] 0x7fff9fbab428 LR [1000b724] 0x1000b724 --- interrupt: c00 Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210209141627.2898485-1-...@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/ptrace.h | 3 +++ arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/process.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index c41220f4aad9..5a424f867c82 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -62,6 +62,9 @@ struct pt_regs }; #endif + +#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) + #ifdef __powerpc64__ /* diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 5c0a1e17219b..af399675248e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -285,7 +285,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bd0c258a1d5d..c94bba9142e7 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2081,7 +2081,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) * See if this is an exception frame. * We look for the "regshere" marker in the current frame. */ - if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) + if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); -- 2.30.1
[PATCH AUTOSEL 5.4 12/33] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset
From: Athira Rajeev [ Upstream commit d137845c973147a22622cc76c7b0bc16f6206323 ] While sampling for marked events, currently we record the sample only if the SIAR valid bit of the Sampled Instruction Event Register (SIER) is set. The SIAR_VALID bit is used for fetching the instruction address from the Sampled Instruction Address Register (SIAR). But there are some use cases where the user is interested only in the PMU stats at each counter overflow and the exact IP of the overflow event is not required. Dropping SIAR-invalid samples will fail to record some of the counter overflows in such cases. An example of such a use case is dumping the PMU stats (event counts) after some regular amount of instructions/events from userspace (ex: via ptrace). Here counter overflow is indicated to userspace via a signal handler, and captured by monitoring and enabling I/O signaling on the event file descriptor. In these cases, we expect to get a sample/overflow indication after each specified sample_period. The perf event attribute will not have PERF_SAMPLE_IP set in the sample_type if the exact IP of the overflow event is not requested. So while profiling, if SAMPLE_IP is not set, just record the counter overflow irrespective of the SIAR_VALID check. 
Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev [mpe: Reflow comment and if formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612516492-1428-1-git-send-email-atraj...@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/perf/core-book3s.c | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 02fc75ddcbb3..6f013e418834 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2077,7 +2077,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* +* If address is not requested in the sample via +* PERF_SAMPLE_IP, just record that sample irrespective +* of SIAR valid check. +*/ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x8000LL) @@ -2095,9 +2105,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. -- 2.30.1
[PATCH AUTOSEL 5.4 11/33] powerpc: improve handling of unrecoverable system reset
From: Nicholas Piggin [ Upstream commit 11cb0a25f71818ca7ab4856548ecfd83c169aa4d ] If an unrecoverable system reset hits in process context, the system does not have to panic. Similar to machine check, call nmi_exit() before die(). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-26-npig...@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/traps.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 206032c9b545..ecfa460f66d1 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -513,8 +513,11 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); + } if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); -- 2.30.1
[PATCH AUTOSEL 5.4 08/33] powerpc/pci: Add ppc_md.discover_phbs()
From: Oliver O'Halloran [ Upstream commit 5537fcb319d016ce387f818dd774179bc03217f5 ] On many powerpc platforms the discovery and initialisation of pci_controllers (PHBs) happens inside of setup_arch(). This is very early in boot (pre-initcalls) and means that we're initialising the PHB long before many basic kernel services (slab allocator, debugfs, a real ioremap) are available. On PowerNV this causes an additional problem since we map the PHB registers with ioremap(). As of commit d538aadc2718 ("powerpc/ioremap: warn on early use of ioremap()") a warning is printed because we're using the "incorrect" API to set up an MMIO mapping in early boot. The kernel does provide early_ioremap(), but that is not intended to create long-lived MMIO mappings and a separate warning is printed by generic code if early_ioremap() mappings are "leaked." This is all fixable with dumb hacks like using early_ioremap() to set up the initial mapping then replacing it with a real ioremap later on in boot, but it does raise the question: Why the hell are we setting up the PHBs this early in boot? The old and wise claim it's due to "hysterical rasins." Aside from amused grapes there doesn't appear to be any real reason to maintain the current behaviour. Already most of the newer embedded platforms perform PHB discovery in an arch_initcall, and between the end of setup_arch() and the start of initcalls none of the generic kernel code does anything PCI related. On powerpc scanning PHBs occurs in a subsys_initcall, so it should be possible to move the PHB discovery to a core, postcore or arch initcall. This patch adds the ppc_md.discover_phbs hook and a core_initcall stub that calls it. The core_initcalls are the earliest to be called, so this will avoid any possible issues with dependencies between initcalls. 
This isn't just an academic issue either, since on pseries and PowerNV EEH init occurs in an arch_initcall and depends on the pci_controllers being available; similarly, the creation of pci_dns occurs at core_initcall_sync (i.e. between core and postcore initcalls). These problems need to be addressed separately. Reported-by: kernel test robot Signed-off-by: Oliver O'Halloran [mpe: Make discover_phbs() static] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-1-ooh...@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/machdep.h | 3 +++ arch/powerpc/kernel/pci-common.c | 10 ++ 2 files changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 7bcb6a39..f71c361dc356 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -59,6 +59,9 @@ struct machdep_calls { int (*pcibios_root_bridge_prepare)(struct pci_host_bridge *bridge); + /* finds all the pci_controllers present at boot */ + void (*discover_phbs)(void); + /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 1c448cf25506..a2c258a8d736 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1669,3 +1669,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); + + +static int __init discover_phbs(void) +{ + if (ppc_md.discover_phbs) + ppc_md.discover_phbs(); + + return 0; +} +core_initcall(discover_phbs); -- 2.30.1
[PATCH AUTOSEL 5.10 20/47] powerpc/64: Fix stack trace not displaying final frame
From: Michael Ellerman [ Upstream commit e3de1e291fa58a1ab0f471a4b458eff2514e4b5f ] In commit bf13718bc57a ("powerpc: show registers when unwinding interrupt frames") we changed our stack dumping logic to show the full registers whenever we find an interrupt frame on the stack. However we didn't notice that on 64-bit this doesn't show the final frame, ie. the interrupt that brought us in from userspace, whereas on 32-bit it does. That is due to confusion about the size of that last frame. The code in show_stack() calls validate_sp(), passing it STACK_INT_FRAME_SIZE to check the sp is at least that far below the top of the stack. However on 64-bit that size is too large for the final frame, because it includes the red zone, but we don't allocate a red zone for the first frame. So add a new define that encodes the correct size for 32-bit and 64-bit, and use it in show_stack(). This results in the full trace being shown on 64-bit, eg: sysrq: Trigger a crash Kernel panic - not syncing: sysrq triggered crash CPU: 0 PID: 83 Comm: sh Not tainted 5.11.0-rc2-gcc-8.2.0-00188-g571abcb96b10-dirty #649 Call Trace: [ca1c3ac0] [c0897b70] dump_stack+0xc4/0x114 (unreliable) [ca1c3b00] [c014334c] panic+0x178/0x41c [ca1c3ba0] [c094e600] sysrq_handle_crash+0x40/0x50 [ca1c3c00] [c094ef98] __handle_sysrq+0xd8/0x210 [ca1c3ca0] [c094f820] write_sysrq_trigger+0x100/0x188 [ca1c3ce0] [c05559dc] proc_reg_write+0x10c/0x1b0 [ca1c3d10] [c0479950] vfs_write+0xf0/0x360 [ca1c3d60] [c0479d9c] ksys_write+0x7c/0x140 [ca1c3db0] [c002bf5c] system_call_exception+0x19c/0x2c0 [ca1c3e10] [c000d35c] system_call_common+0xec/0x278 --- interrupt: c00 at 0x7fff9fbab428 NIP: 7fff9fbab428 LR: 1000b724 CTR: REGS: ca1c3e80 TRAP: 0c00 Not tainted (5.11.0-rc2-gcc-8.2.0-00188-g571abcb96b10-dirty) MSR: 9280f033 CR: 22002884 XER: IRQMASK: 0 GPR00: 0004 7fffc3cb8960 7fff9fc59900 0001 GPR04: 2a4b32d0 0002 0063 0063 GPR08: 2a4b32d0 GPR12: 7fff9fcca9a0 GPR16: 100b8fd0 GPR20: 2a4b3485 100b8f90 GPR24: 2a4b0440 100e77b8 0020 
2a4b32d0 GPR28: 0001 0002 2a4b32d0 0001 NIP [7fff9fbab428] 0x7fff9fbab428 LR [1000b724] 0x1000b724 --- interrupt: c00 Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210209141627.2898485-1-...@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/ptrace.h | 3 +++ arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/process.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index e2c778c176a3..7bb064ad04d8 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -62,6 +62,9 @@ struct pt_regs }; #endif + +#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) + #ifdef __powerpc64__ /* diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c2722ff36e98..5c125255571c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -307,7 +307,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d421a2c7f822..1a1d2657fe8d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2170,7 +2170,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, * See if this is an exception frame. * We look for the "regshere" marker in the current frame. */ - if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) + if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); -- 2.30.1
[PATCH AUTOSEL 5.10 17/47] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset
From: Athira Rajeev [ Upstream commit d137845c973147a22622cc76c7b0bc16f6206323 ] While sampling for marked events, currently we record the sample only if the SIAR valid bit of the Sampled Instruction Event Register (SIER) is set. The SIAR_VALID bit is used for fetching the instruction address from the Sampled Instruction Address Register (SIAR). But there are some use cases where the user is interested only in the PMU stats at each counter overflow and the exact IP of the overflow event is not required. Dropping SIAR-invalid samples will fail to record some of the counter overflows in such cases. An example of such a use case is dumping the PMU stats (event counts) after some regular amount of instructions/events from userspace (ex: via ptrace). Here counter overflow is indicated to userspace via a signal handler, and captured by monitoring and enabling I/O signaling on the event file descriptor. In these cases, we expect to get a sample/overflow indication after each specified sample_period. The perf event attribute will not have PERF_SAMPLE_IP set in the sample_type if the exact IP of the overflow event is not requested. So while profiling, if SAMPLE_IP is not set, just record the counter overflow irrespective of the SIAR_VALID check. 
Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev [mpe: Reflow comment and if formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612516492-1428-1-git-send-email-atraj...@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/perf/core-book3s.c | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 43599e671d38..d84ab867b986 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2112,7 +2112,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* +* If address is not requested in the sample via +* PERF_SAMPLE_IP, just record that sample irrespective +* of SIAR valid check. +*/ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x8000LL) @@ -2130,9 +2140,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. -- 2.30.1
[PATCH AUTOSEL 5.10 16/47] powerpc: improve handling of unrecoverable system reset
From: Nicholas Piggin [ Upstream commit 11cb0a25f71818ca7ab4856548ecfd83c169aa4d ] If an unrecoverable system reset hits in process context, the system does not have to panic. Similar to machine check, call nmi_exit() before die(). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-26-npig...@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/traps.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5006dcbe1d9f..77dffea3d537 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -509,8 +509,11 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); + } if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); -- 2.30.1
[PATCH AUTOSEL 5.10 13/47] powerpc/pci: Add ppc_md.discover_phbs()
From: Oliver O'Halloran [ Upstream commit 5537fcb319d016ce387f818dd774179bc03217f5 ] On many powerpc platforms the discovery and initialisation of pci_controllers (PHBs) happens inside of setup_arch(). This is very early in boot (pre-initcalls) and means that we're initialising the PHB long before many basic kernel services (slab allocator, debugfs, a real ioremap) are available. On PowerNV this causes an additional problem since we map the PHB registers with ioremap(). As of commit d538aadc2718 ("powerpc/ioremap: warn on early use of ioremap()") a warning is printed because we're using the "incorrect" API to set up an MMIO mapping in early boot. The kernel does provide early_ioremap(), but that is not intended to create long-lived MMIO mappings and a separate warning is printed by generic code if early_ioremap() mappings are "leaked." This is all fixable with dumb hacks like using early_ioremap() to set up the initial mapping then replacing it with a real ioremap later on in boot, but it does raise the question: Why the hell are we setting up the PHB's this early in boot? The old and wise claim it's due to "hysterical rasins." Aside from amused grapes there doesn't appear to be any real reason to maintain the current behaviour. Already most of the newer embedded platforms perform PHB discovery in an arch_initcall and between the end of setup_arch() and the start of initcalls none of the generic kernel code does anything PCI related. On powerpc scanning PHBs occurs in a subsys_initcall so it should be possible to move the PHB discovery to a core, postcore or arch initcall. This patch adds the ppc_md.discover_phbs hook and a core_initcall stub that calls it. The core_initcalls are the earliest to be called so this will avoid any possible issues with dependency between initcalls. 
This isn't just an academic issue either since on pseries and PowerNV EEH init occurs in an arch_initcall and depends on the pci_controllers being available, similarly the creation of pci_dns occurs at core_initcall_sync (i.e. between core and postcore initcalls). These problems need to be addressed separately. Reported-by: kernel test robot Signed-off-by: Oliver O'Halloran [mpe: Make discover_phbs() static] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-1-ooh...@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/machdep.h | 3 +++ arch/powerpc/kernel/pci-common.c | 10 ++ 2 files changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 475687f24f4a..d319160d790c 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -59,6 +59,9 @@ struct machdep_calls { int (*pcibios_root_bridge_prepare)(struct pci_host_bridge *bridge); + /* finds all the pci_controllers present at boot */ + void(*discover_phbs)(void); + /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index be108616a721..7920559a1ca8 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1625,3 +1625,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); + + +static int __init discover_phbs(void) +{ + if (ppc_md.discover_phbs) + ppc_md.discover_phbs(); + + return 0; +} +core_initcall(discover_phbs); -- 2.30.1
[PATCH AUTOSEL 5.11 24/52] powerpc/64: Fix stack trace not displaying final frame
From: Michael Ellerman [ Upstream commit e3de1e291fa58a1ab0f471a4b458eff2514e4b5f ] In commit bf13718bc57a ("powerpc: show registers when unwinding interrupt frames") we changed our stack dumping logic to show the full registers whenever we find an interrupt frame on the stack. However we didn't notice that on 64-bit this doesn't show the final frame, ie. the interrupt that brought us in from userspace, whereas on 32-bit it does. That is due to confusion about the size of that last frame. The code in show_stack() calls validate_sp(), passing it STACK_INT_FRAME_SIZE to check the sp is at least that far below the top of the stack. However on 64-bit that size is too large for the final frame, because it includes the red zone, but we don't allocate a red zone for the first frame. So add a new define that encodes the correct size for 32-bit and 64-bit, and use it in show_stack(). This results in the full trace being shown on 64-bit, eg: sysrq: Trigger a crash Kernel panic - not syncing: sysrq triggered crash CPU: 0 PID: 83 Comm: sh Not tainted 5.11.0-rc2-gcc-8.2.0-00188-g571abcb96b10-dirty #649 Call Trace: [ca1c3ac0] [c0897b70] dump_stack+0xc4/0x114 (unreliable) [ca1c3b00] [c014334c] panic+0x178/0x41c [ca1c3ba0] [c094e600] sysrq_handle_crash+0x40/0x50 [ca1c3c00] [c094ef98] __handle_sysrq+0xd8/0x210 [ca1c3ca0] [c094f820] write_sysrq_trigger+0x100/0x188 [ca1c3ce0] [c05559dc] proc_reg_write+0x10c/0x1b0 [ca1c3d10] [c0479950] vfs_write+0xf0/0x360 [ca1c3d60] [c0479d9c] ksys_write+0x7c/0x140 [ca1c3db0] [c002bf5c] system_call_exception+0x19c/0x2c0 [ca1c3e10] [c000d35c] system_call_common+0xec/0x278 --- interrupt: c00 at 0x7fff9fbab428 NIP: 7fff9fbab428 LR: 1000b724 CTR: REGS: ca1c3e80 TRAP: 0c00 Not tainted (5.11.0-rc2-gcc-8.2.0-00188-g571abcb96b10-dirty) MSR: 9280f033 CR: 22002884 XER: IRQMASK: 0 GPR00: 0004 7fffc3cb8960 7fff9fc59900 0001 GPR04: 2a4b32d0 0002 0063 0063 GPR08: 2a4b32d0 GPR12: 7fff9fcca9a0 GPR16: 100b8fd0 GPR20: 2a4b3485 100b8f90 GPR24: 2a4b0440 100e77b8 0020 
2a4b32d0 GPR28: 0001 0002 2a4b32d0 0001 NIP [7fff9fbab428] 0x7fff9fbab428 LR [1000b724] 0x1000b724 --- interrupt: c00 Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210209141627.2898485-1-...@ellerman.id.au Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/ptrace.h | 3 +++ arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/process.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 58f9dc060a7b..8236c5e749e4 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -70,6 +70,9 @@ struct pt_regs }; #endif + +#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) + #ifdef __powerpc64__ /* diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b12d7c049bfe..989006b5ad0f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -309,7 +309,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a66f435dabbf..b65a73e4d642 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2176,7 +2176,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, * See if this is an exception frame. * We look for the "regshere" marker in the current frame. */ - if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) + if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); -- 2.30.1
[PATCH AUTOSEL 5.11 20/52] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset
From: Athira Rajeev [ Upstream commit d137845c973147a22622cc76c7b0bc16f6206323 ] While sampling for marked events, currently we record the sample only if the SIAR valid bit of the Sampled Instruction Event Register (SIER) is set. The SIAR_VALID bit is used for fetching the instruction address from the Sampled Instruction Address Register (SIAR). But there are some use cases where the user is interested only in the PMU stats at each counter overflow and the exact IP of the overflow event is not required. Dropping SIAR invalid samples will fail to record some of the counter overflows in such cases. An example of such a use case is dumping the PMU stats (event counts) after some regular amount of instructions/events from userspace (ex: via ptrace). Here counter overflow is indicated to userspace via a signal handler, and captured by monitoring and enabling I/O signaling on the event file descriptor. In these cases, we expect to get a sample/overflow indication after each specified sample_period. The perf event attribute will not have PERF_SAMPLE_IP set in the sample_type if the exact IP of the overflow event is not requested. So while profiling, if SAMPLE_IP is not set, just record the counter overflow irrespective of the SIAR_VALID check. 
Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev [mpe: Reflow comment and if formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1612516492-1428-1-git-send-email-atraj...@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/perf/core-book3s.c | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 28206b1fe172..8b529daf40ea 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2149,7 +2149,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* +* If address is not requested in the sample via +* PERF_SAMPLE_IP, just record that sample irrespective +* of SIAR valid check. +*/ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x8000LL) @@ -2167,9 +2177,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. -- 2.30.1
[PATCH AUTOSEL 5.11 19/52] powerpc: improve handling of unrecoverable system reset
From: Nicholas Piggin [ Upstream commit 11cb0a25f71818ca7ab4856548ecfd83c169aa4d ] If an unrecoverable system reset hits in process context, the system does not have to panic. Similar to machine check, call nmi_exit() before die(). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-26-npig...@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/traps.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3ec7b443fe6b..4be05517f2db 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -503,8 +503,11 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); + } if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); -- 2.30.1
[PATCH AUTOSEL 5.11 16/52] powerpc/pci: Add ppc_md.discover_phbs()
From: Oliver O'Halloran [ Upstream commit 5537fcb319d016ce387f818dd774179bc03217f5 ] On many powerpc platforms the discovery and initialisation of pci_controllers (PHBs) happens inside of setup_arch(). This is very early in boot (pre-initcalls) and means that we're initialising the PHB long before many basic kernel services (slab allocator, debugfs, a real ioremap) are available. On PowerNV this causes an additional problem since we map the PHB registers with ioremap(). As of commit d538aadc2718 ("powerpc/ioremap: warn on early use of ioremap()") a warning is printed because we're using the "incorrect" API to set up an MMIO mapping in early boot. The kernel does provide early_ioremap(), but that is not intended to create long-lived MMIO mappings and a separate warning is printed by generic code if early_ioremap() mappings are "leaked." This is all fixable with dumb hacks like using early_ioremap() to set up the initial mapping then replacing it with a real ioremap later on in boot, but it does raise the question: Why the hell are we setting up the PHB's this early in boot? The old and wise claim it's due to "hysterical rasins." Aside from amused grapes there doesn't appear to be any real reason to maintain the current behaviour. Already most of the newer embedded platforms perform PHB discovery in an arch_initcall and between the end of setup_arch() and the start of initcalls none of the generic kernel code does anything PCI related. On powerpc scanning PHBs occurs in a subsys_initcall so it should be possible to move the PHB discovery to a core, postcore or arch initcall. This patch adds the ppc_md.discover_phbs hook and a core_initcall stub that calls it. The core_initcalls are the earliest to be called so this will avoid any possible issues with dependency between initcalls. 
This isn't just an academic issue either since on pseries and PowerNV EEH init occurs in an arch_initcall and depends on the pci_controllers being available, similarly the creation of pci_dns occurs at core_initcall_sync (i.e. between core and postcore initcalls). These problems need to be addressed separately. Reported-by: kernel test robot Signed-off-by: Oliver O'Halloran [mpe: Make discover_phbs() static] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201103043523.916109-1-ooh...@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/machdep.h | 3 +++ arch/powerpc/kernel/pci-common.c | 10 ++ 2 files changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index cf6ebbc16cb4..764f2732a821 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -59,6 +59,9 @@ struct machdep_calls { int (*pcibios_root_bridge_prepare)(struct pci_host_bridge *bridge); + /* finds all the pci_controllers present at boot */ + void(*discover_phbs)(void); + /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2b555997b295..001e90cd8948 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1699,3 +1699,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); + + +static int __init discover_phbs(void) +{ + if (ppc_md.discover_phbs) + ppc_md.discover_phbs(); + + return 0; +} +core_initcall(discover_phbs); -- 2.30.1
Re: [RFC PATCH v1] powerpc: Enable KFENCE for PPC32
Christophe Leroy writes: > Le 02/03/2021 à 10:53, Marco Elver a écrit : >> On Tue, 2 Mar 2021 at 10:27, Christophe Leroy >> wrote: >>> Le 02/03/2021 à 10:21, Alexander Potapenko a écrit : > [ 14.998426] BUG: KFENCE: invalid read in > finish_task_switch.isra.0+0x54/0x23c > [ 14.998426] > [ 15.007061] Invalid read at 0x(ptrval): > [ 15.010906] finish_task_switch.isra.0+0x54/0x23c > [ 15.015633] kunit_try_run_case+0x5c/0xd0 > [ 15.019682] kunit_generic_run_threadfn_adapter+0x24/0x30 > [ 15.025099] kthread+0x15c/0x174 > [ 15.028359] ret_from_kernel_thread+0x14/0x1c > [ 15.032747] > [ 15.034251] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: GB > 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty #4674 > [ 15.045811] > == > [ 15.053324] # test_invalid_access: EXPECTATION FAILED at > mm/kfence/kfence_test.c:636 > [ 15.053324] Expected report_matches() to be true, but is > false > [ 15.068359] not ok 21 - test_invalid_access The test expects the function name to be test_invalid_access, i. e. the first line should be "BUG: KFENCE: invalid read in test_invalid_access". The error reporting function unwinds the stack, skips a couple of "uninteresting" frames (https://elixir.bootlin.com/linux/v5.12-rc1/source/mm/kfence/report.c#L43) and uses the first "interesting" one frame to print the report header (https://elixir.bootlin.com/linux/v5.12-rc1/source/mm/kfence/report.c#L226). It's strange that test_invalid_access is missing altogether from the stack trace - is that expected? Can you try printing the whole stacktrace without skipping any frames to see if that function is there? >>> >>> Booting with 'no_hash_pointers" I get the following. Does it helps ? 
>>> >>> [ 16.837198] >>> == >>> [ 16.848521] BUG: KFENCE: invalid read in >>> finish_task_switch.isra.0+0x54/0x23c >>> [ 16.848521] >>> [ 16.857158] Invalid read at 0xdf98800a: >>> [ 16.861004] finish_task_switch.isra.0+0x54/0x23c >>> [ 16.865731] kunit_try_run_case+0x5c/0xd0 >>> [ 16.869780] kunit_generic_run_threadfn_adapter+0x24/0x30 >>> [ 16.875199] kthread+0x15c/0x174 >>> [ 16.878460] ret_from_kernel_thread+0x14/0x1c >>> [ 16.882847] >>> [ 16.884351] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: GB >>> 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty #4674 >>> [ 16.895908] NIP: c016eb8c LR: c02f50dc CTR: c016eb38 >>> [ 16.900963] REGS: e2449d90 TRAP: 0301 Tainted: GB >>> (5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty) >>> [ 16.911386] MSR: 9032 CR: 2204 XER: >>> [ 16.918153] DAR: df98800a DSISR: 2000 >>> [ 16.918153] GPR00: c02f50dc e2449e50 c1140d00 e100dd24 c084b13c 0008 >>> c084b32b c016eb38 >>> [ 16.918153] GPR08: c085 df988000 c0d1 e2449eb0 22000288 >>> [ 16.936695] NIP [c016eb8c] test_invalid_access+0x54/0x108 >>> [ 16.942125] LR [c02f50dc] kunit_try_run_case+0x5c/0xd0 >>> [ 16.947292] Call Trace: >>> [ 16.949746] [e2449e50] [c005a5ec] finish_task_switch.isra.0+0x54/0x23c >>> (unreliable) >> >> The "(unreliable)" might be a clue that it's related to ppc32 stack >> unwinding. Any ppc expert know what this is about? 
>> >>> [ 16.957443] [e2449eb0] [c02f50dc] kunit_try_run_case+0x5c/0xd0 >>> [ 16.963319] [e2449ed0] [c02f63ec] >>> kunit_generic_run_threadfn_adapter+0x24/0x30 >>> [ 16.970574] [e2449ef0] [c004e710] kthread+0x15c/0x174 >>> [ 16.975670] [e2449f30] [c001317c] ret_from_kernel_thread+0x14/0x1c >>> [ 16.981896] Instruction dump: >>> [ 16.984879] 8129d608 38e7eb38 81020280 911f004c 3900 995f0024 >>> 907f0028 90ff001c >>> [ 16.992710] 3949000a 915f0020 3d40c0d1 3d00c085 <8929000a> 3908adb0 >>> 812a4b98 3d40c02f >>> [ 17.000711] >>> == >>> [ 17.008223] # test_invalid_access: EXPECTATION FAILED at >>> mm/kfence/kfence_test.c:636 >>> [ 17.008223] Expected report_matches() to be true, but is false >>> [ 17.023243] not ok 21 - test_invalid_access >> >> On a fault in test_invalid_access, KFENCE prints the stack trace based >> on the information in pt_regs. So we do not think there's anything we >> can do to improve stack printing per se. > stack printing, probably not. Would be good anyway to mark the last level > [unreliable] as the ppc does. > > IIUC, on ppc the address in the stack frame of the caller is written by the > caller. In most tests, > there is some function call being done before the fault, for instance > test_kmalloc_aligned_oob_read() does a call to kunit_do_assertion which > populates the address of the > call in the stack. However this is fragile. > > This works for function calls because in order to call a
Re: [RFC PATCH v1] powerpc: Enable KFENCE for PPC32
Le 02/03/2021 à 10:53, Marco Elver a écrit : On Tue, 2 Mar 2021 at 10:27, Christophe Leroy wrote: Le 02/03/2021 à 10:21, Alexander Potapenko a écrit : [ 14.998426] BUG: KFENCE: invalid read in finish_task_switch.isra.0+0x54/0x23c [ 14.998426] [ 15.007061] Invalid read at 0x(ptrval): [ 15.010906] finish_task_switch.isra.0+0x54/0x23c [ 15.015633] kunit_try_run_case+0x5c/0xd0 [ 15.019682] kunit_generic_run_threadfn_adapter+0x24/0x30 [ 15.025099] kthread+0x15c/0x174 [ 15.028359] ret_from_kernel_thread+0x14/0x1c [ 15.032747] [ 15.034251] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: GB 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty #4674 [ 15.045811] == [ 15.053324] # test_invalid_access: EXPECTATION FAILED at mm/kfence/kfence_test.c:636 [ 15.053324] Expected report_matches() to be true, but is false [ 15.068359] not ok 21 - test_invalid_access The test expects the function name to be test_invalid_access, i. e. the first line should be "BUG: KFENCE: invalid read in test_invalid_access". The error reporting function unwinds the stack, skips a couple of "uninteresting" frames (https://elixir.bootlin.com/linux/v5.12-rc1/source/mm/kfence/report.c#L43) and uses the first "interesting" one frame to print the report header (https://elixir.bootlin.com/linux/v5.12-rc1/source/mm/kfence/report.c#L226). It's strange that test_invalid_access is missing altogether from the stack trace - is that expected? Can you try printing the whole stacktrace without skipping any frames to see if that function is there? Booting with 'no_hash_pointers" I get the following. Does it helps ? 
[ 16.837198] == [ 16.848521] BUG: KFENCE: invalid read in finish_task_switch.isra.0+0x54/0x23c [ 16.848521] [ 16.857158] Invalid read at 0xdf98800a: [ 16.861004] finish_task_switch.isra.0+0x54/0x23c [ 16.865731] kunit_try_run_case+0x5c/0xd0 [ 16.869780] kunit_generic_run_threadfn_adapter+0x24/0x30 [ 16.875199] kthread+0x15c/0x174 [ 16.878460] ret_from_kernel_thread+0x14/0x1c [ 16.882847] [ 16.884351] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: GB 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty #4674 [ 16.895908] NIP: c016eb8c LR: c02f50dc CTR: c016eb38 [ 16.900963] REGS: e2449d90 TRAP: 0301 Tainted: GB (5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty) [ 16.911386] MSR: 9032 CR: 2204 XER: [ 16.918153] DAR: df98800a DSISR: 2000 [ 16.918153] GPR00: c02f50dc e2449e50 c1140d00 e100dd24 c084b13c 0008 c084b32b c016eb38 [ 16.918153] GPR08: c085 df988000 c0d1 e2449eb0 22000288 [ 16.936695] NIP [c016eb8c] test_invalid_access+0x54/0x108 [ 16.942125] LR [c02f50dc] kunit_try_run_case+0x5c/0xd0 [ 16.947292] Call Trace: [ 16.949746] [e2449e50] [c005a5ec] finish_task_switch.isra.0+0x54/0x23c (unreliable) The "(unreliable)" might be a clue that it's related to ppc32 stack unwinding. Any ppc expert know what this is about? 
[ 16.957443] [e2449eb0] [c02f50dc] kunit_try_run_case+0x5c/0xd0 [ 16.963319] [e2449ed0] [c02f63ec] kunit_generic_run_threadfn_adapter+0x24/0x30 [ 16.970574] [e2449ef0] [c004e710] kthread+0x15c/0x174 [ 16.975670] [e2449f30] [c001317c] ret_from_kernel_thread+0x14/0x1c [ 16.981896] Instruction dump: [ 16.984879] 8129d608 38e7eb38 81020280 911f004c 3900 995f0024 907f0028 90ff001c [ 16.992710] 3949000a 915f0020 3d40c0d1 3d00c085 <8929000a> 3908adb0 812a4b98 3d40c02f [ 17.000711] == [ 17.008223] # test_invalid_access: EXPECTATION FAILED at mm/kfence/kfence_test.c:636 [ 17.008223] Expected report_matches() to be true, but is false [ 17.023243] not ok 21 - test_invalid_access On a fault in test_invalid_access, KFENCE prints the stack trace based on the information in pt_regs. So we do not think there's anything we can do to improve stack printing per se. stack printing, probably not. Would be good anyway to mark the last level [unreliable] as the ppc does. IIUC, on ppc the address in the stack frame of the caller is written by the caller. In most tests, there is some function call being done before the fault, for instance test_kmalloc_aligned_oob_read() does a call to kunit_do_assertion which populates the address of the call in the stack. However this is fragile. This works for function calls because in order to call a subfunction, a function has to set up a stack frame in order to save the value in the Link Register, which contains the address of the function's parent and that will be clobbered by the sub-function call. However, it cannot be done by exceptions, because exceptions can happen in a function that has no stack frame (because that function has no need to call a subfunction and doesn't need to save anything on the stack). If the exception
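The Link Register behaviour discussed in this thread can be illustrated with a sketch of a typical ppc32 function prologue and epilogue. This is illustrative only: the 16-byte frame size and the offset 20 (the LR save word, 4 bytes into the caller's frame after `stwu r1, -16(r1)`) are a conventional example, not taken from any function in the thread.

```asm
# Sketch of a ppc32 (SVR4 ABI) prologue/epilogue, showing why the
# unwinder can trust a saved-LR slot only for functions that built a frame.

func_with_frame:
        mflr    r0              # copy the return address out of the Link Register
        stwu    r1, -16(r1)     # allocate a frame, storing the back-chain pointer
        stw     r0, 20(r1)      # save LR in the caller's frame (the LR save word)
        # ... body may call subroutines, which clobber LR ...
        lwz     r0, 20(r1)      # restore the return address
        mtlr    r0
        addi    r1, r1, 16      # tear down the frame
        blr

leaf_func:                      # a leaf function may build no frame at all:
        # ... body ...          # its return address lives only in LR, so if an
        blr                     # exception hits here, the stack carries no record
                                # of who called leaf_func -- hence the fragile,
                                # "(unreliable)" first entry in the KFENCE trace.
```

This is exactly why an exception frame cannot rely on the interrupted function having saved its LR: only functions that call subroutines are obliged to do so.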
Re: [PATCH] powerpc: iommu: fix build when neither PCI or IBMVIO is set
Randy Dunlap writes: > When neither CONFIG_PCI nor CONFIG_IBMVIO is enabled: > > ../arch/powerpc/kernel/iommu.c:178:30: error: 'fail_iommu_bus_notifier' > defined but not used [-Werror=unused-variable] > 178 | static struct notifier_block fail_iommu_bus_notifier = { > > If only that struct is bounded by 2 #if defined() phrases (PCI && IBMVIO): > > ../arch/powerpc/kernel/iommu.c:162:12: error: 'fail_iommu_bus_notify' defined > but not used [-Werror=unused-function] > 162 | static int fail_iommu_bus_notify(struct notifier_block *nb, > > If that function is also guarded by 2 #if defined() phrases: > > In file included from ../include/linux/dma-mapping.h:7, > from ../arch/powerpc/kernel/iommu.c:19: > ../include/linux/device.h:131:26: error: 'dev_attr_fail_iommu' defined but > not used [-Werror=unused-variable] > 131 | struct device_attribute dev_attr_##_name = __ATTR_RW(_name) > ../arch/powerpc/kernel/iommu.c:160:8: note: in expansion of macro > 'DEVICE_ATTR_RW' > 160 | static DEVICE_ATTR_RW(fail_iommu); > > and the snowball continues to grow. > Next I got this one: > > ../arch/powerpc/kernel/iommu.c: In function 'iommu_range_alloc': > ../arch/powerpc/kernel/iommu.c:234:6: error: implicit declaration of function > 'should_fail_iommu'; did you mean 'should_failslab'? > [-Werror=implicit-function-declaration] > 234 | if (should_fail_iommu(dev)) > > and > > ../arch/powerpc/kernel/iommu.c: In function 'should_fail_iommu': > ../arch/powerpc/kernel/iommu.c:122:50: error: 'fail_iommu' undeclared (first > use in this function) > 122 | return dev->archdata.fail_iommu && should_fail(_iommu, 1); > > So combine CONFIG_FAIL_IOMMU && (CONFIG_PCI || CONFIG_IBMVIO) > to decide on building some of this code/data. Couldn't we just make FAIL_IOMMU depend on PCI || IBMVIO? cheers
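Michael's counter-proposal amounts to a one-line Kconfig dependency instead of sprinkling `#if defined()` guards through iommu.c. A hypothetical sketch of what that could look like follows; the file location, prompt text, help text, and the pre-existing `FAULT_INJECTION` dependency are assumptions based on common fault-injection conventions, not taken from the thread.

```kconfig
# Hypothetical fragment (e.g. in arch/powerpc/Kconfig.debug): gate the
# whole feature on a bus type that actually uses the iommu notifier,
# so the unused-variable/function warnings never arise.
config FAIL_IOMMU
	bool "Fault-injection capability for IOMMU"
	depends on FAULT_INJECTION
	depends on PCI || IBMVIO
	help
	  Provide fault-injection capability for IOMMU. Each device can
	  be selectively enabled via the fail_iommu property.
```

With the dependency in place, the `fail_iommu_bus_notifier` struct, the notify callback, and the sysfs attribute all compile only in configurations where at least one registering bus exists, so no per-symbol guards are needed.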
Re: [RFC PATCH v1] powerpc: Enable KFENCE for PPC32
Le 02/03/2021 à 10:21, Alexander Potapenko a écrit : [ 14.998426] BUG: KFENCE: invalid read in finish_task_switch.isra.0+0x54/0x23c [ 14.998426] [ 15.007061] Invalid read at 0x(ptrval): [ 15.010906] finish_task_switch.isra.0+0x54/0x23c [ 15.015633] kunit_try_run_case+0x5c/0xd0 [ 15.019682] kunit_generic_run_threadfn_adapter+0x24/0x30 [ 15.025099] kthread+0x15c/0x174 [ 15.028359] ret_from_kernel_thread+0x14/0x1c [ 15.032747] [ 15.034251] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: GB 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty #4674 [ 15.045811] == [ 15.053324] # test_invalid_access: EXPECTATION FAILED at mm/kfence/kfence_test.c:636 [ 15.053324] Expected report_matches() to be true, but is false [ 15.068359] not ok 21 - test_invalid_access The test expects the function name to be test_invalid_access, i. e. the first line should be "BUG: KFENCE: invalid read in test_invalid_access". The error reporting function unwinds the stack, skips a couple of "uninteresting" frames (https://elixir.bootlin.com/linux/v5.12-rc1/source/mm/kfence/report.c#L43) and uses the first "interesting" one frame to print the report header (https://elixir.bootlin.com/linux/v5.12-rc1/source/mm/kfence/report.c#L226). It's strange that test_invalid_access is missing altogether from the stack trace - is that expected? Can you try printing the whole stacktrace without skipping any frames to see if that function is there? Booting with 'no_hash_pointers" I get the following. Does it helps ? 
[ 16.837198] == [ 16.848521] BUG: KFENCE: invalid read in finish_task_switch.isra.0+0x54/0x23c [ 16.848521] [ 16.857158] Invalid read at 0xdf98800a: [ 16.861004] finish_task_switch.isra.0+0x54/0x23c [ 16.865731] kunit_try_run_case+0x5c/0xd0 [ 16.869780] kunit_generic_run_threadfn_adapter+0x24/0x30 [ 16.875199] kthread+0x15c/0x174 [ 16.878460] ret_from_kernel_thread+0x14/0x1c [ 16.882847] [ 16.884351] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: GB 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty #4674 [ 16.895908] NIP: c016eb8c LR: c02f50dc CTR: c016eb38 [ 16.900963] REGS: e2449d90 TRAP: 0301 Tainted: GB (5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty) [ 16.911386] MSR: 9032 CR: 2204 XER: [ 16.918153] DAR: df98800a DSISR: 2000 [ 16.918153] GPR00: c02f50dc e2449e50 c1140d00 e100dd24 c084b13c 0008 c084b32b c016eb38 [ 16.918153] GPR08: c085 df988000 c0d1 e2449eb0 22000288 [ 16.936695] NIP [c016eb8c] test_invalid_access+0x54/0x108 [ 16.942125] LR [c02f50dc] kunit_try_run_case+0x5c/0xd0 [ 16.947292] Call Trace: [ 16.949746] [e2449e50] [c005a5ec] finish_task_switch.isra.0+0x54/0x23c (unreliable) [ 16.957443] [e2449eb0] [c02f50dc] kunit_try_run_case+0x5c/0xd0 [ 16.963319] [e2449ed0] [c02f63ec] kunit_generic_run_threadfn_adapter+0x24/0x30 [ 16.970574] [e2449ef0] [c004e710] kthread+0x15c/0x174 [ 16.975670] [e2449f30] [c001317c] ret_from_kernel_thread+0x14/0x1c [ 16.981896] Instruction dump: [ 16.984879] 8129d608 38e7eb38 81020280 911f004c 3900 995f0024 907f0028 90ff001c [ 16.992710] 3949000a 915f0020 3d40c0d1 3d00c085 <8929000a> 3908adb0 812a4b98 3d40c02f [ 17.000711] == [ 17.008223] # test_invalid_access: EXPECTATION FAILED at mm/kfence/kfence_test.c:636 [ 17.008223] Expected report_matches() to be true, but is false [ 17.023243] not ok 21 - test_invalid_access
Re: [PATCH] sound: pps: fix spelling typo of values
On Tue, 02 Mar 2021 04:40:53 +0100, dingsen...@163.com wrote: > > From: dingsenjie > > vaules -> values > > Signed-off-by: dingsenjie Thanks, applied. Takashi
Re: [RFC PATCH v1] powerpc: Enable KFENCE for PPC32
On 02/03/2021 09:58, Marco Elver wrote:
> On Tue, 2 Mar 2021 at 09:37, Christophe Leroy wrote:
>> Add architecture specific implementation details for KFENCE and enable
>> KFENCE for the ppc32 architecture. In particular, this implements the
>> required interface in <asm/kfence.h>.
>
> Nice!
>
>> KFENCE requires that attributes for pages from its memory pool can
>> individually be set. Therefore, force the Read/Write linear map to be
>> mapped at page granularity.
>>
>> Unit tests succeed on all tests but one:
>>
>> [ 15.053324] # test_invalid_access: EXPECTATION FAILED at mm/kfence/kfence_test.c:636
>> [ 15.053324] Expected report_matches() to be true, but is false
>> [ 15.068359] not ok 21 - test_invalid_access
>
> This is strange, given all the other tests passed. Do you mind sharing
> the full test log?

[0.00] Linux version 5.12.0-rc1-s3k-dev-01534-g4f14ae75edf0-dirty (root@localhost.localdomain) (powerpc64-linux-gcc (GCC) 10.1.0, GNU ld (GNU Binutils) 2.34) #4674 PREEMPT Tue Mar 2 08:18:49 UTC 2021
[0.00] Using CMPCPRO machine description
[0.00] Found legacy serial port 0 for /soc8321@b000/serial@4500
[0.00] mem=b0004500, taddr=b0004500, irq=0, clk=13334, speed=0
[0.00] Found legacy serial port 1 for /soc8321@b000/serial@4600
[0.00] mem=b0004600, taddr=b0004600, irq=0, clk=13334, speed=0
[0.00] ioremap() called early from find_legacy_serial_ports+0x3e4/0x4d8. Use early_ioremap() instead
[0.00] printk: bootconsole [udbg0] enabled
[0.00] -
[0.00] phys_mem_size = 0x2000
[0.00] dcache_bsize = 0x20
[0.00] icache_bsize = 0x20
[0.00] cpu_features = 0x01000140
[0.00] possible= 0x277ce140
[0.00] always = 0x0100
[0.00] cpu_user_features = 0x8400 0x
[0.00] mmu_features = 0x0021
[0.00] Hash_size = 0x0
[0.00] -
[0.00] Top of RAM: 0x2000, Total RAM: 0x2000
[0.00] Memory hole size: 0MB
[0.00] Zone ranges:
[0.00]   Normal [mem 0x-0x1fff]
[0.00] Movable zone start for each node
[0.00] Early memory node ranges
[0.00]   node 0: [mem 0x-0x1fff]
[0.00] Initmem setup node 0 [mem 0x-0x1fff]
[0.00] On node 0 totalpages: 131072
[0.00]   Normal zone: 1024 pages used for memmap
[0.00]   Normal zone: 0 pages reserved
[0.00]   Normal zone: 131072 pages, LIFO batch:31
[0.00] pcpu-alloc: s0 r0 d32768 u32768 alloc=1*32768
[0.00] pcpu-alloc: [0] 0
[0.00] Built 1 zonelists, mobility grouping on. Total pages: 130048
[0.00] Kernel command line: ip=192.168.0.3:192.168.0.1::255.0.0.0:vgoippro:eth0:off console=ttyS0,115200
[0.00] Dentry cache hash table entries: 65536 (order: 6, 262144 bytes, linear)
[0.00] Inode-cache hash table entries: 32768 (order: 5, 131072 bytes, linear)
[0.00] mem auto-init: stack:off, heap alloc:off, heap free:off
[0.00] Memory: 503516K/524288K available (7532K kernel code, 2236K rwdata, 1328K rodata, 1500K init, 931K bss, 20772K reserved, 0K cma-reserved)
[0.00] Kernel virtual memory layout:
[0.00]   * 0xff7ff000..0xf000 : fixmap
[0.00]   * 0xff7fd000..0xff7ff000 : early ioremap
[0.00]   * 0xe100..0xff7fd000 : vmalloc & ioremap
[0.00] SLUB: HWalign=32, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
[0.00] rcu: Preemptible hierarchical RCU implementation.
[0.00] rcu: RCU event tracing is enabled.
[0.00] Trampoline variant of Tasks RCU enabled.
[0.00] rcu: RCU calculated value of scheduler-enlistment delay is 10 jiffies.
[0.00] NR_IRQS: 512, nr_irqs: 512, preallocated irqs: 16
[0.00] IPIC (128 IRQ sources) at (ptrval)
[0.00] kfence: initialized - using 2097152 bytes for 255 objects at 0x(ptrval)-0x(ptrval)
...
[4.472455] # Subtest: kfence
[4.472490] 1..25
[4.476069] # test_out_of_bounds_read: test_alloc: size=32, gfp=cc0, policy=left, cache=0
[4.946420] ==
[4.953667] BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0x90/0x228
[4.953667]
[4.962657] Out-of-bounds read at 0x(ptrval) (1B left of kfence-#23):
[4.969109]  test_out_of_bounds_read+0x90/0x228
[4.973663]  kunit_try_run_case+0x5c/0xd0
[4.977712]  kunit_generic_run_threadfn_adapter+0x24/0x30
[4.983128]  kthread+0x15c/0x174
[4.986387]  ret_from_kernel_thread+0x14/0x1c
[4.990774]
[4.992274] kfence-#23 [0x(ptrval)-0x(ptrval), size=32, cache=kmalloc-32] allocated by task 91: [
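For context, the log above comes from the KFENCE KUnit suite. Assuming a kernel with the PPC32 KFENCE patch applied, a config fragment along these lines enables the same run (the symbol names are the upstream KFENCE/KUnit options; 255 objects is the KFENCE default, which matches the "kfence: initialized" line above):

```
CONFIG_KUNIT=y
CONFIG_KFENCE=y
CONFIG_KFENCE_KUNIT_TEST=y
# defaults, shown for reference:
# CONFIG_KFENCE_SAMPLE_INTERVAL=100
# CONFIG_KFENCE_NUM_OBJECTS=255
```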
[PATCH v1 1/2] powerpc: Use lwarx/ldarx directly instead of PPC_LWARX/LDARX macros
Force the eh flag to 0 on PPC32.

Signed-off-by: Christophe Leroy
---
 arch/powerpc/include/asm/asm-compat.h      | 4 ++--
 arch/powerpc/include/asm/atomic.h          | 4 ++--
 arch/powerpc/include/asm/bitops.h          | 8 ++++----
 arch/powerpc/include/asm/ppc-opcode.h      | 2 --
 arch/powerpc/include/asm/simple_spinlock.h | 6 +++---
 5 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index 19b70c5b5f18..2b736d9fbb1b 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -17,7 +17,7 @@
 #define PPC_LONG	stringify_in_c(.8byte)
 #define PPC_LONG_ALIGN	stringify_in_c(.balign 8)
 #define PPC_TLNEI	stringify_in_c(tdnei)
-#define PPC_LLARX(t, a, b, eh)	PPC_LDARX(t, a, b, eh)
+#define PPC_LLARX	stringify_in_c(ldarx)
 #define PPC_STLCX	stringify_in_c(stdcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzd)
 #define PPC_MTOCRF(FXM, RS)	MTOCRF((FXM), RS)
@@ -50,7 +50,7 @@
 #define PPC_LONG	stringify_in_c(.long)
 #define PPC_LONG_ALIGN	stringify_in_c(.balign 4)
 #define PPC_TLNEI	stringify_in_c(twnei)
-#define PPC_LLARX(t, a, b, eh)	PPC_LWARX(t, a, b, eh)
+#define PPC_LLARX	stringify_in_c(lwarx)
 #define PPC_STLCX	stringify_in_c(stwcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzw)
 #define PPC_MTOCRF	stringify_in_c(mtcrf)
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 61c6e8b200e8..ba177d0be278 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -204,7 +204,7 @@ atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
 	int r, o = *old;
 
 	__asm__ __volatile__ (
-"1:\t" PPC_LWARX(%0,0,%2,1) "	# atomic_try_cmpxchg_acquire\n"
+"1:	lwarx	%0,0,%2,%5	# atomic_try_cmpxchg_acquire\n"
 "	cmpw	0,%0,%3\n"
 "	bne-	2f\n"
 "	stwcx.	%4,0,%2\n"
@@ -212,7 +212,7 @@ atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
 "\t" PPC_ACQUIRE_BARRIER "\n"
 "2:\n"
 	: "=&r" (r), "+m" (v->counter)
-	: "r" (&v->counter), "r" (o), "r" (new)
+	: "r" (&v->counter), "r" (o), "r" (new), "i" (IS_ENABLED(CONFIG_PPC64) ? 1 : 0)
 	: "cr0", "memory");
 
 	if (unlikely(r != o))
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 299ab33505a6..11847b6a244e 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -70,7 +70,7 @@ static inline void fn(unsigned long mask,	\
 	unsigned long *p = (unsigned long *)_p;	\
 	__asm__ __volatile__ (			\
 	prefix					\
-"1:"	PPC_LLARX(%0,0,%3,0) "\n"		\
+"1:"	PPC_LLARX "%0,0,%3,0\n"			\
 	stringify_in_c(op) "%0,%0,%2\n"		\
 	PPC_STLCX "%0,0,%3\n"			\
 	"bne- 1b\n"				\
@@ -115,13 +115,13 @@ static inline unsigned long fn(	\
 	unsigned long *p = (unsigned long *)_p;	\
 	__asm__ __volatile__ (			\
 	prefix					\
-"1:"	PPC_LLARX(%0,0,%3,eh) "\n"		\
+"1:"	PPC_LLARX "%0,0,%3,%4\n"		\
 	stringify_in_c(op) "%1,%0,%2\n"		\
 	PPC_STLCX "%1,0,%3\n"			\
 	"bne- 1b\n"				\
 	postfix					\
 	: "=&r" (old), "=&r" (t)		\
-	: "r" (mask), "r" (p)			\
+	: "r" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0)	\
 	: "cc", "memory");			\
 	return (old & mask);			\
 }
@@ -170,7 +170,7 @@ clear_bit_unlock_return_word(int nr, volatile unsigned long *addr)
 
 	__asm__ __volatile__ (
 	PPC_RELEASE_BARRIER
-"1:"	PPC_LLARX(%0,0,%3,0) "\n"
+"1:"	PPC_LLARX "%0,0,%3,0\n"
 	"andc	%1,%0,%2\n"
 	PPC_STLCX "%1,0,%3\n"
 	"bne- 1b\n"
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ed161ef2b3ca..9550af2301b1 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -531,8 +531,6 @@
 #define PPC_DIVDE(t, a, b)	stringify_in_c(.long PPC_RAW_DIVDE(t, a, b))
 #define PPC_DIVDEU(t, a, b)	stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b))
 #define PPC_LQARX(t, a, b, eh)	stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh))
-#define PPC_LDARX(t, a, b, eh)	stringify_in_c(.long
[PATCH v1 2/2] powerpc: Use %y addressing on bitops
Signed-off-by: Christophe Leroy
---
 arch/powerpc/include/asm/bitops.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 11847b6a244e..264626b13ea8 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -70,12 +70,12 @@ static inline void fn(unsigned long mask,	\
 	unsigned long *p = (unsigned long *)_p;	\
 	__asm__ __volatile__ (			\
 	prefix					\
-"1:"	PPC_LLARX "%0,0,%3,0\n"			\
+"1:"	PPC_LLARX "%0,%y3,0\n"			\
 	stringify_in_c(op) "%0,%0,%2\n"		\
-	PPC_STLCX "%0,0,%3\n"			\
+	PPC_STLCX "%0,%y3\n"			\
 	"bne- 1b\n"				\
 	: "=&r" (old), "+m" (*p)		\
-	: "r" (mask), "r" (p)			\
+	: "r" (mask), "Z" (*p)			\
 	: "cc", "memory");			\
 }
@@ -115,13 +115,13 @@ static inline unsigned long fn(	\
 	unsigned long *p = (unsigned long *)_p;	\
 	__asm__ __volatile__ (			\
 	prefix					\
-"1:"	PPC_LLARX "%0,0,%3,%4\n"		\
+"1:"	PPC_LLARX "%0,%y3,%4\n"			\
 	stringify_in_c(op) "%1,%0,%2\n"		\
-	PPC_STLCX "%1,0,%3\n"			\
+	PPC_STLCX "%1,%y3\n"			\
 	"bne- 1b\n"				\
 	postfix					\
 	: "=&r" (old), "=&r" (t)		\
-	: "r" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0)	\
+	: "r" (mask), "Z" (*p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0)	\
 	: "cc", "memory");			\
 	return (old & mask);			\
 }
@@ -170,12 +170,12 @@ clear_bit_unlock_return_word(int nr, volatile unsigned long *addr)
 
 	__asm__ __volatile__ (
 	PPC_RELEASE_BARRIER
-"1:"	PPC_LLARX "%0,0,%3,0\n"
+"1:"	PPC_LLARX "%0,%y3,0\n"
 	"andc	%1,%0,%2\n"
-	PPC_STLCX "%1,0,%3\n"
+	PPC_STLCX "%1,%y3\n"
 	"bne- 1b\n"
 	: "=&r" (old), "=&r" (t)
-	: "r" (mask), "r" (p)
+	: "r" (mask), "Z" (*p)
 	: "cc", "memory");
 
 	return old;
-- 
2.25.0
[RFC PATCH v1] powerpc: Enable KFENCE for PPC32
Add architecture specific implementation details for KFENCE and enable
KFENCE for the ppc32 architecture. In particular, this implements the
required interface in <asm/kfence.h>.

KFENCE requires that attributes for pages from its memory pool can
individually be set. Therefore, force the Read/Write linear map to be
mapped at page granularity.

Unit tests succeed on all tests but one:

	[ 15.053324] # test_invalid_access: EXPECTATION FAILED at mm/kfence/kfence_test.c:636
	[ 15.053324] Expected report_matches() to be true, but is false
	[ 15.068359] not ok 21 - test_invalid_access

Signed-off-by: Christophe Leroy
---
 arch/powerpc/Kconfig              | 13 +++--
 arch/powerpc/include/asm/kfence.h | 32 +++
 arch/powerpc/mm/book3s32/mmu.c    |  2 +-
 arch/powerpc/mm/fault.c           |  7 ++-
 arch/powerpc/mm/init_32.c         |  3 +++
 arch/powerpc/mm/nohash/8xx.c      |  5 +++--
 6 files changed, 52 insertions(+), 10 deletions(-)
 create mode 100644 arch/powerpc/include/asm/kfence.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 386ae12d8523..d46db0bfb998 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -185,6 +185,7 @@ config PPC
 	select HAVE_ARCH_KASAN			if PPC32 && PPC_PAGE_SHIFT <= 14
 	select HAVE_ARCH_KASAN_VMALLOC		if PPC32 && PPC_PAGE_SHIFT <= 14
 	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_KFENCE			if PPC32
 	select HAVE_ARCH_MMAP_RND_BITS
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if COMPAT
 	select HAVE_ARCH_NVRAM_OPS
@@ -786,7 +787,7 @@ config THREAD_SHIFT
 config DATA_SHIFT_BOOL
 	bool "Set custom data alignment"
 	depends on ADVANCED_OPTIONS
-	depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC
+	depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE
 	depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX)
 	help
 	  This option allows you to set the kernel data alignment. When
@@ -798,13 +799,13 @@ config DATA_SHIFT_BOOL
 config DATA_SHIFT
 	int "Data shift" if DATA_SHIFT_BOOL
 	default 24 if STRICT_KERNEL_RWX && PPC64
-	range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32
-	range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx
+	range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
+	range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
 	default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
-	default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32
+	default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
 	default 23 if STRICT_KERNEL_RWX && PPC_8xx
-	default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA
-	default 19 if DEBUG_PAGEALLOC && PPC_8xx
+	default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA
+	default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
 	default PPC_PAGE_SHIFT
 	help
 	  On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
new file mode 100644
index ..c229ee6a48f0
--- /dev/null
+++ b/arch/powerpc/include/asm/kfence.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * powerpc KFENCE support.
+ *
+ * Copyright (C) 2020 CS GROUP France
+ */
+
+#ifndef __ASM_POWERPC_KFENCE_H
+#define __ASM_POWERPC_KFENCE_H
+
+#include
+
+static inline bool arch_kfence_init_pool(void)
+{
+	return true;
+}
+
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+	pte_t *kpte = virt_to_kpte(addr);
+
+	if (protect) {
+		pte_update(&init_mm, addr, kpte, _PAGE_PRESENT, 0, 0);
+		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+	} else {
+		pte_update(&init_mm, addr, kpte, 0, _PAGE_PRESENT, 0);
+	}
+
+	return true;
+}
+
+#endif /* __ASM_POWERPC_KFENCE_H */
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index d7eb266a3f7a..4548aec95561 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -162,7 +162,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 	unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
 
-	if (debug_pagealloc_enabled() || __map_without_bats) {
+	if (debug_pagealloc_enabled() || __map_without_bats || IS_ENABLED(CONFIG_KFENCE)) {
 		pr_debug_once("Read-Write memory mapped without BATs\n");
 		if (base >= border)
 			return base;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index bb368257b55c..bea13682c909 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -418,8
Re: linux-next: build failure after merge of the powerpc-fixes tree
Hello,

On 3/2/21 3:09 AM, Michael Ellerman wrote:
> Stephen Rothwell writes:
>> Hi all,
>>
>> After merging the powerpc-fixes tree, today's linux-next build
>> (powerpc allyesconfig) failed like this:
>>
>> drivers/net/ethernet/ibm/ibmvnic.c:5399:13: error: conflicting types for 'ibmvnic_remove'
>>  5399 | static void ibmvnic_remove(struct vio_dev *dev)
>>       |             ^~
>> drivers/net/ethernet/ibm/ibmvnic.c:81:12: note: previous declaration of 'ibmvnic_remove' was here
>>    81 | static int ibmvnic_remove(struct vio_dev *);
>>       |            ^~
>>
>> Caused by commit 1bdd1e6f9320 ("vio: make remove callback return void")
>
> Gah, is IBMVNIC in any of our defconfigs?! ... no it's not.

Would you accept a patch to add the driver to one of the defconfigs as an
excuse for the build breakage I created? Which would be appropriate?
ppc64_defconfig?

>> I have applied the following patch for today:
>
> Thanks, I'll squash it in.

Also thanks to Stephen for catching this and to Michael for the fixup.

Best regards
Uwe
[PATCH] powerpc: iommu: fix build when neither PCI nor IBMVIO is set
When neither CONFIG_PCI nor CONFIG_IBMVIO is enabled:

../arch/powerpc/kernel/iommu.c:178:30: error: 'fail_iommu_bus_notifier' defined but not used [-Werror=unused-variable]
  178 | static struct notifier_block fail_iommu_bus_notifier = {

If only that struct is bounded by 2 #if defined() phrases (PCI && IBMVIO):

../arch/powerpc/kernel/iommu.c:162:12: error: 'fail_iommu_bus_notify' defined but not used [-Werror=unused-function]
  162 | static int fail_iommu_bus_notify(struct notifier_block *nb,

If that function is also guarded by 2 #if defined() phrases:

In file included from ../include/linux/dma-mapping.h:7,
                 from ../arch/powerpc/kernel/iommu.c:19:
../include/linux/device.h:131:26: error: 'dev_attr_fail_iommu' defined but not used [-Werror=unused-variable]
  131 | struct device_attribute dev_attr_##_name = __ATTR_RW(_name)
../arch/powerpc/kernel/iommu.c:160:8: note: in expansion of macro 'DEVICE_ATTR_RW'
  160 | static DEVICE_ATTR_RW(fail_iommu);

and the snowball continues to grow. Next I got this one:

../arch/powerpc/kernel/iommu.c: In function 'iommu_range_alloc':
../arch/powerpc/kernel/iommu.c:234:6: error: implicit declaration of function 'should_fail_iommu'; did you mean 'should_failslab'? [-Werror=implicit-function-declaration]
  234 | if (should_fail_iommu(dev))

and

../arch/powerpc/kernel/iommu.c: In function 'should_fail_iommu':
../arch/powerpc/kernel/iommu.c:122:50: error: 'fail_iommu' undeclared (first use in this function)
  122 | return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);

So combine CONFIG_FAIL_IOMMU && (CONFIG_PCI || CONFIG_IBMVIO) to decide on
building some of this code/data.

This came from a .config file from the kernel test robot, but it was not
for this particular build problem.

Fixes: d6b9a81b2a45 ("powerpc: IOMMU fault injection")
Reported-by: kernel test robot
Signed-off-by: Randy Dunlap
Cc: Michael Ellerman
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Anton Blanchard
---
Found/seen in v5.12-rc1.
 arch/powerpc/kernel/iommu.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

--- lnx-512-rc1.orig/arch/powerpc/kernel/iommu.c
+++ lnx-512-rc1/arch/powerpc/kernel/iommu.c
@@ -115,7 +115,13 @@ static int __init setup_iommu_pool_hash(
 }
 subsys_initcall(setup_iommu_pool_hash);
 
-#ifdef CONFIG_FAIL_IOMMU
+#if defined(CONFIG_FAIL_IOMMU) && \
+    (defined(CONFIG_PCI) || defined(CONFIG_IBMVIO))
+
+static bool should_fail_iommu(struct device *dev)
+{
+	return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
+}
 
 static DECLARE_FAULT_ATTR(fail_iommu);
 
@@ -125,11 +131,6 @@ static int __init setup_fail_iommu(char
 }
 __setup("fail_iommu=", setup_fail_iommu);
 
-static bool should_fail_iommu(struct device *dev)
-{
-	return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
-}
-
 static int __init fail_iommu_debugfs(void)
 {
 	struct dentry *dir = fault_create_debugfs_attr("fail_iommu",