Re: [PATCH] powerpc: don't select KFENCE on platform PPC_FSL_BOOK3E

2021-10-11 Thread LEROY Christophe


Le 12/10/2021 à 03:43, Liu Shixin a écrit :
> kindly ping.

Hi

Based on the discussion we had, this patch is not enough. It should at
least also de-activate DEBUG_PAGEALLOC.

However I'm looking at fixing it the other way round. Give me one week 
or two.

Christophe

> 
> 
> On 2021/9/24 14:39, Liu Shixin wrote:
>> On platform PPC_FSL_BOOK3E, all lowmem is managed by tlbcam. That means
>> we didn't really map the kfence pool with page granularity. Therefore,
>> if KFENCE is enabled, the system will hit the following panic:
>>
>>  BUG: Kernel NULL pointer dereference on read at 0x00000000
>>  Faulting instruction address: 0xc01de598
>>  Oops: Kernel access of bad area, sig: 11 [#1]
>>  BE PAGE_SIZE=4K SMP NR_CPUS=4 MPC8544 DS
>>  Dumping ftrace buffer:
>> (ftrace buffer empty)
>>  Modules linked in:
>>  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc3+ #298
>>  NIP:  c01de598 LR: c08ae9c4 CTR: 
>>  REGS: c0b4bea0 TRAP: 0300   Not tainted  (5.12.0-rc3+)
>>  MSR:  00021000   CR: 24000228  XER: 2000
>>  DEAR:  ESR: 
>>  GPR00: c08ae9c4 c0b4bf60 c0ad64e0 ef72 00021000   
>> 0200
>>  GPR08: c0ad5000   0004  008fbb30  
>> 
>>  GPR16:     c000   
>> 
>>  GPR24: c08ca004 c08ca004 c0b6a0e0 c0b6 c0b58f00 c085 c08ca000 
>> ef72
>>  NIP [c01de598] kfence_protect+0x44/0x6c
>>  LR [c08ae9c4] kfence_init+0xfc/0x2a4
>>  Call Trace:
>>  [c0b4bf60] [efffe160] 0xefffe160 (unreliable)
>>  [c0b4bf70] [c08ae9c4] kfence_init+0xfc/0x2a4
>>  [c0b4bfb0] [c0894d3c] start_kernel+0x3bc/0x574
>>  [c0b4bff0] [c470] set_ivor+0x14c/0x188
>>  Instruction dump:
>>  7c0802a6 8109d594 546a653a 90010014 54630026 3920 7d48502e 2c0a
>>  41820010 554a0026 5469b53a 7d295214 <8149> 38831000 554a003c 
>> 9149
>>  random: get_random_bytes called from print_oops_end_marker+0x40/0x78 
>> with crng_init=0
>>  ---[ end trace  ]---
>>
>> Signed-off-by: Liu Shixin 
>> ---
>>   arch/powerpc/Kconfig | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index d46db0bfb998..cffd57bcb5e4 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -185,7 +185,7 @@ config PPC
>>  select HAVE_ARCH_KASAN  if PPC32 && PPC_PAGE_SHIFT <= 14
>>  select HAVE_ARCH_KASAN_VMALLOC  if PPC32 && PPC_PAGE_SHIFT <= 14
>>  select HAVE_ARCH_KGDB
>> -select HAVE_ARCH_KFENCE if PPC32
>> +select HAVE_ARCH_KFENCE if PPC32 && !PPC_FSL_BOOK3E
>>  select HAVE_ARCH_MMAP_RND_BITS
>>  select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if COMPAT
>>  select HAVE_ARCH_NVRAM_OPS
> 

[PATCH v2] tpm: ibmvtpm: Make use of dma_alloc_noncoherent()

2021-10-11 Thread Cai Huoqing
Replacing kmalloc/kfree/get_zeroed_page/free_page/dma_map_single/
dma_unmap_single() with dma_alloc_noncoherent/dma_free_noncoherent()
helps to reduce code size, and simplify the code, and the hardware
can keep DMA coherent itself.

Signed-off-by: Cai Huoqing 
---
v1->v2:
*Change to dma_alloc/free_noncoherent from dma_alloc/free_coherent.
*Update changelog.

 drivers/char/tpm/tpm_ibmvtpm.c | 63 +++---
 1 file changed, 20 insertions(+), 43 deletions(-)

diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 3af4c07a9342..b4552f8400b8 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -356,15 +356,13 @@ static void tpm_ibmvtpm_remove(struct vio_dev *vdev)
rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
-   dma_unmap_single(ibmvtpm->dev, ibmvtpm->crq_dma_handle,
-CRQ_RES_BUF_SIZE, DMA_BIDIRECTIONAL);
-   free_page((unsigned long)ibmvtpm->crq_queue.crq_addr);
-
-   if (ibmvtpm->rtce_buf) {
-   dma_unmap_single(ibmvtpm->dev, ibmvtpm->rtce_dma_handle,
-ibmvtpm->rtce_size, DMA_BIDIRECTIONAL);
-   kfree(ibmvtpm->rtce_buf);
-   }
+   dma_free_noncoherent(ibmvtpm->dev, CRQ_RES_BUF_SIZE, crq_q->crq_addr,
+crq_q->crq_dma_handle, DMA_BIDIRECTIONAL);
+
+   if (ibmvtpm->rtce_buf)
+   dma_free_noncoherent(ibmvtpm->dev,
+ibmvtpm->rtce_size, ibmvtpm->rtce_buf,
+ibmvtpm->rtce_dma_handle, 
DMA_BIDIRECTIONAL);
 
kfree(ibmvtpm);
/* For tpm_ibmvtpm_get_desired_dma */
@@ -522,23 +520,12 @@ static void ibmvtpm_crq_process(struct ibmvtpm_crq *crq,
return;
}
ibmvtpm->rtce_size = be16_to_cpu(crq->len);
-   ibmvtpm->rtce_buf = kmalloc(ibmvtpm->rtce_size,
-   GFP_ATOMIC);
-   if (!ibmvtpm->rtce_buf) {
-   dev_err(ibmvtpm->dev, "Failed to allocate 
memory for rtce buffer\n");
-   return;
-   }
-
-   ibmvtpm->rtce_dma_handle = dma_map_single(ibmvtpm->dev,
-   ibmvtpm->rtce_buf, ibmvtpm->rtce_size,
-   DMA_BIDIRECTIONAL);
-
-   if (dma_mapping_error(ibmvtpm->dev,
- ibmvtpm->rtce_dma_handle)) {
-   kfree(ibmvtpm->rtce_buf);
-   ibmvtpm->rtce_buf = NULL;
-   dev_err(ibmvtpm->dev, "Failed to dma map rtce 
buffer\n");
-   }
+   ibmvtpm->rtce_buf = dma_alloc_noncoherent(ibmvtpm->dev,
+ 
ibmvtpm->rtce_size,
+ 
&ibmvtpm->rtce_dma_handle,
+ 
DMA_BIDIRECTIONAL, GFP_ATOMIC);
+   if (!ibmvtpm->rtce_buf)
+   dev_err(ibmvtpm->dev, "Failed to dma allocate 
rtce buffer\n");
 
return;
case VTPM_GET_VERSION_RES:
@@ -618,22 +605,14 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
ibmvtpm->vdev = vio_dev;
 
crq_q = &ibmvtpm->crq_queue;
-   crq_q->crq_addr = (struct ibmvtpm_crq *)get_zeroed_page(GFP_KERNEL);
-   if (!crq_q->crq_addr) {
-   dev_err(dev, "Unable to allocate memory for crq_addr\n");
-   goto cleanup;
-   }
 
crq_q->num_entry = CRQ_RES_BUF_SIZE / sizeof(*crq_q->crq_addr);
init_waitqueue_head(&crq_q->wq);
-   ibmvtpm->crq_dma_handle = dma_map_single(dev, crq_q->crq_addr,
-CRQ_RES_BUF_SIZE,
-DMA_BIDIRECTIONAL);
-
-   if (dma_mapping_error(dev, ibmvtpm->crq_dma_handle)) {
-   dev_err(dev, "dma mapping failed\n");
+   crq_q->crq_addr = dma_alloc_noncoherent(dev, CRQ_RES_BUF_SIZE,
+   &ibmvtpm->crq_dma_handle,
+   DMA_BIDIRECTIONAL, GFP_KERNEL);
+   if (!crq_q->crq_addr)
goto cleanup;
-   }
 
rc = plpar_hcall_norets(H_REG_CRQ, vio_dev->unit_address,
ibmvtpm->crq_dma_handle, CRQ_RES_BUF_SIZE);
@@ -642,7 +621,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
 
if (rc) {
dev_err(dev, "Unable to register CRQ rc=%d\n", rc);
-   goto reg_crq_cleanup;
+   goto cleanup;
}
 
rc = 

[PATCH v2] scsi: ibmvscsi: Use dma_alloc_noncoherent() instead of get_zeroed_page/dma_map_single()

2021-10-11 Thread Cai Huoqing
Replacing get_zeroed_page/free_page/dma_map_single/dma_unmap_single()
with dma_alloc_noncoherent/dma_free_noncoherent() helps to reduce
code size, and simplify the code, and the hardware can keep DMA
coherent itself.

Signed-off-by: Cai Huoqing 
---
v1->v2:
*Change to dma_alloc/free_noncoherent from dma_alloc/free_coherent.
*Update changelog.

 drivers/scsi/ibmvscsi/ibmvfc.c   | 16 
 drivers/scsi/ibmvscsi/ibmvscsi.c | 29 +
 2 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 1f1586ad48fe..6e95fd02fd25 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -869,8 +869,8 @@ static void ibmvfc_free_queue(struct ibmvfc_host *vhost,
 {
struct device *dev = vhost->dev;
 
-   dma_unmap_single(dev, queue->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
-   free_page((unsigned long)queue->msgs.handle);
+   dma_free_noncoherent(dev, PAGE_SIZE, queue->msgs.handle,
+queue->msg_token, DMA_BIDIRECTIONAL);
queue->msgs.handle = NULL;
 
ibmvfc_free_event_pool(vhost, queue);
@@ -5663,19 +5663,11 @@ static int ibmvfc_alloc_queue(struct ibmvfc_host *vhost,
return -ENOMEM;
}
 
-   queue->msgs.handle = (void *)get_zeroed_page(GFP_KERNEL);
+   queue->msgs.handle = dma_alloc_noncoherent(dev, PAGE_SIZE, 
&queue->msg_token,
+  DMA_BIDIRECTIONAL, 
GFP_KERNEL);
if (!queue->msgs.handle)
return -ENOMEM;
 
-   queue->msg_token = dma_map_single(dev, queue->msgs.handle, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
-
-   if (dma_mapping_error(dev, queue->msg_token)) {
-   free_page((unsigned long)queue->msgs.handle);
-   queue->msgs.handle = NULL;
-   return -ENOMEM;
-   }
-
queue->cur = 0;
queue->fmt = fmt;
queue->size = PAGE_SIZE / fmt_size;
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index ea8e01f49cba..68409c298c74 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -151,10 +151,8 @@ static void ibmvscsi_release_crq_queue(struct crq_queue 
*queue,
msleep(100);
rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
} while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
-   dma_unmap_single(hostdata->dev,
-queue->msg_token,
-queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL);
-   free_page((unsigned long)queue->msgs);
+   dma_free_noncoherent(hostdata->dev, PAGE_SIZE,
+queue->msgs, queue->msg_token, DMA_BIDIRECTIONAL);
 }
 
 /**
@@ -331,18 +329,12 @@ static int ibmvscsi_init_crq_queue(struct crq_queue 
*queue,
int retrc;
struct vio_dev *vdev = to_vio_dev(hostdata->dev);
 
-   queue->msgs = (struct viosrp_crq *)get_zeroed_page(GFP_KERNEL);
-
-   if (!queue->msgs)
-   goto malloc_failed;
queue->size = PAGE_SIZE / sizeof(*queue->msgs);
-
-   queue->msg_token = dma_map_single(hostdata->dev, queue->msgs,
- queue->size * sizeof(*queue->msgs),
- DMA_BIDIRECTIONAL);
-
-   if (dma_mapping_error(hostdata->dev, queue->msg_token))
-   goto map_failed;
+   queue->msgs = dma_alloc_noncoherent(hostdata->dev,
+   PAGE_SIZE, &queue->msg_token,
+   DMA_BIDIRECTIONAL, GFP_KERNEL);
+   if (!queue->msg)
+   goto malloc_failed;
 
gather_partition_info();
set_adapter_info(hostdata);
@@ -395,11 +387,8 @@ static int ibmvscsi_init_crq_queue(struct crq_queue *queue,
rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
} while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
   reg_crq_failed:
-   dma_unmap_single(hostdata->dev,
-queue->msg_token,
-queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL);
-  map_failed:
-   free_page((unsigned long)queue->msgs);
+   dma_free_noncoherent(hostdata->dev, PAGE_SIZE, queue->msg,
+queue->msg_token, DMA_BIDIRECTIONAL);
   malloc_failed:
return -1;
 }
-- 
2.25.1



Re: [PATCH v3 02/52] powerpc/64s: guard optional TIDR SPR with CPU ftr test

2021-10-11 Thread Michael Ellerman
Fabiano Rosas  writes:
> Nicholas Piggin  writes:
>
>> The TIDR SPR only exists on POWER9. Avoid accessing it when the
>> feature bit for it is not set.
>
> Not related to this patch, but how does this work with compat mode? A P9
> compat mode guest would get an invalid instruction when trying to access
> this SPR?

Good question.

I assume you're talking about P9 compat mode on P10.

In general compat mode only applies to userspace, because it's
implemented by setting the PCR which only (mostly?) applies to PR=1.

I don't think there's any special casing in the ISA for the TIDR, so I
think it just falls into the unimplemented SPR case for mt/fspr.

That's documented in Book III section 5.4.4, in particular on page 1171
it says:

  Execution of this instruction specifying an SPR number
  that is undefined for the implementation causes one of
  the following.
  • if spr[0]=0:
- if MSR[PR]=1: Hypervisor Emulation Assistance interrupt
- if MSR[PR]=0: Hypervisor Emulation Assistance interrupt for SPR
  0,4,5, and 6, and no operation (i.e., the instruction is treated
  as a no-op) when LPCR[EVIRT]=0 and Hypervisor Emulation Assistance
  interrupt when LPCR[EVIRT]=1 for all other SPRs

Linux doesn't set EVIRT, and I assume neither does phyp, so it behaves
like a nop.

We actually use that behaviour in xmon to detect that an SPR is not
implemented, by noticing that the mfspr has no effect on the target
register, see dump_one_spr().

We should really write some docs on compat mode in the linuxppc wiki
and/or Documentation ;)

cheers


Re: [PATCH] powerpc: don't select KFENCE on platform PPC_FSL_BOOK3E

2021-10-11 Thread Liu Shixin
kindly ping.


On 2021/9/24 14:39, Liu Shixin wrote:
> On platform PPC_FSL_BOOK3E, all lowmem is managed by tlbcam. That means
> we didn't really map the kfence pool with page granularity. Therefore,
> if KFENCE is enabled, the system will hit the following panic:
>
> BUG: Kernel NULL pointer dereference on read at 0x00000000
> Faulting instruction address: 0xc01de598
> Oops: Kernel access of bad area, sig: 11 [#1]
> BE PAGE_SIZE=4K SMP NR_CPUS=4 MPC8544 DS
> Dumping ftrace buffer:
>(ftrace buffer empty)
> Modules linked in:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc3+ #298
> NIP:  c01de598 LR: c08ae9c4 CTR: 
> REGS: c0b4bea0 TRAP: 0300   Not tainted  (5.12.0-rc3+)
> MSR:  00021000   CR: 24000228  XER: 2000
> DEAR:  ESR: 
> GPR00: c08ae9c4 c0b4bf60 c0ad64e0 ef72 00021000   
> 0200
> GPR08: c0ad5000   0004  008fbb30  
> 
> GPR16:     c000   
> 
> GPR24: c08ca004 c08ca004 c0b6a0e0 c0b6 c0b58f00 c085 c08ca000 
> ef72
> NIP [c01de598] kfence_protect+0x44/0x6c
> LR [c08ae9c4] kfence_init+0xfc/0x2a4
> Call Trace:
> [c0b4bf60] [efffe160] 0xefffe160 (unreliable)
> [c0b4bf70] [c08ae9c4] kfence_init+0xfc/0x2a4
> [c0b4bfb0] [c0894d3c] start_kernel+0x3bc/0x574
> [c0b4bff0] [c470] set_ivor+0x14c/0x188
> Instruction dump:
> 7c0802a6 8109d594 546a653a 90010014 54630026 3920 7d48502e 2c0a
> 41820010 554a0026 5469b53a 7d295214 <8149> 38831000 554a003c 9149
> random: get_random_bytes called from print_oops_end_marker+0x40/0x78 with 
> crng_init=0
> ---[ end trace  ]---
>
> Signed-off-by: Liu Shixin 
> ---
>  arch/powerpc/Kconfig | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index d46db0bfb998..cffd57bcb5e4 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -185,7 +185,7 @@ config PPC
>   select HAVE_ARCH_KASAN  if PPC32 && PPC_PAGE_SHIFT <= 14
>   select HAVE_ARCH_KASAN_VMALLOC  if PPC32 && PPC_PAGE_SHIFT <= 14
>   select HAVE_ARCH_KGDB
> - select HAVE_ARCH_KFENCE if PPC32
> + select HAVE_ARCH_KFENCE if PPC32 && !PPC_FSL_BOOK3E
>   select HAVE_ARCH_MMAP_RND_BITS
>   select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if COMPAT
>   select HAVE_ARCH_NVRAM_OPS



[PATCH] powerpc/s64: Clarify that radix lacks DEBUG_PAGEALLOC

2021-10-11 Thread Joel Stanley
The page_alloc.c code will call into __kernel_map_pages when
DEBUG_PAGEALLOC is configured and enabled.

As the implementation assumes hash, this would crash spectacularly if
not for a bit of luck in __kernel_map_pages. In this function
linear_map_hash_count is always zero, so the for loop exits without doing
any damage.

There are no other platforms that determine if they support
debug_pagealloc at runtime. Instead of adding code to mm/page_alloc.c to
do that, this change turns the map/unmap into a noop when in radix
mode and prints a warning once.

Signed-off-by: Joel Stanley 
---
I noticed this when I was looking at adding kfence support a while back.
I've put that work aside and jpn has since gotten further than me, but I
think this is a fix worth considering.

 arch/powerpc/include/asm/book3s/64/hash.h |  2 ++
 arch/powerpc/mm/book3s64/hash_utils.c |  2 +-
 arch/powerpc/mm/book3s64/pgtable.c| 12 
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index d959b0195ad9..674fe0e890dc 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -255,6 +255,8 @@ int hash__create_section_mapping(unsigned long start, 
unsigned long end,
 int nid, pgprot_t prot);
 int hash__remove_section_mapping(unsigned long start, unsigned long end);
 
+void hash__kernel_map_pages(struct page *page, int numpages, int enable);
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c 
b/arch/powerpc/mm/book3s64/hash_utils.c
index c145776d3ae5..cfd45245d009 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1988,7 +1988,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, 
unsigned long lmi)
 mmu_kernel_ssize, 0);
 }
 
-void __kernel_map_pages(struct page *page, int numpages, int enable)
+void hash__kernel_map_pages(struct page *page, int numpages, int enable)
 {
unsigned long flags, vaddr, lmi;
int i;
diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
b/arch/powerpc/mm/book3s64/pgtable.c
index 9e16c7b1a6c5..0aefc272cd03 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -526,3 +526,15 @@ static int __init pgtable_debugfs_setup(void)
return 0;
 }
 arch_initcall(pgtable_debugfs_setup);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+   if (radix_enabled()) {
+   pr_warn_once("DEBUG_PAGEALLOC not supported in radix mode\n");
+   return;
+   }
+
+   hash__kernel_map_pages(page, numpages, enable);
+}
+#endif
-- 
2.33.0



Re: [PATCH] powerpc/boot: Use CONFIG_PPC_POWERNV to compile OPAL support

2021-10-11 Thread Joel Stanley
On Mon, 11 Oct 2021 at 07:42, Cédric Le Goater  wrote:
>
> CONFIG_PPC64_BOOT_WRAPPER is selected by CPU_LITTLE_ENDIAN which is
> used to compile support for other platforms such as Microwatt. There
> is no need for OPAL calls on these.
>
> Signed-off-by: Cédric Le Goater 

Reviewed-by: Joel Stanley 

> ---
>  arch/powerpc/boot/serial.c | 2 +-
>  arch/powerpc/boot/Makefile | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
> index 9a19e5905485..54d2522be485 100644
> --- a/arch/powerpc/boot/serial.c
> +++ b/arch/powerpc/boot/serial.c
> @@ -132,7 +132,7 @@ int serial_console_init(void)
> else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
> rc = mpc5200_psc_console_init(devp, &serial_cd);
>  #endif
> -#ifdef CONFIG_PPC64_BOOT_WRAPPER
> +#ifdef CONFIG_PPC_POWERNV
> else if (dt_is_compatible(devp, "ibm,opal-console-raw"))
> rc = opal_console_init(devp, &serial_cd);
>  #endif
> diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
> index 089ee3ea55c8..9993c6256ad2 100644
> --- a/arch/powerpc/boot/Makefile
> +++ b/arch/powerpc/boot/Makefile
> @@ -123,7 +123,7 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c 
> \
> oflib.c ofconsole.c cuboot.c
>
>  src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c
> -src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c
> +src-wlib-$(CONFIG_PPC_POWERNV) += opal-calls.S opal.c
>  ifndef CONFIG_PPC64_BOOT_WRAPPER
>  src-wlib-y += crtsavres.S
>  endif
> --
> 2.31.1
>


Re: linux-next: build warnings in Linus' tree

2021-10-11 Thread Rob Herring
+Arnd in regards to removing platforms.

On Sun, Oct 10, 2021 at 4:27 PM Stephen Rothwell  wrote:
>
> Hi all,
>
> [Cc'ing Rob]
>
> Rob: these warnings have been there for a long time ...

If anyone cares about these platforms, then the warnings should be
fixed by folks that care. If not, then perhaps the DT files should
just get removed.

FYI, u-boot removed mpc5xxx support in 2017, so maybe there's
similarly not a need to keep them in the kernel? It does appear NXP
will still sell you the parts though the last BSP was 2009.

Rob


Re: [PATCH v3 02/52] powerpc/64s: guard optional TIDR SPR with CPU ftr test

2021-10-11 Thread Fabiano Rosas
Nicholas Piggin  writes:

> The TIDR SPR only exists on POWER9. Avoid accessing it when the
> feature bit for it is not set.

Not related to this patch, but how does this work with compat mode? A P9
compat mode guest would get an invalid instruction when trying to access
this SPR?

> Signed-off-by: Nicholas Piggin 

Reviewed-by: Fabiano Rosas 

> ---
>  arch/powerpc/kvm/book3s_hv.c | 12 
>  arch/powerpc/xmon/xmon.c | 10 --
>  2 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 2acb1c96cfaf..f4a779fffd18 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -3767,7 +3767,8 @@ static void load_spr_state(struct kvm_vcpu *vcpu)
>   mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
>   mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
>   mtspr(SPRN_BESCR, vcpu->arch.bescr);
> - mtspr(SPRN_TIDR, vcpu->arch.tid);
> + if (cpu_has_feature(CPU_FTR_P9_TIDR))
> + mtspr(SPRN_TIDR, vcpu->arch.tid);
>   mtspr(SPRN_AMR, vcpu->arch.amr);
>   mtspr(SPRN_UAMOR, vcpu->arch.uamor);
>
> @@ -3793,7 +3794,8 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
>   vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
>   vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
>   vcpu->arch.bescr = mfspr(SPRN_BESCR);
> - vcpu->arch.tid = mfspr(SPRN_TIDR);
> + if (cpu_has_feature(CPU_FTR_P9_TIDR))
> + vcpu->arch.tid = mfspr(SPRN_TIDR);
>   vcpu->arch.amr = mfspr(SPRN_AMR);
>   vcpu->arch.uamor = mfspr(SPRN_UAMOR);
>   vcpu->arch.dscr = mfspr(SPRN_DSCR);
> @@ -3813,7 +3815,8 @@ struct p9_host_os_sprs {
>  static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
>  {
>   host_os_sprs->dscr = mfspr(SPRN_DSCR);
> - host_os_sprs->tidr = mfspr(SPRN_TIDR);
> + if (cpu_has_feature(CPU_FTR_P9_TIDR))
> + host_os_sprs->tidr = mfspr(SPRN_TIDR);
>   host_os_sprs->iamr = mfspr(SPRN_IAMR);
>   host_os_sprs->amr = mfspr(SPRN_AMR);
>   host_os_sprs->fscr = mfspr(SPRN_FSCR);
> @@ -3827,7 +3830,8 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu 
> *vcpu,
>   mtspr(SPRN_UAMOR, 0);
>
>   mtspr(SPRN_DSCR, host_os_sprs->dscr);
> - mtspr(SPRN_TIDR, host_os_sprs->tidr);
> + if (cpu_has_feature(CPU_FTR_P9_TIDR))
> + mtspr(SPRN_TIDR, host_os_sprs->tidr);
>   mtspr(SPRN_IAMR, host_os_sprs->iamr);
>
>   if (host_os_sprs->amr != vcpu->arch.amr)
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index dd8241c009e5..7958e5aae844 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -2107,8 +2107,14 @@ static void dump_300_sprs(void)
>   if (!cpu_has_feature(CPU_FTR_ARCH_300))
>   return;
>
> - printf("pidr   = %.16lx  tidr  = %.16lx\n",
> - mfspr(SPRN_PID), mfspr(SPRN_TIDR));
> + if (cpu_has_feature(CPU_FTR_P9_TIDR)) {
> + printf("pidr   = %.16lx  tidr  = %.16lx\n",
> + mfspr(SPRN_PID), mfspr(SPRN_TIDR));
> + } else {
> + printf("pidr   = %.16lx\n",
> + mfspr(SPRN_PID));
> + }
> +
>   printf("psscr  = %.16lx\n",
>   hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR));


[PATCH 17/22] PCI/DPC: Use RESPONSE_IS_PCI_ERROR() to check read from hardware

2021-10-11 Thread Naveen Naidu
An MMIO read from a PCI device that doesn't exist or doesn't respond
causes a PCI error.  There's no real data to return to satisfy the
CPU read, so most hardware fabricates ~0 data.

Use RESPONSE_IS_PCI_ERROR() to check the response we get when we read
data from hardware.

This helps unify PCI error response checking and make error checks
consistent and easier to find.

Compile tested only.

Signed-off-by: Naveen Naidu 
---
 drivers/pci/pcie/dpc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index c556e7beafe3..561c44d9429c 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -79,7 +79,7 @@ static bool dpc_completed(struct pci_dev *pdev)
u16 status;
 
pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, &status);
-   if ((status != 0xffff) && (status & PCI_EXP_DPC_STATUS_TRIGGER))
+   if ((!RESPONSE_IS_PCI_ERROR(&status)) && (status & 
PCI_EXP_DPC_STATUS_TRIGGER))
return false;
 
if (test_bit(PCI_DPC_RECOVERING, &pdev->priv_flags))
@@ -312,7 +312,7 @@ static irqreturn_t dpc_irq(int irq, void *context)
 
pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
 
-   if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || status == (u16)(~0))
+   if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || 
RESPONSE_IS_PCI_ERROR(&status))
return IRQ_NONE;
 
pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
-- 
2.25.1



[PATCH 01/22] PCI: Add PCI_ERROR_RESPONSE and it's related defintions

2021-10-11 Thread Naveen Naidu
An MMIO read from a PCI device that doesn't exist or doesn't respond
causes a PCI error.  There's no real data to return to satisfy the
CPU read, so most hardware fabricates ~0 data.

Add a PCI_ERROR_RESPONSE definition for that and use it where
appropriate to make these checks consistent and easier to find.

Also add helper definitions SET_PCI_ERROR_RESPONSE and
RESPONSE_IS_PCI_ERROR to make the code more readable.

Signed-off-by: Naveen Naidu 
---
 include/linux/pci.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index cd8aa6fce204..928c589bb5c4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -154,6 +154,15 @@ enum pci_interrupt_pin {
 /* The number of legacy PCI INTx interrupts */
 #define PCI_NUM_INTX   4
 
+/*
+ * Reading from a device that doesn't respond typically returns ~0.  A
+ * successful read from a device may also return ~0, so you need additional
+ * information to reliably identify errors.
+ */
+#define PCI_ERROR_RESPONSE (~0ULL)
+#define SET_PCI_ERROR_RESPONSE(val)(*val = ((typeof(*val)) 
PCI_ERROR_RESPONSE))
+#define RESPONSE_IS_PCI_ERROR(val) (*val == ((typeof(*val)) 
PCI_ERROR_RESPONSE))
+
 /*
  * pci_power_t values must match the bits in the Capabilities PME_Support
  * and Control/Status PowerState fields in the Power Management capability.
-- 
2.25.1



[PATCH 00/22] PCI: Unify PCI error response checking

2021-10-11 Thread Naveen Naidu
An MMIO read from a PCI device that doesn't exist or doesn't respond
causes a PCI error.  There's no real data to return to satisfy the 
CPU read, so most hardware fabricates ~0 data.

This patch series adds the PCI_ERROR_RESPONSE definition and the helper
definitions SET_PCI_ERROR_RESPONSE and RESPONSE_IS_PCI_ERROR, and uses them
where appropriate to make these checks consistent and easier to find.

This helps unify PCI error response checking and makes error checks
consistent and easier to find.

Patch 1:
  - Adds the PCI_ERROR_RESPONSE and other related definitions
  - All other patches are dependent on this patch. This patch needs to
be applied first, before the others

Patch 2 - 13
  - Uses SET_PCI_ERROR_RESPONSE() when device is not found

Patch 14 - 19
  - Uses RESPONSE_IS_PCI_ERROR() to check the reads from hardware

Patch 20 - 22
  - Edits the comments to include PCI_ERROR_RESPONSE along with
0xFFFFFFFF, so that it becomes easier to grep for faulty hardware
reads.

Thanks,
Naveen

Naveen Naidu (22):
  [PATCH 1/22] PCI: Add PCI_ERROR_RESPONSE and it's related defintions
  [PATCH 2/22] PCI: Unify PCI error response checking
  [PATCH 3/22] PCI: thunder: Use SET_PCI_ERROR_RESPONSE() when device not found
  [PATCH 4/22] PCI: iproc: Use SET_PCI_ERROR_RESPONSE() when device not found
  [PATCH 5/22] PCI: mediatek: Use SET_PCI_ERROR_RESPONSE() when device not found
  [PATCH 6/22] PCI: exynos: Use SET_PCI_ERROR_RESPONSE() when device not found
  [PATCH 7/22] PCI: histb: Use SET_PCI_ERROR_RESPONSE() when device not found
  [PATCH 8/22] PCI: kirin: Use SET_PCI_ERROR_RESPONSE() when device not found
  [PATCH 9/22] PCI: aardvark: Use SET_PCI_ERROR_RESPONSE() when device not found
  [PATCH 10/22] PCI: mvebu: Use SET_PCI_ERROR_RESPONSE() when device not found
  [PATCH 11/22] PCI: altera: Use SET_PCI_ERROR_RESPONSE() when device not found
  [PATCH 12/22] PCI: rcar: Use SET_PCI_ERROR_RESPONSE() when device not found
  [PATCH 13/22] PCI: rockchip: Use SET_PCI_ERROR_RESPONSE() when device not 
found
  [PATCH 14/22] PCI/ERR: Use RESPONSE_IS_PCI_ERROR() to check read from hardware
  [PATCH 15/22] PCI: vmd: Use RESPONSE_IS_PCI_ERROR() to check read from 
hardware
  [PATCH 16/22] PCI: pciehp: Use RESPONSE_IS_PCI_ERROR() to check read from 
hardware
  [PATCH 17/22] PCI/DPC: Use RESPONSE_IS_PCI_ERROR() to check read from hardware
  [PATCH 18/22] PCI/PME: Use RESPONSE_IS_PCI_ERROR() to check read from hardware
  [PATCH 19/22] PCI: cpqphp: Use RESPONSE_IS_PCI_ERROR() to check read from 
hardware
  [PATCH 20/22] PCI: keystone: Use PCI_ERROR_RESPONSE to specify hardware error
  [PATCH 21/22] PCI: hv: Use PCI_ERROR_RESPONSE to specify hardware read error
  [PATCH 22/22] PCI: xgene: Use PCI_ERROR_RESPONSE to specify hardware error

 drivers/pci/access.c| 22 ++---
 drivers/pci/controller/dwc/pci-exynos.c |  2 +-
 drivers/pci/controller/dwc/pci-keystone.c   |  4 ++--
 drivers/pci/controller/dwc/pcie-histb.c |  2 +-
 drivers/pci/controller/dwc/pcie-kirin.c |  2 +-
 drivers/pci/controller/pci-aardvark.c   |  8 
 drivers/pci/controller/pci-hyperv.c |  2 +-
 drivers/pci/controller/pci-mvebu.c  |  4 ++--
 drivers/pci/controller/pci-thunder-ecam.c   | 20 +--
 drivers/pci/controller/pci-thunder-pem.c|  2 +-
 drivers/pci/controller/pci-xgene.c  |  8 
 drivers/pci/controller/pcie-altera.c|  2 +-
 drivers/pci/controller/pcie-iproc.c |  2 +-
 drivers/pci/controller/pcie-mediatek.c  |  4 ++--
 drivers/pci/controller/pcie-rcar-host.c |  2 +-
 drivers/pci/controller/pcie-rockchip-host.c |  2 +-
 drivers/pci/controller/vmd.c|  2 +-
 drivers/pci/hotplug/cpqphp_ctrl.c   |  4 ++--
 drivers/pci/hotplug/pciehp_hpc.c| 10 +-
 drivers/pci/pci.c   | 10 +-
 drivers/pci/pcie/dpc.c  |  4 ++--
 drivers/pci/pcie/pme.c  |  4 ++--
 drivers/pci/probe.c | 10 +-
 include/linux/pci.h |  9 +
 24 files changed, 75 insertions(+), 66 deletions(-)

-- 
2.25.1



[PATCH v1 04/10] asm-generic: Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR to define associated stubs

2021-10-11 Thread Christophe Leroy
Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR instead of 
'dereference_function_descriptor'
to know whether arch has function descriptors.

Signed-off-by: Christophe Leroy 
---
 arch/ia64/include/asm/sections.h| 4 ++--
 arch/parisc/include/asm/sections.h  | 6 --
 arch/powerpc/include/asm/sections.h | 6 --
 include/asm-generic/sections.h  | 3 ++-
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h
index 35f24e52149a..80f5868afb06 100644
--- a/arch/ia64/include/asm/sections.h
+++ b/arch/ia64/include/asm/sections.h
@@ -7,6 +7,8 @@
  * David Mosberger-Tang 
  */
 
+#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
+
 #include 
 #include 
 #include 
@@ -27,8 +29,6 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], 
__end_gate_brl_fsys_b
 extern char __start_unwind[], __end_unwind[];
 extern char __start_ivt_text[], __end_ivt_text[];
 
-#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
-
 #undef dereference_function_descriptor
 static inline void *dereference_function_descriptor(void *ptr)
 {
diff --git a/arch/parisc/include/asm/sections.h 
b/arch/parisc/include/asm/sections.h
index bb52aea0cb21..2e781ee19b66 100644
--- a/arch/parisc/include/asm/sections.h
+++ b/arch/parisc/include/asm/sections.h
@@ -2,6 +2,10 @@
 #ifndef _PARISC_SECTIONS_H
 #define _PARISC_SECTIONS_H
 
+#ifdef CONFIG_64BIT
+#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
+#endif
+
 /* nothing to see, move along */
 #include 
 
@@ -9,8 +13,6 @@ extern char __alt_instructions[], __alt_instructions_end[];
 
 #ifdef CONFIG_64BIT
 
-#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
-
 #undef dereference_function_descriptor
 void *dereference_function_descriptor(void *);
 
diff --git a/arch/powerpc/include/asm/sections.h 
b/arch/powerpc/include/asm/sections.h
index 32e7035863ac..b7f1ba04e756 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -8,6 +8,10 @@
 
 #define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
 
+#ifdef PPC64_ELF_ABI_v1
+#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
+#endif
+
 #include 
 
 extern bool init_mem_is_free;
@@ -69,8 +73,6 @@ static inline int overlaps_kernel_text(unsigned long start, 
unsigned long end)
 
 #ifdef PPC64_ELF_ABI_v1
 
-#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
-
 #undef dereference_function_descriptor
 static inline void *dereference_function_descriptor(void *ptr)
 {
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index d16302d3eb59..1db5cfd69817 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -59,7 +59,8 @@ extern char __noinstr_text_start[], __noinstr_text_end[];
 extern __visible const void __nosave_begin, __nosave_end;
 
 /* Function descriptor handling (if any).  Override in asm/sections.h */
-#ifndef dereference_function_descriptor
+#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR
+#else
 #define dereference_function_descriptor(p) ((void *)(p))
 #define dereference_kernel_function_descriptor(p) ((void *)(p))
 #endif
-- 
2.31.1



[PATCH v1 01/10] powerpc: Move 'struct ppc64_opd_entry' back into asm/elf.h

2021-10-11 Thread Christophe Leroy
'struct ppc64_opd_entry' doesn't belong to uapi/asm/elf.h

It was initially in module_64.c and commit 2d291e902791 ("Fix compile
failure with non modular builds") moved it into asm/elf.h

But it was by mistake added outside of __KERNEL__ section,
therefore commit c3617f72036c ("UAPI: (Scripted) Disintegrate
arch/powerpc/include/asm") moved it to uapi/asm/elf.h

Move it back into asm/elf.h, this brings it back in line with
IA64 and PARISC architectures.

Fixes: 2d291e902791 ("Fix compile failure with non modular builds")
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/elf.h  | 7 +++
 arch/powerpc/include/uapi/asm/elf.h | 8 
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index b8425e3cfd81..64b523848cd7 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -176,4 +176,11 @@ do {   
\
 /* Relocate the kernel image to @final_address */
 void relocate(unsigned long final_address);
 
+/* There's actually a third entry here, but it's unused */
+struct ppc64_opd_entry
+{
+   unsigned long funcaddr;
+   unsigned long r2;
+};
+
 #endif /* _ASM_POWERPC_ELF_H */
diff --git a/arch/powerpc/include/uapi/asm/elf.h 
b/arch/powerpc/include/uapi/asm/elf.h
index 860c59291bfc..308857123a08 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -289,12 +289,4 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG];
 /* Keep this the last entry.  */
 #define R_PPC64_NUM 253
 
-/* There's actually a third entry here, but it's unused */
-struct ppc64_opd_entry
-{
-   unsigned long funcaddr;
-   unsigned long r2;
-};
-
-
 #endif /* _UAPI_ASM_POWERPC_ELF_H */
-- 
2.31.1



[PATCH v1 05/10] asm-generic: Define 'funct_descr_t' to commonly describe function descriptors

2021-10-11 Thread Christophe Leroy
We have three architectures using function descriptors, each with its
own name.

Add a common typedef that can be used in generic code.

Also add a stub typedef for architecture without function descriptors,
to avoid a forest of #ifdefs.

Signed-off-by: Christophe Leroy 
---
 arch/ia64/include/asm/sections.h| 1 +
 arch/parisc/include/asm/sections.h  | 1 +
 arch/powerpc/include/asm/sections.h | 1 +
 include/asm-generic/sections.h  | 3 +++
 4 files changed, 6 insertions(+)

diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h
index 80f5868afb06..929b5c535620 100644
--- a/arch/ia64/include/asm/sections.h
+++ b/arch/ia64/include/asm/sections.h
@@ -8,6 +8,7 @@
  */
 
 #define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
+typedef struct fdesc funct_descr_t;
 
 #include 
 #include 
diff --git a/arch/parisc/include/asm/sections.h 
b/arch/parisc/include/asm/sections.h
index 2e781ee19b66..329e80f7af0a 100644
--- a/arch/parisc/include/asm/sections.h
+++ b/arch/parisc/include/asm/sections.h
@@ -4,6 +4,7 @@
 
 #ifdef CONFIG_64BIT
 #define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
+typedef Elf64_Fdesc funct_descr_t;
 #endif
 
 /* nothing to see, move along */
diff --git a/arch/powerpc/include/asm/sections.h 
b/arch/powerpc/include/asm/sections.h
index b7f1ba04e756..d0d5287fa568 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -10,6 +10,7 @@
 
 #ifdef PPC64_ELF_ABI_v1
 #define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
+typedef struct ppc64_opd_entry funct_descr_t;
 #endif
 
 #include 
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 1db5cfd69817..436412d94054 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -63,6 +63,9 @@ extern __visible const void __nosave_begin, __nosave_end;
 #else
 #define dereference_function_descriptor(p) ((void *)(p))
 #define dereference_kernel_function_descriptor(p) ((void *)(p))
+typedef struct {
+   unsigned long addr;
+} funct_descr_t;
 #endif
 
 /* random extra sections (if any).  Override
-- 
2.31.1



[PATCH v1 08/10] lkdtm: Really write into kernel text in WRITE_KERN

2021-10-11 Thread Christophe Leroy
WRITE_KERN is supposed to overwrite some kernel text, namely
do_overwritten() function.

But at the time being it overwrites do_overwritten() function
descriptor, not function text.

Fix it by dereferencing the function descriptor to obtain
function text pointer.

And make do_overwritten() noinline so that it is really
do_overwritten() which is called by lkdtm_WRITE_KERN().

Signed-off-by: Christophe Leroy 
---
 drivers/misc/lkdtm/perms.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 60b3b2fe929d..442d60ed25ef 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -5,6 +5,7 @@
  * even non-readable regions.
  */
 #include "lkdtm.h"
+#include 
 #include 
 #include 
 #include 
@@ -37,7 +38,7 @@ static noinline void do_nothing(void)
 }
 
 /* Must immediately follow do_nothing for size calculuations to work out. */
-static void do_overwritten(void)
+static noinline void do_overwritten(void)
 {
pr_info("do_overwritten wasn't overwritten!\n");
return;
@@ -113,8 +114,9 @@ void lkdtm_WRITE_KERN(void)
size_t size;
volatile unsigned char *ptr;
 
-   size = (unsigned long)do_overwritten - (unsigned long)do_nothing;
-   ptr = (unsigned char *)do_overwritten;
+   size = (unsigned long)dereference_symbol_descriptor(do_overwritten) -
+  (unsigned long)dereference_symbol_descriptor(do_nothing);
+   ptr = dereference_symbol_descriptor(do_overwritten);
 
pr_info("attempting bad %zu byte write at %px\n", size, ptr);
memcpy((void *)ptr, (unsigned char *)do_nothing, size);
-- 
2.31.1



[PATCH v1 00/10] Fix LKDTM for PPC64/IA64/PARISC

2021-10-11 Thread Christophe Leroy
PPC64/IA64/PARISC have function descriptors. LKDTM doesn't work
on those three architectures because LKDTM messes up function
descriptors with functions.

This series does some cleanup in the three architectures and
refactors function descriptors so that it can then easily use it
in a generic way in LKDTM.

Patch 6 is not absolutely necessary but it is a good trivial cleanup.

Christophe Leroy (10):
  powerpc: Move 'struct ppc64_opd_entry' back into asm/elf.h
  powerpc: Rename 'funcaddr' to 'addr' in 'struct ppc64_opd_entry'
  ia64: Rename 'ip' to 'addr' in 'struct fdesc'
  asm-generic: Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR to define
associated stubs
  asm-generic: Define 'funct_descr_t' to commonly describe function
descriptors
  asm-generic: Refactor dereference_[kernel]_function_descriptor()
  lkdtm: Force do_nothing() out of line
  lkdtm: Really write into kernel text in WRITE_KERN
  lkdtm: Fix lkdtm_EXEC_RODATA()
  lkdtm: Fix execute_[user]_location()

 arch/ia64/include/asm/elf.h |  2 +-
 arch/ia64/include/asm/sections.h| 24 ++-
 arch/ia64/kernel/module.c   |  6 +--
 arch/parisc/include/asm/sections.h  | 16 +++
 arch/parisc/kernel/process.c| 21 -
 arch/powerpc/include/asm/elf.h  |  7 +++
 arch/powerpc/include/asm/sections.h | 30 +++--
 arch/powerpc/include/uapi/asm/elf.h |  8 
 arch/powerpc/kernel/module_64.c |  6 +--
 drivers/misc/lkdtm/perms.c  | 66 +++--
 include/asm-generic/sections.h  | 24 ++-
 11 files changed, 102 insertions(+), 108 deletions(-)

-- 
2.31.1



[PATCH v1 07/10] lkdtm: Force do_nothing() out of line

2021-10-11 Thread Christophe Leroy
LKDTM tests display that they run do_nothing() at a given
address, but in reality do_nothing() is inlined into the
caller.

Force it out of line so that it really runs text at the
displayed address.

Signed-off-by: Christophe Leroy 
---
 drivers/misc/lkdtm/perms.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 2dede2ef658f..60b3b2fe929d 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -21,7 +21,7 @@
 /* This is non-const, so it will end up in the .data section. */
 static u8 data_area[EXEC_SIZE];
 
-/* This is cost, so it will end up in the .rodata section. */
+/* This is const, so it will end up in the .rodata section. */
 static const unsigned long rodata = 0xAA55AA55;
 
 /* This is marked __ro_after_init, so it should ultimately be .rodata. */
@@ -31,7 +31,7 @@ static unsigned long ro_after_init __ro_after_init = 
0x55AA5500;
  * This just returns to the caller. It is designed to be copied into
  * non-executable memory regions.
  */
-static void do_nothing(void)
+static noinline void do_nothing(void)
 {
return;
 }
-- 
2.31.1



[PATCH v1 06/10] asm-generic: Refactor dereference_[kernel]_function_descriptor()

2021-10-11 Thread Christophe Leroy
dereference_function_descriptor() and
dereference_kernel_function_descriptor() are identical on the
three architectures implementing them.

Make it common.

Signed-off-by: Christophe Leroy 
---
 arch/ia64/include/asm/sections.h| 19 ---
 arch/parisc/include/asm/sections.h  |  9 -
 arch/parisc/kernel/process.c| 21 -
 arch/powerpc/include/asm/sections.h | 23 ---
 include/asm-generic/sections.h  | 18 ++
 5 files changed, 18 insertions(+), 72 deletions(-)

diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h
index 929b5c535620..d9addaea8339 100644
--- a/arch/ia64/include/asm/sections.h
+++ b/arch/ia64/include/asm/sections.h
@@ -30,23 +30,4 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], 
__end_gate_brl_fsys_b
 extern char __start_unwind[], __end_unwind[];
 extern char __start_ivt_text[], __end_ivt_text[];
 
-#undef dereference_function_descriptor
-static inline void *dereference_function_descriptor(void *ptr)
-{
-   struct fdesc *desc = ptr;
-   void *p;
-
-   if (!get_kernel_nofault(p, (void *)&desc->addr))
-   ptr = p;
-   return ptr;
-}
-
-#undef dereference_kernel_function_descriptor
-static inline void *dereference_kernel_function_descriptor(void *ptr)
-{
-   if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd)
-   return ptr;
-   return dereference_function_descriptor(ptr);
-}
-
 #endif /* _ASM_IA64_SECTIONS_H */
diff --git a/arch/parisc/include/asm/sections.h 
b/arch/parisc/include/asm/sections.h
index 329e80f7af0a..b041fb32a300 100644
--- a/arch/parisc/include/asm/sections.h
+++ b/arch/parisc/include/asm/sections.h
@@ -12,13 +12,4 @@ typedef Elf64_Fdesc funct_descr_t;
 
 extern char __alt_instructions[], __alt_instructions_end[];
 
-#ifdef CONFIG_64BIT
-
-#undef dereference_function_descriptor
-void *dereference_function_descriptor(void *);
-
-#undef dereference_kernel_function_descriptor
-void *dereference_kernel_function_descriptor(void *);
-#endif
-
 #endif
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 38ec4ae81239..7382576b52a8 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -266,27 +266,6 @@ get_wchan(struct task_struct *p)
return 0;
 }
 
-#ifdef CONFIG_64BIT
-void *dereference_function_descriptor(void *ptr)
-{
-   Elf64_Fdesc *desc = ptr;
-   void *p;
-
-   if (!get_kernel_nofault(p, (void *)&desc->addr))
-   ptr = p;
-   return ptr;
-}
-
-void *dereference_kernel_function_descriptor(void *ptr)
-{
-   if (ptr < (void *)__start_opd ||
-   ptr >= (void *)__end_opd)
-   return ptr;
-
-   return dereference_function_descriptor(ptr);
-}
-#endif
-
 static inline unsigned long brk_rnd(void)
 {
return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
diff --git a/arch/powerpc/include/asm/sections.h 
b/arch/powerpc/include/asm/sections.h
index d0d5287fa568..8f8e95f234e2 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -72,29 +72,6 @@ static inline int overlaps_kernel_text(unsigned long start, 
unsigned long end)
(unsigned long)_stext < end;
 }
 
-#ifdef PPC64_ELF_ABI_v1
-
-#undef dereference_function_descriptor
-static inline void *dereference_function_descriptor(void *ptr)
-{
-   struct ppc64_opd_entry *desc = ptr;
-   void *p;
-
-   if (!get_kernel_nofault(p, (void *)&desc->addr))
-   ptr = p;
-   return ptr;
-}
-
-#undef dereference_kernel_function_descriptor
-static inline void *dereference_kernel_function_descriptor(void *ptr)
-{
-   if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd)
-   return ptr;
-
-   return dereference_function_descriptor(ptr);
-}
-#endif /* PPC64_ELF_ABI_v1 */
-
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 436412d94054..5baaf9d7c671 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -60,6 +60,24 @@ extern __visible const void __nosave_begin, __nosave_end;
 
 /* Function descriptor handling (if any).  Override in asm/sections.h */
 #ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR
+static inline void *dereference_function_descriptor(void *ptr)
+{
+   funct_descr_t *desc = ptr;
+   void *p;
+
+   if (!get_kernel_nofault(p, (void *)&desc->addr))
+   ptr = p;
+   return ptr;
+}
+
+static inline void *dereference_kernel_function_descriptor(void *ptr)
+{
+   if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd)
+   return ptr;
+
+   return dereference_function_descriptor(ptr);
+}
+
 #else
 #define dereference_function_descriptor(p) ((void *)(p))
 #define dereference_kernel_function_descriptor(p) ((void *)(p))
-- 
2.31.1



[PATCH v1 03/10] ia64: Rename 'ip' to 'addr' in 'struct fdesc'

2021-10-11 Thread Christophe Leroy
There are three architectures with function descriptors, try to
have common names for the address they contain in order to
refactor some functions into generic functions later.

powerpc has 'funcaddr'
ia64 has 'ip'
parisc has 'addr'

Vote for 'addr' and update 'struct fdesc' accordingly.

Signed-off-by: Christophe Leroy 
---
 arch/ia64/include/asm/elf.h  | 2 +-
 arch/ia64/include/asm/sections.h | 2 +-
 arch/ia64/kernel/module.c| 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h
index 6629301a2620..2ef5f9966ad1 100644
--- a/arch/ia64/include/asm/elf.h
+++ b/arch/ia64/include/asm/elf.h
@@ -226,7 +226,7 @@ struct got_entry {
  * Layout of the Function Descriptor
  */
 struct fdesc {
-   uint64_t ip;
+   uint64_t addr;
uint64_t gp;
 };
 
diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h
index 3a033d2008b3..35f24e52149a 100644
--- a/arch/ia64/include/asm/sections.h
+++ b/arch/ia64/include/asm/sections.h
@@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr)
struct fdesc *desc = ptr;
void *p;
 
-   if (!get_kernel_nofault(p, (void *)&desc->ip))
+   if (!get_kernel_nofault(p, (void *)&desc->addr))
ptr = p;
return ptr;
 }
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index 2cba53c1da82..4f6400cbf79e 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -602,15 +602,15 @@ get_fdesc (struct module *mod, uint64_t value, int *okp)
return value;
 
/* Look for existing function descriptor. */
-   while (fdesc->ip) {
-   if (fdesc->ip == value)
+   while (fdesc->addr) {
+   if (fdesc->addr == value)
return (uint64_t)fdesc;
if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + 
mod->arch.opd->sh_size)
BUG();
}
 
/* Create new one */
-   fdesc->ip = value;
+   fdesc->addr = value;
fdesc->gp = mod->arch.gp;
return (uint64_t) fdesc;
 }
-- 
2.31.1



[PATCH v1 02/10] powerpc: Rename 'funcaddr' to 'addr' in 'struct ppc64_opd_entry'

2021-10-11 Thread Christophe Leroy
There are three architectures with function descriptors, try to
have common names for the address they contain in order to
refactor some functions into generic functions later.

powerpc has 'funcaddr'
ia64 has 'ip'
parisc has 'addr'

Vote for 'addr' and update 'struct ppc64_opd_entry' accordingly.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/elf.h  | 2 +-
 arch/powerpc/include/asm/sections.h | 2 +-
 arch/powerpc/kernel/module_64.c | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 64b523848cd7..90c3259a81ab 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -179,7 +179,7 @@ void relocate(unsigned long final_address);
 /* There's actually a third entry here, but it's unused */
 struct ppc64_opd_entry
 {
-   unsigned long funcaddr;
+   unsigned long addr;
unsigned long r2;
 };
 
diff --git a/arch/powerpc/include/asm/sections.h 
b/arch/powerpc/include/asm/sections.h
index 6e4af4492a14..32e7035863ac 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -77,7 +77,7 @@ static inline void *dereference_function_descriptor(void *ptr)
struct ppc64_opd_entry *desc = ptr;
void *p;
 
-   if (!get_kernel_nofault(p, (void *)&desc->funcaddr))
+   if (!get_kernel_nofault(p, (void *)&desc->addr))
ptr = p;
return ptr;
 }
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 6baa676e7cb6..82908c9be627 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -72,11 +72,11 @@ static func_desc_t func_desc(unsigned long addr)
 }
 static unsigned long func_addr(unsigned long addr)
 {
-   return func_desc(addr).funcaddr;
+   return func_desc(addr).addr;
 }
 static unsigned long stub_func_addr(func_desc_t func)
 {
-   return func.funcaddr;
+   return func.addr;
 }
 static unsigned int local_entry_offset(const Elf64_Sym *sym)
 {
@@ -187,7 +187,7 @@ static int relacmp(const void *_x, const void *_y)
 static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
const Elf64_Shdr *sechdrs)
 {
-   /* One extra reloc so it's always 0-funcaddr terminated */
+   /* One extra reloc so it's always 0-addr terminated */
unsigned long relocs = 1;
unsigned i;
 
-- 
2.31.1



[PATCH v1 10/10] lkdtm: Fix execute_[user]_location()

2021-10-11 Thread Christophe Leroy
execute_location() and execute_user_location() intend
to copy do_nothing() text and execute it at a new location.
However, at the time being it doesn't copy do_nothing() function
but do_nothing() function descriptor which still points to the
original text. So at the end it still executes do_nothing() at
its original location although using a copied function descriptor.

So, fix that by really copying do_nothing() text and build a new
function descriptor by copying do_nothing() function descriptor and
updating the target address with the new location.

Also fix the displayed addresses by dereferencing do_nothing()
function descriptor.

Signed-off-by: Christophe Leroy 
---
 drivers/misc/lkdtm/perms.c | 45 +++---
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index da16564e1ecd..1d03cd44c21d 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -44,19 +44,42 @@ static noinline void do_overwritten(void)
return;
 }
 
+static void *setup_function_descriptor(funct_descr_t *fdesc, void *dst)
+{
+   int err;
+
+   if (!__is_defined(HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR))
+   return dst;
+
+   err = copy_from_kernel_nofault(fdesc, do_nothing, sizeof(*fdesc));
+   if (err < 0)
+   return ERR_PTR(err);
+
+   fdesc->addr = (unsigned long)dst;
+   barrier();
+
+   return fdesc;
+}
+
 static noinline void execute_location(void *dst, bool write)
 {
-   void (*func)(void) = dst;
+   void (*func)(void);
+   funct_descr_t fdesc;
+   void *do_nothing_text = dereference_symbol_descriptor(do_nothing);
 
-   pr_info("attempting ok execution at %px\n", do_nothing);
+   pr_info("attempting ok execution at %px\n", do_nothing_text);
do_nothing();
 
if (write == CODE_WRITE) {
-   memcpy(dst, do_nothing, EXEC_SIZE);
+   memcpy(dst, do_nothing_text, EXEC_SIZE);
flush_icache_range((unsigned long)dst,
   (unsigned long)dst + EXEC_SIZE);
}
-   pr_info("attempting bad execution at %px\n", func);
+   func = setup_function_descriptor(&fdesc, dst);
+   if (IS_ERR(func))
+   return;
+
+   pr_info("attempting bad execution at %px\n", dst);
func();
pr_err("FAIL: func returned\n");
 }
@@ -66,16 +89,22 @@ static void execute_user_location(void *dst)
int copied;
 
/* Intentionally crossing kernel/user memory boundary. */
-   void (*func)(void) = dst;
+   void (*func)(void);
+   funct_descr_t fdesc;
+   void *do_nothing_text = dereference_symbol_descriptor(do_nothing);
 
-   pr_info("attempting ok execution at %px\n", do_nothing);
+   pr_info("attempting ok execution at %px\n", do_nothing_text);
do_nothing();
 
-   copied = access_process_vm(current, (unsigned long)dst, do_nothing,
+   copied = access_process_vm(current, (unsigned long)dst, do_nothing_text,
   EXEC_SIZE, FOLL_WRITE);
if (copied < EXEC_SIZE)
return;
-   pr_info("attempting bad execution at %px\n", func);
+   func = setup_function_descriptor(&fdesc, dst);
+   if (IS_ERR(func))
+   return;
+
+   pr_info("attempting bad execution at %px\n", dst);
func();
pr_err("FAIL: func returned\n");
 }
-- 
2.31.1



[PATCH v1 09/10] lkdtm: Fix lkdtm_EXEC_RODATA()

2021-10-11 Thread Christophe Leroy
Behind a location, lkdtm_EXEC_RODATA() executes a real function,
not a copy of do_nothing().

So do it directly instead of using execute_location().

And fix displayed addresses by dereferencing the function descriptors.

Signed-off-by: Christophe Leroy 
---
 drivers/misc/lkdtm/perms.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 442d60ed25ef..da16564e1ecd 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -153,7 +153,14 @@ void lkdtm_EXEC_VMALLOC(void)
 
 void lkdtm_EXEC_RODATA(void)
 {
-   execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS);
+   pr_info("attempting ok execution at %px\n",
+   dereference_symbol_descriptor(do_nothing));
+   do_nothing();
+
+   pr_info("attempting bad execution at %px\n",
+   dereference_symbol_descriptor(lkdtm_rodata_do_nothing));
+   lkdtm_rodata_do_nothing();
+   pr_err("FAIL: func returned\n");
 }
 
 void lkdtm_EXEC_USERSPACE(void)
-- 
2.31.1



Re: [PATCH] KVM: PPC: Book3S HV: H_ENTER filter out reserved HPTE[B] value

2021-10-11 Thread Fabiano Rosas
Nicholas Piggin  writes:

> The HPTE B field is a 2-bit field with values 0b10 and 0b11 reserved.
> This field is also taken from the HPTE and used when KVM executes
> TLBIEs to set the B field of those instructions.
>
> Disallow the guest setting B to a reserved value with H_ENTER by
> rejecting it. This is the same approach already taken for rejecting
> reserved (unsupported) LLP values. This prevents the guest from being
> able to induce the host to execute TLBIE with reserved values, which
> is not known to be a problem with current processors but in theory it
> could prevent the TLBIE from working correctly in a future processor.
>
> Signed-off-by: Nicholas Piggin 

The ISA says:

B Segment Size Selector
0b00 - 256 MB (s=28)
0b01 - 1 TB (s=40)
0b10 - reserved
0b11 - reserved

So that looks good. I couldn't find any other guest initiated PTE
modifications, so I think we're covered.

Reviewed-by: Fabiano Rosas 

> ---
>  arch/powerpc/include/asm/kvm_book3s_64.h | 4 
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c  | 9 +
>  2 files changed, 13 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
> b/arch/powerpc/include/asm/kvm_book3s_64.h
> index 19b6942c6969..fff391b9b97b 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -378,6 +378,10 @@ static inline unsigned long compute_tlbie_rb(unsigned 
> long v, unsigned long r,
>   rb |= 1;/* L field */
>   rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */
>   }
> + /*
> +  * This sets both bits of the B field in the PTE. 0b1x values are
> +  * reserved, but those will have been filtered by kvmppc_do_h_enter.
> +  */
>   rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;   /* B field */
>   return rb;
>  }
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c 
> b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 632b2545072b..2c1f3c6e72d1 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -207,6 +207,15 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long 
> flags,
>
>   if (kvm_is_radix(kvm))
>   return H_FUNCTION;
> + /*
> +  * The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so
> +  * these functions should work together -- must ensure a guest can not
> +  * cause problems with the TLBIE that KVM executes.
> +  */
> + if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) {
> + /* B=0b1x is a reserved value, disallow it. */
> + return H_PARAMETER;
> + }
>   psize = kvmppc_actual_pgsz(pteh, ptel);
>   if (!psize)
>   return H_PARAMETER;


Re: [V3 0/4] powerpc/perf: Add instruction and data address registers to extended regs

2021-10-11 Thread kajoljain



On 10/7/21 12:25 PM, Athira Rajeev wrote:
> Patch set adds PMU registers namely Sampled Instruction Address Register
> (SIAR) and Sampled Data Address Register (SDAR) as part of extended regs
> in PowerPC. These registers provides the instruction/data address and
> adding these to extended regs helps in debug purposes.
> 
> Patch 1/4 and 2/4 refactors the existing macro definition of
> PERF_REG_PMU_MASK_300 and PERF_REG_PMU_MASK_31 to make it more
> readable.
> Patch 3/4 adds SIAR and SDAR as part of the extended regs mask.
> Patch 4/4 includes perf tools side changes to add the SPRs to
> sample_reg_mask to use with -I? option.
> 
> Changelog:
> Change from v2 -> v3:
> Addressed review comments from Michael Ellerman
> - Fixed the macro definition to use "unsigned long long"
>   which otherwise will cause build error with perf on
>   32-bit.
> - Added Reviewed-by from Daniel Axtens for patch3.
> 
> Change from v1 -> v2:
> Addressed review comments from Michael Ellerman
> - Refactored the perf reg extended mask value macros for
>   PERF_REG_PMU_MASK_300 and PERF_REG_PMU_MASK_31 to
>   make it more readable. Also moved PERF_REG_EXTENDED_MAX
>   along with enum definition similar to PERF_REG_POWERPC_MAX.
> 
> Athira Rajeev (4):
>   powerpc/perf: Refactor the code definition of perf reg extended mask
>   tools/perf: Refactor the code definition of perf reg extended mask in
> tools side header file
>   powerpc/perf: Expose instruction and data address registers as part of
> extended regs
>   tools/perf: Add perf tools support to expose instruction and data
> address registers as part of extended regs

Patch-set looks good to me.

Reviewed-by: Kajol Jain

Thanks,
Kajol Jain
> 
>  arch/powerpc/include/uapi/asm/perf_regs.h | 28 ---
>  arch/powerpc/perf/perf_regs.c |  4 +++
>  .../arch/powerpc/include/uapi/asm/perf_regs.h | 28 ---
>  tools/perf/arch/powerpc/include/perf_regs.h   |  2 ++
>  tools/perf/arch/powerpc/util/perf_regs.c  |  2 ++
>  5 files changed, 44 insertions(+), 20 deletions(-)
> 


Re: [PATCH v2 4/4] powerpc/pseries/cpuhp: remove obsolete comment from pseries_cpu_die

2021-10-11 Thread Michael Ellerman
Nathan Lynch  writes:
> Michael Ellerman  writes:
>> Daniel Henrique Barboza  writes:
>>> This is enough to say that we can't easily see the history behind this 
>>> comment.
>>> I also believe that we're better off without it since it doesn't make sense
>>> with the current codebase.
>>
>> It was added by the original CPU hotplug commit for ppc64::
>>
>> https://github.com/mpe/linux-fullhistory/commit/0e9fd9441cd2113b67b14e739267c9e69761489b
>>
>>
>> The code was fairly similar:
>>
>> void __cpu_die(unsigned int cpu)
>> {
>>  int tries;
>>  int cpu_status;
>>  unsigned int pcpu = get_hard_smp_processor_id(cpu);
>>
>>  for (tries = 0; tries < 5; tries++) {
>>  cpu_status = query_cpu_stopped(pcpu);
>>
>>  if (cpu_status == 0)
>>  break;
>>  set_current_state(TASK_UNINTERRUPTIBLE);
>>  schedule_timeout(HZ);
>>  }
>>  if (cpu_status != 0) {
>>  printk("Querying DEAD? cpu %i (%i) shows %i\n",
>> cpu, pcpu, cpu_status);
>>  }
>>
>>  /* Isolation and deallocation are definatly done by
>>   * drslot_chrp_cpu.  If they were not they would be
>>   * done here.  Change isolate state to Isolate and
>>   * change allocation-state to Unusable.
>>   */
>>  paca[cpu].xProcStart = 0;
>>
>>  /* So we can recognize if it fails to come up next time. */
>>  cpu_callin_map[cpu] = 0;
>> }
>>
>>
>> drslot_chrp_cpu() still exists in drmgr:
>>
>>   
>> https://github.com/ibm-power-utilities/powerpc-utils/blob/e798c4a09fbf0fa0f421e624cfa366a6c405c9fe/src/drmgr/drslot_chrp_cpu.c#L406
>>
>>
>> I agree the comment is no longer meaningful and can be removed.
>
> Thanks for providing this background.
>
>> It might be good to then add a comment explaining why we need to set
>> cpu_start = 0.
>
> Sure, I can take that as a follow-up. Or perhaps it should be moved to
> the online path.

Yeah possibly.

>> It's not immediately clear why we need to. When we bring a CPU back
>> online in smp_pSeries_kick_cpu() we ask RTAS to start it and then
>> immediately set cpu_start = 1, ie. there isn't a separate step that sets
>> cpu_start = 1 for hotplugged CPUs.
>
> Hmm I'm not following the distinction you seem to be drawing between
> bringing a CPU back online and a hotplugged CPU. kick_cpu is used in all
> cases AFAIK.

Yeah that wasn't very clear, reading it back I have half confused myself.

At boot we need the paca->cpu_start flag because some CPUs can be
spinning in generic_secondary_common_init, in head_64.S.

ie. they're not in RTAS, they're spinning in kernel code, and the only
thing that stops them coming "online" in the Linux sense is
paca->cpu_start.

You can see that in pseries/smp.c:

static inline int smp_startup_cpu(unsigned int lcpu)
{
...
if (cpumask_test_cpu(lcpu, of_spin_mask))
/* Already started by OF and sitting in spin loop */
return 1;


We also hit that case when kexec'ing, where all the secondaries come in
that way.


On the other hand when we offline a CPU, we set paca->cpu_start = 0, in
pseries_cpu_die(), and then we return the CPU to RTAS.

The only way it *should* come back online is via smp_pSeries_kick_cpu(),
which calls smp_startup_cpu() to bring the CPU out of RTAS, and then
smp_pSeries_kick_cpu() immediately sets cpu_start = 1.

So the sequence is:

CPU goes offline from Linux POV
paca->cpu_start = 0;
CPU offline in RTAS
...
CPU brought out of RTAS
paca->cpu_start = 1;
CPU comes back online from Linux POV


But I guess I kind of answered my own question above, where I said
*should*. Clearing paca->cpu_start when we offline the CPU gives us a
little bit of backup if the CPU comes out of RTAS unexpectedly. ie. it
would then spin on paca->cpu_start, rather than spontaneously coming
back into Linux when we weren't expecting it.

cheers


Re: [PATCH 0/1] Arch use of pci_dev_is_added()

2021-10-11 Thread Michael Ellerman
On Fri, 10 Sep 2021 16:19:39 +0200, Niklas Schnelle wrote:
> In my proposal to make pci_dev_is_added() more regularly usable by arch code
> you mentioned[0] that you believe the uses in arch/powerpc are not necessary
> anymore. From code reading I agree and so does Oliver O'Halloran[1].
> 
> So as promised here is a patch removing them. I only compile tested this as
> I don't have access to a powerpc system.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Drop superfluous pci_dev_is_added() calls
  https://git.kernel.org/powerpc/c/452f145eca73945222923525a7eba59cf37909cc

cheers


Re: [PATCH v2 0/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

2021-10-11 Thread Michael Ellerman
On Wed, 29 Sep 2021 11:36:44 +0800, Xiaoming Ni wrote:
> When CONFIG_SMP=y, timebase synchronization is required for mpc8572 when
>  the second kernel is started
>   arch/powerpc/kernel/smp.c:
>   int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>   {
>   ...
>   if (smp_ops->give_timebase)
>   smp_ops->give_timebase();
>   ...
>   }
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc:85xx:Fix oops when mpc85xx_smp_guts_ids node cannot be found
  https://git.kernel.org/powerpc/c/3c2172c1c47b4079c29f0e6637d764a99355ebcd
[2/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
  https://git.kernel.org/powerpc/c/c45361abb9185b1e172bd75eff51ad5f601ccae4

cheers


Re: [PATCH] powerpc: clean up UPD_CONSTR

2021-10-11 Thread Michael Ellerman
On Tue, 14 Sep 2021 09:17:04 -0700, Nick Desaulniers wrote:
> UPD_CONSTR was previously a preprocessor define for an old GCC 4.9 inline
> asm bug with m<> constraints.
> 
> 
> 
> 

Applied to powerpc/next.

[1/1] powerpc: clean up UPD_CONSTR
  https://git.kernel.org/powerpc/c/2a24d80fc86bcd70c8e780078254e873ea217379

cheers


Re: [PATCH v2 0/4] CPU DLPAR/hotplug for v5.16

2021-10-11 Thread Michael Ellerman
On Mon, 27 Sep 2021 15:19:29 -0500, Nathan Lynch wrote:
> Fixes for some vintage bugs in handling cache node addition and removal, a
> miscellaneous BUG->WARN conversion, and removal of the fragile "by count"
> CPU DLPAR code that probably has no users.
> 
> Changes since v1:
> * Remove set but unused local variable (0day)
> * Additional comment cleanup patch
> 
> [...]

Applied to powerpc/next.

[1/4] powerpc/pseries/cpuhp: cache node corrections
  https://git.kernel.org/powerpc/c/7edd5c9a8820bedb22870b34a809d45f2a86a35a
[2/4] powerpc/cpuhp: BUG -> WARN conversion in offline path
  https://git.kernel.org/powerpc/c/983f9101740641434cea4f2e172175ff4b0276ad
[3/4] powerpc/pseries/cpuhp: delete add/remove_by_count code
  https://git.kernel.org/powerpc/c/fa2a5dfe2ddd0e7c77e5f608e1fa374192e5be97
[4/4] powerpc/pseries/cpuhp: remove obsolete comment from pseries_cpu_die
  https://git.kernel.org/powerpc/c/f9473a65719e59c45f1638cc04db7c80de8fcc1a

cheers


Re: [PATCH v2 0/2] powerpc/paravirt: vcpu_is_preempted() tweaks

2021-10-11 Thread Michael Ellerman
On Tue, 28 Sep 2021 16:41:45 -0500, Nathan Lynch wrote:
> Minor changes arising from discovering that this code throws warnings with
> DEBUG_PREEMPT kernels.
> 
> Changes since v1:
> * Additional commentary to (1) distinguish hypervisor dispatch and preempt
>   behavior from kernel scheduler preemption; and (2) more clearly justify
>   the use of raw_smp_processor_id().
> * Additional patch to update existing comments before making the functional
>   change.
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc/paravirt: vcpu_is_preempted() commentary
  https://git.kernel.org/powerpc/c/799f9b51db688608b50e630a57bee5f699b268ca
[2/2] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()
  https://git.kernel.org/powerpc/c/fda0eb220021a97c1d656434b9340ebf3fc4704a

cheers


Re: [PATCH] powerpc: fix unbalanced node refcount in check_kvm_guest()

2021-10-11 Thread Michael Ellerman
On Tue, 28 Sep 2021 07:45:50 -0500, Nathan Lynch wrote:
> When check_kvm_guest() succeeds in looking up a /hypervisor OF node, it
> returns without performing a matching put for the lookup, leaving the
> node's reference count elevated.
> 
> Add the necessary call to of_node_put(), rearranging the code slightly to
> avoid repetition or goto.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: fix unbalanced node refcount in check_kvm_guest()
  https://git.kernel.org/powerpc/c/56537faf8821e361d739fc5ff58c9c40f54a1d4c

cheers


Re: [PATCH trivial v2] powerpc/powernv/dump: Fix typo in comment

2021-10-11 Thread Michael Ellerman
On Tue, 14 Sep 2021 20:08:02 +0530, Vasant Hegde wrote:
> 


Applied to powerpc/next.

[1/1] powerpc/powernv/dump: Fix typo in comment
  https://git.kernel.org/powerpc/c/ee87843795ec5dc2f3bb315fade3ec098c88f639

cheers


Re: [PATCH] powerpc: Remove unused prototype for of_show_percpuinfo

2021-10-11 Thread Michael Ellerman
On Fri, 3 Sep 2021 16:32:46 +1000, Daniel Axtens wrote:
> commit 6d7f58b04d82 ("[PATCH] powerpc: Some minor cleanups to setup_32.c")
> removed of_show_percpuinfo but didn't remove the prototype.
> 
> Remove it.
> 
> 
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Remove unused prototype for of_show_percpuinfo
  https://git.kernel.org/powerpc/c/93fa8e9d88118bd2bdf2c61f9b63e7d4d9b648fe

cheers


Re: [PATCH] video: fbdev: use memset_io() instead of memset()

2021-10-11 Thread Michael Ellerman
On Wed, 15 Sep 2021 15:34:35 +0200, Christophe Leroy wrote:
> While investigating a lockup at startup on Powerbook 3400C, it was
> identified that the fbdev driver generates alignment exception at
> startup:
> 
>   --- interrupt: 600 at memset+0x60/0xc0
>   NIP:  c0021414 LR: c03fc49c CTR: 7fff
>   REGS: ca021c10 TRAP: 0600   Tainted: GW  
> (5.14.2-pmac-00727-g12a41fa69492)
>   MSR:  9032   CR: 44008442  XER: 2100
>   DAR: cab80020 DSISR: 00017c07
>   GPR00: 0007 ca021cd0 c14412e0 cab8  0010 cab8001c 
> 0004
>   GPR08: 0010 7fff   84008442  c0006fb4 
> 
>   GPR16:        
> 0010
>   GPR24:  8180 0320 c15fa400 c14d1878  c14d1800 
> c094e19c
>   NIP [c0021414] memset+0x60/0xc0
>   LR [c03fc49c] chipsfb_pci_init+0x160/0x580
>   --- interrupt: 600
>   [ca021cd0] [c03fc46c] chipsfb_pci_init+0x130/0x580 (unreliable)
>   [ca021d20] [c03a3a70] pci_device_probe+0xf8/0x1b8
>   [ca021d50] [c043d584] really_probe.part.0+0xac/0x388
>   [ca021d70] [c043d914] __driver_probe_device+0xb4/0x170
>   [ca021d90] [c043da18] driver_probe_device+0x48/0x144
>   [ca021dc0] [c043e318] __driver_attach+0x11c/0x1c4
>   [ca021de0] [c043ad30] bus_for_each_dev+0x88/0xf0
>   [ca021e10] [c043c724] bus_add_driver+0x190/0x22c
>   [ca021e40] [c043ee94] driver_register+0x9c/0x170
>   [ca021e60] [c0006c28] do_one_initcall+0x54/0x1ec
>   [ca021ed0] [c08246e4] kernel_init_freeable+0x1c0/0x270
>   [ca021f10] [c0006fdc] kernel_init+0x28/0x11c
>   [ca021f30] [c0017148] ret_from_kernel_thread+0x14/0x1c
>   Instruction dump:
>   7d4601a4 39490777 7d4701a4 39490888 7d4801a4 39490999 7d4901a4 39290aaa
>   7d2a01a4 4c00012c 4bfffe88 0fe0 <4bfffe80> 9421fff0 38210010 
> 48001970
> 
> [...]

Applied to powerpc/next.

[1/1] video: fbdev: use memset_io() instead of memset()
  https://git.kernel.org/powerpc/c/f2719b26ae27282c145202ffd656d5ff1fe737cc

cheers


Re: [PATCH] powerpc/powermac: Remove stale declaration of pmac_md

2021-10-11 Thread Michael Ellerman
On Fri, 3 Sep 2021 08:23:52 + (UTC), Christophe Leroy wrote:
> pmac_md doesn't exist anymore, remove stale declaration.
> 

Applied to powerpc/next.

[1/1] powerpc/powermac: Remove stale declaration of pmac_md
  https://git.kernel.org/powerpc/c/9d7fb0643a156a5887814e1263b648501cb0

cheers


Re: [PATCH] powerpc/mem: Fix arch/powerpc/mm/mem.c:53:12: error: no previous prototype for 'create_section_mapping'

2021-10-11 Thread Michael Ellerman
On Mon, 13 Sep 2021 17:17:26 +0200, Christophe Leroy wrote:
> Commit 8e11d62e2e87 ("powerpc/mem: Add back missing header to fix 'no
> previous prototype' error") was supposed to fix the problem, but in
> the meantime commit a927bd6ba952 ("mm: fix phys_to_target_node() and
> memory_add_physaddr_to_nid() exports") moved create_section_mapping()
> prototype from asm/sparsemem.h to asm/mmzone.h
> 
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/mem: Fix arch/powerpc/mm/mem.c:53:12: error: no previous prototype for 'create_section_mapping'
  https://git.kernel.org/powerpc/c/7eff9bc00ddf1e2281dff575884b7f676c85b006

cheers


Re: [PATCH] powerpc/476: Fix sparse report

2021-10-11 Thread Michael Ellerman
On Sat, 18 Sep 2021 11:22:32 +0200, Christophe Leroy wrote:
>   arch/powerpc/platforms/44x/ppc476.c:236:17: warning: cast removes address space '__iomem' of expression
>   arch/powerpc/platforms/44x/ppc476.c:241:34: warning: incorrect type in argument 1 (different address spaces)
>   arch/powerpc/platforms/44x/ppc476.c:241:34:    expected void const volatile [noderef] __iomem *addr
>   arch/powerpc/platforms/44x/ppc476.c:241:34:    got unsigned char [usertype] *
>   arch/powerpc/platforms/44x/ppc476.c:243:17: warning: incorrect type in argument 1 (different address spaces)
>   arch/powerpc/platforms/44x/ppc476.c:243:17:    expected void volatile [noderef] __iomem *addr
>   arch/powerpc/platforms/44x/ppc476.c:243:17:    got unsigned char [usertype] *[assigned] fpga
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/476: Fix sparse report
  https://git.kernel.org/powerpc/c/494f238a3861863d908af7b98a369f6d8a986c85

cheers


Re: [PATCH kernel v2] powerps/pseries/dma: Add support for 2M IOMMU page size

2021-10-11 Thread Michael Ellerman
On Wed, 6 Oct 2021 15:47:35 +1100, Alexey Kardashevskiy wrote:
> The upcoming PAPR spec adds a 2M page size, bit 23 right after 16G page
> size in the "ibm,query-pe-dma-window" call.
> 
> This adds support for the new page size. Since the new page size is out
> of sorted order, this changes the loop to not assume that shift[] is
> sorted.
> 
> [...]

Applied to powerpc/next.

[1/1] powerps/pseries/dma: Add support for 2M IOMMU page size
  https://git.kernel.org/powerpc/c/3872731187141d5d0a5c4fb30007b8b9ec36a44d

cheers


Re: [PATCH v2 3/3] powerpc/numa: Fill distance_lookup_table for offline nodes

2021-10-11 Thread Michael Ellerman
Srikar Dronamraju  writes:
> * Michael Ellerman  [2021-09-23 21:17:25]:
>
>> Srikar Dronamraju  writes:
>> > * Michael Ellerman  [2021-08-26 23:36:53]:
>> >
>> >> Srikar Dronamraju  writes:
>> >> > Scheduler expects unique number of node distances to be available at
>> >> > boot.
>> ...
>> >
>> >> > Fake the offline node's distance_lookup_table entries so that all
>> >> > possible node distances are updated.
>> >>
>> >> Does this work if we have a single node offline at boot?
>> >>
>> >
>> > It should.
>> >
>> >> Say we start with:
>> >>
>> >> node distances:
>> >> node   0   1
>> >>   0:  10  20
>> >>   1:  20  10
>> >>
>> >> And node 2 is offline at boot. We can only initialise that node's entries
>> >> in the distance_lookup_table:
>> >>
>> >>   while (i--)
>> >>   distance_lookup_table[node][i] = node;
>> >>
>> >> By filling them all with 2 that causes node_distance(2, X) to return the
>> >> maximum distance for all other nodes X, because we won't break out of
>> >> the loop in __node_distance():
>> >>
>> >>   for (i = 0; i < distance_ref_points_depth; i++) {
>> >>   if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
>> >>   break;
>> >>
>> >>   /* Double the distance for each NUMA level */
>> >>   distance *= 2;
>> >>   }
>> >>
>> >> If distance_ref_points_depth was 4 we'd return 160.
>> >
>> > As you already know, distance 10, 20, .. are defined by Powerpc, form1
>> > affinity. PAPR doesn't define actual distances, it only provides us the
>> > associativity. If distance_ref_points_depth is 4
>> > (distance_ref_points_depth doesn't take local distance into consideration),
>> > the distances are 10, 20, 40, 80, 160.
>> >
>> >>
>> >> That'd leave us with 3 unique distances at boot, 10, 20, 160.
>> >>
>> >
>> > So if there are unique distances, then the distances as per the current
>> > code has to be 10, 20, 40, 80.. I don't see a way in which we have a break
>> > in the series, like having 160 without 80.
>>
>> I'm confused what you mean there.
>
> At the outset, if we have a better probable solution, do let me know, I am
> willing to try that too.

I don't have one in mind no, I'm just trying to satisfy myself that this
solution will work in all cases we're likely to encounter.

>> If we have a node that's offline at boot then we get 160 for that node,
>> that's just the result of having no info for it, so we never break out
>> of the for loop.
>>
>> So if we have two nodes, one hop apart, and then an offline node we get
>> 10, 20, 160.
>>
>> Or if you're using depth = 3 then it's 10, 20, 80.
>
> My understanding is as below:
>
> device-tree provides the max hops by way of
> ibm,associativity-reference-points. This is mapped to
> distance_ref_points_depth in Linux-powerpc.
>
> Now Linux-powerpc encodes hops as (disregarding local distance) 20, 40, 80,
> 160, 320 ...
> So if the distance_ref_points_depth is 3, then the hops are 20, 40, 80.
>
> Do you disagree?

I'm not sure. You didn't really address my point.

You said that we can't have 160 without 80 (for depth = 4).

I gave an example where we could see a gap in the used distance values,
ie. 10, 20, 80 for a depth of 3.

Which is not to say that distance 40 doesn't exist in that scenario,
rather that it's not used by any node.


>> >> But when node 2 comes online it might introduce more than 1 new distance
>> >> value, eg. it could be that the actual distances are:
>> >>
>> >> node distances:
>> >> node   0   1   2
>> >>   0:  10  20  40
>> >>   1:  20  10  80
>> >>   2:  40  80  10
>> >>
>> >> ie. we now have 4 distances, 10, 20, 40, 80.
>> >>
>> >> What am I missing?
>> >
>> > As I said above, I am not sure how we can have a break in the series.
>> > If distance_ref_points_depth is 3, the distances have to be 10, 20, 40, 80,
>> > at least for form1 affinity.
>>
>> I agree for depth 3 we have to see 10, 20, 40, 80. But nothing
>> guarantees we see each value (other than 10).
>
> The hop distances are not from the device-tree, the device-tree only gives
> us the max hops possible. Linux-powerpc is actually hard-coding the
> distances, with each hop distance being 2x the previous.

Yes. I guess I was sloppy to say "see each value", I didn't mean we see
those values directly in the device-tree.

> So we may not see any nodes at a particular hop, but we know maximum hops.
> And if distance_ref_points_depth is 3, then hops are 20, 40, 80 only.

OK, so we agree that "we may not see any nodes at a particular hop".

Which is what I was trying to say above.

>> We can have two nodes one hop apart, so we have 10 and 20, then a third
>> node is added 3 hops away, so we get 10, 20, 80.
>>
>
>> The real problem is that the third node could be 3 hops from node 0
>> and 2 hops from node 1, and so the addition of the third node causes
>> two new distance values (40 & 80) to be required.
>
> So here the max hops as given by device-tree is 3. So we know that we are
> looking for max-distance of 

Re: [PATCH] powerpc/xive: Discard disabled interrupts in get_irqchip_state()

2021-10-11 Thread seeteena

Tested-by: seeteena

I have used a KVM guest with a passthrough ethernet adapter and the 
lspci output to identify the adapter.




Re: [PATCH] powerpc/xive: Discard disabled interrupts in get_irqchip_state()

2021-10-11 Thread seeteena

Tested-by: seeteena

I have used a KVM guest with a passthrough ethernet adapter and the 
lspci output to identify the adapter.


From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: 

X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 96058C433EF
for ; Mon, 11 Oct 2021 07:11:10 + 
(UTC)
Received: from lists.ozlabs.org (lists.ozlabs.org [112.213.38.117])
(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
(No client certificate requested)
by mail.kernel.org (Postfix) with ESMTPS id B21F760F24
for ; Mon, 11 Oct 2021 07:11:09 + 
(UTC)
DMARC-Filter: OpenDMARC Filter v1.4.1 mail.kernel.org B21F760F24
Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) 
header.from=kaod.org
Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=lists.ozlabs.org
Received: from boromir.ozlabs.org (localhost [IPv6:::1])
by lists.ozlabs.org (Postfix) with ESMTP id 4HSVMH6wGdz3bjR
for ; Mon, 11 Oct 2021 18:11:07 +1100 
(AEDT)
Authentication-Results: lists.ozlabs.org; spf=pass (sender SPF authorized)
 smtp.mailfrom=kaod.org (client-ip=79.137.123.220;
 helo=smtpout2.mo529.mail-out.ovh.net; envelope-from=c...@kaod.org;
 receiver=)
X-Greylist: delayed 504 seconds by postgrey-1.36 at boromir;
 Mon, 11 Oct 2021 18:10:42 AEDT
Received: from smtpout2.mo529.mail-out.ovh.net
 (smtpout2.mo529.mail-out.ovh.net [79.137.123.220])
 (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
 (No client certificate requested)
 by lists.ozlabs.org (Postfix) with ESMTPS id 4HSVLp4Pwcz2yPv
 for ; Mon, 11 Oct 2021 18:10:42 +1100 (AEDT)
Received: from mxplan5.mail.ovh.net (unknown [10.109.156.216])
 by mo529.mail-out.ovh.net (Postfix) with ESMTPS id 0EC30C3BBB9B;
 Mon, 11 Oct 2021 09:02:09 +0200 (CEST)
Received: from kaod.org (37.59.142.95) by DAG4EX1.mxp5.local (172.16.2.31)
 with Microsoft SMTP Server (version=TLS1_2,
 cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2308.14; Mon, 11 Oct
 2021 09:02:09 +0200
Authentication-Results: garm.ovh; auth=pass
 (GARM-95G0010762b5aa-8db3-4685-afb6-69febc946e19,
 044DEDDE8B0E05FD49EE52B84AFD98BA54CEE260) smtp.auth=c...@kaod.org
X-OVh-ClientIp: 82.64.250.170
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= 
To: 
Subject: [PATCH] powerpc/xive: Discard disabled interrupts in
 get_irqchip_state()
Date: Mon, 11 Oct 2021 09:02:03 +0200
Message-ID: <20211011070203.99726-1-...@kaod.org>
X-Mailer: git-send-email 2.31.1
MIME-Version: 1.0
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: 8bit
X-Originating-IP: [37.59.142.95]
X-ClientProxiedBy: DAG3EX1.mxp5.local (172.16.2.21) To DAG4EX1.mxp5.local
 (172.16.2.31)
X-Ovh-Tracer-GUID: 1fcd0286-05cf-4e09-8d7f-6cb5e00e2edd
X-Ovh-Tracer-Id: 16244765333835647968
X-VR-SPAMSTATE: OK
X-VR-SPAMSCORE: 0
X-VR-SPAMCAUSE: 
gggruggvucftvghtrhhoucdtuddrgedvtddrvddthedgudduvdcutefuodetggdotefrodftvfcurfhrohhfihhlvgemucfqggfjpdevjffgvefmvefgnecuuegrihhlohhuthemucehtddtnecunecujfgurhephffvufffkffogggtgfhisehtkeertdertdejnecuhfhrohhmpeevrogurhhitgcunfgvucfiohgrthgvrhcuoegtlhhgsehkrghougdrohhrgheqnecuggftrfgrthhtvghrnhepfedvuedtvdeikeekuefhkedujeejgffggffhtefglefgveevfeeghfdvgedtleevnecukfhppedtrddtrddtrddtpdefjedrheelrddugedvrdelheenucevlhhushhtvghrufhiiigvpedtnecurfgrrhgrmhepmhhouggvpehsmhhtphdqohhuthdphhgvlhhopehmgihplhgrnhehrdhmrghilhdrohhvhhdrnhgvthdpihhnvghtpedtrddtrddtrddtpdhmrghilhhfrhhomheptghlgheskhgrohgurdhorhhgpdhrtghpthhtoheptghlgheskhgrohgurdhorhhg
X-BeenThere: linuxppc-dev@lists.ozlabs.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Linux on PowerPC Developers Mail List 
List-Unsubscribe: ,
 
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: ,
 
Cc: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= ,
 sta...@vger.kernel.org
Errors-To: 
linuxppc-dev-bounces+linuxppc-dev=archiver.kernel@lists.ozlabs.org
Sender: "Linuxppc-dev"
 

When an interrupt is passed through, the KVM XIVE device calls the
set_vcpu_affinity() handler which raises the P bit to mask the
interrupt and to catch any in-flight interrupts while routing the
interrupt to the guest.

On the guest side, drivers (like some Intels) can request at probe
time some MSIs and call synchronize_irq() to check that there are no
in flight interrupts. This will call the XIVE get_irqchip_state()
handler which will always return true as the interrupt P bit has been
set on the host side and lock the CPU in an infinite loop.

Fix that by discarding 

Re: [PATCH v2 00/11] Add Apple M1 support to PASemi i2c driver

2021-10-11 Thread Hector Martin

On 11/10/2021 17.54, Wolfram Sang wrote:

MAINTAINERS. It'll probably apply cleanly to 5.15-rc5 but if that happens again


It doesn't because Linus' git doesn't have:

Documentation/devicetree/bindings/pci/apple,pcie.yaml

Because MAINTAINER dependencies can be a bit nasty, I suggest I drop the
MAINTAINER additions for now and we add them later. Then, you can add
the pasemi-core as well. D'accord?



We can just split the MAINTAINERS changes into a separate patch and I 
can push that one through the SoC tree, along with other MAINTAINERS 
updates. Does that work for everyone?


--
Hector Martin (mar...@marcan.st)
Public Key: https://mrcn.st/pub


Re: [PATCH v2 00/11] Add Apple M1 support to PASemi i2c driver

2021-10-11 Thread Wolfram Sang
On Fri, Oct 08, 2021 at 06:35:21PM +0200, Sven Peter wrote:
> Hi,
> 
> v1: 
> https://lore.kernel.org/linux-i2c/20210926095847.38261-1-s...@svenpeter.dev/
> 
> Changes for v2:
>  - Added reviewed-by/acks
>  - Switched from ioport_map to pci_iomap as suggested by Arnd Bergmann
>  - Renamed i2c-pasemi-apple.c to i2c-pasemi-platform.c as suggested by
>Wolfram Sang
>  - Replaced the ioport number in the adapter name with dev_name to be
>able to identify separate busses in e.g. i2cdetect.
> 
> I still don't have access to any old PASemi hardware but the changes from
> v1 are pretty small and I expect them to still work. Would still be nice
> if someone with access to such hardware could give this a quick test.
> 
> 
> And for those who didn't see v1 the (almost) unchanged original cover letter:
> 
> This series adds support for the I2C controller found on Apple Silicon Macs
> which has quite a bit of history:
> 
> Apple bought P.A. Semi in 2008 and it looks like a part of its legacy continues
> to live on in the M1. This controller has actually been used since at least the
> iPhone 4S and hasn't changed much since then.
> Essentially, there are only a few differences that matter:
> 
>   - The controller no longer is a PCI device
>   - Starting at some iPhone an additional bit in one register
>   must be set in order to start transmissions.
>   - The reference clock and hence the clock dividers are different
> 
> In order to add support for a platform device I first replaced PCI-specific
> bits and split out the PCI driver to its own file. Then I added support
> to make the clock divider configurable and converted the driver to use
> managed device resources to make it a bit simpler.
> 
> The Apple and PASemi driver will never be compiled in the same kernel
> since the Apple one will run on arm64 while the original PASemi driver
> will only be useful on powerpc.
> I've thus followed the octeon (mips)/thunderx(arm64) approach to do the
> split: I created a -core.c file which contains the shared logic and just
> compile that one for both the PASemi and the new Apple driver.
> 
> 
> Best,
> 
> Sven
> 
> Sven Peter (11):
>   dt-bindings: i2c: Add Apple I2C controller bindings
>   i2c: pasemi: Use io{read,write}32
>   i2c: pasemi: Use dev_name instead of port number
>   i2c: pasemi: Remove usage of pci_dev
>   i2c: pasemi: Split off common probing code
>   i2c: pasemi: Split pci driver to its own file
>   i2c: pasemi: Move common reset code to own function
>   i2c: pasemi: Allow to configure bus frequency
>   i2c: pasemi: Refactor _probe to use devm_*
>   i2c: pasemi: Add Apple platform driver
>   i2c: pasemi: Set enable bit for Apple variant
> 
>  .../devicetree/bindings/i2c/apple,i2c.yaml|  61 +
>  MAINTAINERS   |   2 +
>  drivers/i2c/busses/Kconfig|  11 ++
>  drivers/i2c/busses/Makefile   |   3 +

Applied to for-next with MAINTAINER bits dropped and added tags from
Olof and Christian, thanks!



signature.asc
Description: PGP signature


Re: [PATCH 1/2] firmware: include drivers/firmware/Kconfig unconditionally

2021-10-11 Thread Arnd Bergmann
On Mon, Oct 11, 2021 at 10:42 AM Geert Uytterhoeven
 wrote:
> On Sat, Oct 9, 2021 at 11:24 AM Paul Menzel  wrote:
> > Am 28.09.21 um 09:50 schrieb Arnd Bergmann:
> > > From: Arnd Bergmann 
> > +#
> > +# ARM System Control and Management Interface Protocol
> > +#
> > +# end of ARM System Control and Management Interface Protocol
> > +
> > +# CONFIG_FIRMWARE_MEMMAP is not set
> > +# CONFIG_GOOGLE_FIRMWARE is not set
> > +
> > +#
> > +# Tegra firmware driver
> > +#
> > +# end of Tegra firmware driver
> > +# end of Firmware Drivers
> > +
> >   # CONFIG_GNSS is not set
> >   CONFIG_MTD=m
> >   # CONFIG_MTD_TESTS is not set
> > ```
> >
> > No idea if the entries could be hidden for platforms not supporting them.
> >
> >  ARM System Control and Management Interface Protocol  
> >  [ ] Add firmware-provided memory map to sysfs
> >  [ ] Google Firmware Drivers  
> >  Tegra firmware driver  
>
> GOOGLE_FIRMWARE should probably depend on something.
> I highly doubt Google is running servers on e.g. h8300 and nds32.

GOOGLE_FIRMWARE is only the 'menuconfig' option that contains
the other options, but on architectures that have neither CONFIG_OF
nor CONFIG_ACPI, this is empty.  Most architectures of course
do support or require CONFIG_OF, so it's unclear whether we should
show the options for coreboot. Since it's a software-only driver, I
would tend to keep showing it, given that coreboot can be ported
to every architecture. The DT binding [1] seems to be neither
Google nor Arm specific.

CONFIG_FIRMWARE_MEMMAP in turn can be used for
anything that has memory hotplug, and in theory additional
drivers that register with this interface.

I'd lean towards "leave as is" for both, to avoid having to
change the dependencies again whenever something else
can use these.

Arnd

[1] Documentation/devicetree/bindings/firmware/coreboot.txt


Re: [PATCH v2 00/11] Add Apple M1 support to PASemi i2c driver

2021-10-11 Thread Wolfram Sang

> > Because MAINTAINER dependencies can be a bit nasty, I suggest I drop the
> > MAINTAINER additions for now and we add them later. Then, you can add
> > the pasemi-core as well. D'accord?
> > 
> 
> We can just split the MAINTAINERS changes into a separate patch and I can
> push that one through the SoC tree, along with other MAINTAINERS updates.
> Does that work for everyone?

That would also work for me. Thank you!



signature.asc
Description: PGP signature


Re: [PATCH v2 00/11] Add Apple M1 support to PASemi i2c driver

2021-10-11 Thread Wolfram Sang
> MAINTAINERS. It'll probably apply cleanly to 5.15-rc5 but if that happens again

It doesn't because Linus' git doesn't have:

Documentation/devicetree/bindings/pci/apple,pcie.yaml

Because MAINTAINER dependencies can be a bit nasty, I suggest I drop the
MAINTAINER additions for now and we add them later. Then, you can add
the pasemi-core as well. D'accord?



signature.asc
Description: PGP signature


Re: [PATCH 1/2] firmware: include drivers/firmware/Kconfig unconditionally

2021-10-11 Thread Geert Uytterhoeven
On Sat, Oct 9, 2021 at 11:24 AM Paul Menzel  wrote:
> [Cc: +linuxppc-dev@lists.ozlabs.org]
>
> Am 28.09.21 um 09:50 schrieb Arnd Bergmann:
> > From: Arnd Bergmann 
> >
> > Compile-testing drivers that require access to a firmware layer
> > fails when that firmware symbol is unavailable. This happened
> > twice this week:
> >
> >   - My proposed change to rework the QCOM_SCM firmware symbol
> > broke on ppc64 and others.
> >
> >   - The cs_dsp firmware patch added device specific firmware loader
> > into drivers/firmware, which broke on the same set of
> > architectures.
> >
> > We should probably do the same thing for other subsystems as well,
> > but fix this one first as this is a dependency for other patches
> > getting merged.
> >

> With this change, I have the new entries below in my .config:
>
> ```
> $ diff -u .config.old .config
> --- .config.old 2021-10-07 11:38:39.54400 +0200
> +++ .config 2021-10-09 10:02:03.15600 +0200
> @@ -1992,6 +1992,25 @@
>
>   CONFIG_CONNECTOR=y
>   CONFIG_PROC_EVENTS=y
> +
> +#
> +# Firmware Drivers
> +#
> +
> +#
> +# ARM System Control and Management Interface Protocol
> +#
> +# end of ARM System Control and Management Interface Protocol
> +
> +# CONFIG_FIRMWARE_MEMMAP is not set
> +# CONFIG_GOOGLE_FIRMWARE is not set
> +
> +#
> +# Tegra firmware driver
> +#
> +# end of Tegra firmware driver
> +# end of Firmware Drivers
> +
>   # CONFIG_GNSS is not set
>   CONFIG_MTD=m
>   # CONFIG_MTD_TESTS is not set
> ```
>
> No idea if the entries could be hidden for platforms not supporting them.
>
>  ARM System Control and Management Interface Protocol  
>  [ ] Add firmware-provided memory map to sysfs
>  [ ] Google Firmware Drivers  
>  Tegra firmware driver  

GOOGLE_FIRMWARE should probably depend on something.
I highly doubt Google is running servers on e.g. h8300 and nds32.

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


[PATCH] powerpc/boot: Use CONFIG_PPC_POWERNV to compile OPAL support

2021-10-11 Thread Cédric Le Goater
CONFIG_PPC64_BOOT_WRAPPER is selected by CPU_LITTLE_ENDIAN which is
used to compile support for other platforms such as Microwatt. There
is no need for OPAL calls on these.

Signed-off-by: Cédric Le Goater 
---
 arch/powerpc/boot/serial.c | 2 +-
 arch/powerpc/boot/Makefile | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index 9a19e5905485..54d2522be485 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -132,7 +132,7 @@ int serial_console_init(void)
else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
rc = mpc5200_psc_console_init(devp, _cd);
 #endif
-#ifdef CONFIG_PPC64_BOOT_WRAPPER
+#ifdef CONFIG_PPC_POWERNV
else if (dt_is_compatible(devp, "ibm,opal-console-raw"))
rc = opal_console_init(devp, _cd);
 #endif
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 089ee3ea55c8..9993c6256ad2 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -123,7 +123,7 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
oflib.c ofconsole.c cuboot.c
 
 src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c
-src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c
+src-wlib-$(CONFIG_PPC_POWERNV) += opal-calls.S opal.c
 ifndef CONFIG_PPC64_BOOT_WRAPPER
 src-wlib-y += crtsavres.S
 endif
-- 
2.31.1



[PATCH] powerpc/xive: Discard disabled interrupts in get_irqchip_state()

2021-10-11 Thread Cédric Le Goater
When an interrupt is passed through, the KVM XIVE device calls the
set_vcpu_affinity() handler which raises the P bit to mask the
interrupt and to catch any in-flight interrupts while routing the
interrupt to the guest.

On the guest side, drivers (like some Intels) can request at probe
time some MSIs and call synchronize_irq() to check that there are no
in flight interrupts. This will call the XIVE get_irqchip_state()
handler which will always return true as the interrupt P bit has been
set on the host side and lock the CPU in an infinite loop.

Fix that by discarding disabled interrupts in get_irqchip_state().

Fixes: da15c03b047d ("powerpc/xive: Implement get_irqchip_state method for XIVE to fix shutdown race")
Cc: sta...@vger.kernel.org#v5.4+
Signed-off-by: Cédric Le Goater 
---
 arch/powerpc/sysdev/xive/common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index c732ce5a3e1a..c5d75c02ad8b 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -945,7 +945,8 @@ static int xive_get_irqchip_state(struct irq_data *data,
 * interrupt to be inactive in that case.
 */
*state = (pq != XIVE_ESB_INVALID) && !xd->stale_p &&
-   (xd->saved_p || !!(pq & XIVE_ESB_VAL_P));
+   (xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) &&
+!irqd_irq_disabled(data)));
return 0;
default:
return -EINVAL;
-- 
2.31.1