Re: [PATCH v2 26/35] powerpc/64: system call: Fix sparse warning about missing declaration

2019-11-26 Thread Christophe Leroy




Le 26/11/2019 à 22:44, Luc Van Oostenryck a écrit :

On Tue, Nov 26, 2019 at 09:13:40PM +0100, Michal Suchanek wrote:

Sparse warns about missing declarations for these functions:

+arch/powerpc/kernel/syscall_64.c:108:23: warning: symbol 
'syscall_exit_prepare' was not declared. Should it be static?
+arch/powerpc/kernel/syscall_64.c:18:6: warning: symbol 'system_call_exception' 
was not declared. Should it be static?
+arch/powerpc/kernel/syscall_64.c:200:23: warning: symbol 
'interrupt_exit_user_prepare' was not declared. Should it be static?
+arch/powerpc/kernel/syscall_64.c:288:23: warning: symbol 
'interrupt_exit_kernel_prepare' was not declared. Should it be static?

Add declaration for them.


I'm fine with this patch but, just FYI, lately people seems to
prefer to add '__visible' to the function definition instead
of creating such header files.


AFAIU, that's not exactly the purpose of '__visible', see 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9add850c2


Christophe


Re: [Very RFC 42/46] powernv/pci: Don't clear pdn->pe_number in pnv_pci_release_device

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Nothing looks at it anymore.

With a small extra step we can ditch it (compile tested):

https://github.com/aik/linux/commit/14db7061d48220354e83f8e100ab0cc1b7181da4



> 
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 12 
>  1 file changed, 12 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index d3e375d71cdc..45d940730c30 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -3541,9 +3541,7 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
>  
>  static void pnv_pci_release_device(struct pci_dev *pdev)
>  {
> - struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>   struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
> - struct pci_dn *pdn = pci_get_pdn(pdev);
>  
>   /* The VF PE state is torn down when sriov_disable() is called */
>   if (pdev->is_virtfn)
> @@ -3560,16 +3558,6 @@ static void pnv_pci_release_device(struct pci_dev 
> *pdev)
>   if (pdev->is_physfn)
>   kfree(pdev->dev.archdata.iov_data);
>  
> - /*
> -  * PCI hotplug can happen as part of EEH error recovery. The @pdn
> -  * isn't removed and added afterwards in this scenario. We should
> -  * set the PE number in @pdn to an invalid one. Otherwise, the PE's
> -  * device count is decreased on removing devices while failing to
> -  * be increased on adding devices. It leads to unbalanced PE's device
> -  * count and eventually make normal PCI hotplug path broken.
> -  */
> - pdn->pe_number = IODA_INVALID_PE;
> -
>   WARN_ON(--pe->device_count < 0);
>   if (pe->device_count == 0)
>   pnv_ioda_release_pe(pe);
> 






-- 
Alexey


[PATCH v3] platforms/powernv: Avoid re-registration of imc debugfs directory

2019-11-26 Thread Anju T Sudhakar
export_imc_mode_and_cmd() function which creates the debugfs interface for
imc-mode and imc-command, is invoked when each nest pmu unit is
registered.
When the first nest pmu unit is registered, export_imc_mode_and_cmd()
creates 'imc' directory under `/debug/powerpc/`. In the subsequent
invocations debugfs_create_dir() function returns, since the directory
already exists.

The recent commit  (debugfs: make error message a bit more
verbose), throws a warning if we try to invoke `debugfs_create_dir()`
with an already existing directory name.

Address this warning by making the debugfs directory registration
in the opal_imc_counters_probe() function, i.e invoke
export_imc_mode_and_cmd() function from the probe function.

Signed-off-by: Anju T Sudhakar 
---
Changes from v2 -> v3:

* Invoke export_imc_mode_and_cmd(), which does the imc debugfs
  directory registration and deletion, from the probe function.
* Change the return type of imc_pmu_create() to get the
  control block address for nest units in the probe
  function
* Remove unnecessary comments

---
 arch/powerpc/platforms/powernv/opal-imc.c | 39 +--
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index e04b206..3b4518f 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -59,10 +59,6 @@ static void export_imc_mode_and_cmd(struct device_node *node,
 
imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
 
-   /*
-* Return here, either because 'imc' directory already exists,
-* Or failed to create a new one.
-*/
if (!imc_debugfs_parent)
return;
 
@@ -135,7 +131,6 @@ static int imc_get_mem_addr_nest(struct device_node *node,
}
 
pmu_ptr->imc_counter_mmaped = true;
-   export_imc_mode_and_cmd(node, pmu_ptr);
kfree(base_addr_arr);
kfree(chipid_arr);
return 0;
@@ -151,7 +146,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
  * and domain as the inputs.
  * Allocates memory for the struct imc_pmu, sets up its domain, size and 
offsets
  */
-static int imc_pmu_create(struct device_node *parent, int pmu_index, int 
domain)
+static struct imc_pmu *imc_pmu_create(struct device_node *parent, int 
pmu_index, int domain)
 {
int ret = 0;
struct imc_pmu *pmu_ptr;
@@ -159,27 +154,23 @@ static int imc_pmu_create(struct device_node *parent, int 
pmu_index, int domain)
 
/* Return for unknown domain */
if (domain < 0)
-   return -EINVAL;
+   return NULL;
 
/* memory for pmu */
pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
if (!pmu_ptr)
-   return -ENOMEM;
+   return NULL;
 
/* Set the domain */
pmu_ptr->domain = domain;
 
ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size);
-   if (ret) {
-   ret = -EINVAL;
+   if (ret)
goto free_pmu;
-   }
 
if (!of_property_read_u32(parent, "offset", &offset)) {
-   if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) {
-   ret = -EINVAL;
+   if (imc_get_mem_addr_nest(parent, pmu_ptr, offset))
goto free_pmu;
-   }
}
 
/* Function to register IMC pmu */
@@ -190,14 +181,14 @@ static int imc_pmu_create(struct device_node *parent, int 
pmu_index, int domain)
if (pmu_ptr->domain == IMC_DOMAIN_NEST)
kfree(pmu_ptr->mem_info);
kfree(pmu_ptr);
-   return ret;
+   return NULL;
}
 
-   return 0;
+   return pmu_ptr;
 
 free_pmu:
kfree(pmu_ptr);
-   return ret;
+   return NULL;
 }
 
 static void disable_nest_pmu_counters(void)
@@ -254,6 +245,7 @@ int get_max_nest_dev(void)
 static int opal_imc_counters_probe(struct platform_device *pdev)
 {
struct device_node *imc_dev = pdev->dev.of_node;
+   struct imc_pmu *pmu;
int pmu_count = 0, domain;
bool core_imc_reg = false, thread_imc_reg = false;
u32 type;
@@ -269,6 +261,7 @@ static int opal_imc_counters_probe(struct platform_device 
*pdev)
}
 
for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) {
+   pmu = NULL;
if (of_property_read_u32(imc_dev, "type", &type)) {
pr_warn("IMC Device without type property\n");
continue;
@@ -293,9 +286,13 @@ static int opal_imc_counters_probe(struct platform_device 
*pdev)
break;
}
 
-   if (!imc_pmu_create(imc_dev, pmu_count, domain)) {
-   if (domain == IMC_DOMAIN_NEST)
+   pmu = imc_pmu_create(imc_dev, pmu_count, domain);
+   

Re: [Very RFC 41/46] powernv/eeh: Remove pdn setup for SR-IOV VFs

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> We don't need a pci_dn for the VF any more, so we can skip adding them.

Excellent!

Reviewed-by: Alexey Kardashevskiy 




> 
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 16 
>  1 file changed, 16 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index d111a50fbe68..d3e375d71cdc 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1526,7 +1526,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, 
> u16 num_vfs)
>   for (vf_index = 0; vf_index < num_vfs; vf_index++) {
>   int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
>   int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
> - struct pci_dn *vf_pdn;
>  
>   if (iov->m64_single_mode)
>   pe_num = iov->pe_num_map[vf_index];
> @@ -1558,15 +1557,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, 
> u16 num_vfs)
>   list_add_tail(&pe->list, &phb->ioda.pe_list);
>   mutex_unlock(&phb->ioda.pe_list_mutex);
>  
> - /* associate this pe to it's pdn */
> - list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
> - if (vf_pdn->busno == vf_bus &&
> - vf_pdn->devfn == vf_devfn) {
> - vf_pdn->pe_number = pe_num;
> - break;
> - }
> - }
> -
>   pnv_pci_ioda2_setup_dma_pe(phb, pe);
>  #ifdef CONFIG_IOMMU_API
>   iommu_register_group(&pe->table_group,
> @@ -1688,17 +1678,11 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 
> num_vfs)
>  int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
>  {
>   pnv_pci_sriov_disable(pdev);
> -
> - /* Release PCI data */
> - remove_sriov_vf_pdns(pdev);
>   return 0;
>  }
>  
>  int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  {
> - /* Allocate PCI data */
> - add_sriov_vf_pdns(pdev);
> -
>   return pnv_pci_sriov_enable(pdev, num_vfs);
>  }
>  #endif /* CONFIG_PCI_IOV */
> 

-- 
Alexey


Re: [Very RFC 40/46] powernv/npu: Don't drop refcount when looking up GPU pci_devs

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> The comment here implies that we don't need to take a ref to the pci_dev
> because the ioda_pe will always have one. This implies that the current
> expection is that the pci_dev for an NPU device will *never* be torn
> down since the ioda_pe having a ref to the device will prevent the
> release function from being called.
> 
> In other words, the desired behaviour here appears to be leaking a ref.
> 
> Nice!


There is a history: https://patchwork.ozlabs.org/patch/1088078/

We did not fix anything in particular then, we do not seem to be fixing
anything now (in other words - we cannot test it in a normal natural
way). I'd drop this one.



> 
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/npu-dma.c | 11 +++
>  1 file changed, 3 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
> b/arch/powerpc/platforms/powernv/npu-dma.c
> index 72d3749da02c..2eb6e6d45a98 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -28,15 +28,10 @@ static struct pci_dev *get_pci_dev(struct device_node *dn)
>   break;
>  
>   /*
> -  * pci_get_domain_bus_and_slot() increased the reference count of
> -  * the PCI device, but callers don't need that actually as the PE
> -  * already holds a reference to the device. Since callers aren't
> -  * aware of the reference count change, call pci_dev_put() now to
> -  * avoid leaks.
> +  * NB: for_each_pci_dev() elevates the pci_dev refcount.
> +  * Caller is responsible for dropping the ref when it's
> +  * finished with it.
>*/
> - if (pdev)
> - pci_dev_put(pdev);
> -
>   return pdev;
>  }
>  
> 

-- 
Alexey


Re: [Very RFC 39/46] powernv/npu: Avoid pci_dn when mapping device_node to a pci_dev

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> There's no need to use the pci_dn to find a device_node from a pci_dev.
> Just search for the node pointed to by the pci_dev's of_node pointer.



Reviewed-by: Alexey Kardashevskiy 



> 
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/npu-dma.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
> b/arch/powerpc/platforms/powernv/npu-dma.c
> index 68bfaef44862..72d3749da02c 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -21,11 +21,11 @@
>  
>  static struct pci_dev *get_pci_dev(struct device_node *dn)
>  {
> - struct pci_dn *pdn = PCI_DN(dn);
> - struct pci_dev *pdev;
> + struct pci_dev *pdev = NULL;
>  
> - pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
> -pdn->busno, pdn->devfn);
> + for_each_pci_dev(pdev)
> + if (pdev->dev.of_node == dn)
> + break;
>  
>   /*
>* pci_get_domain_bus_and_slot() increased the reference count of
> 

-- 
Alexey


Re: Bug 205201 - Booting halts if Dawicontrol DC-2976 UW SCSI board installed, unless RAM size limited to 3500M

2019-11-26 Thread Mike Rapoport
On Tue, Nov 26, 2019 at 05:40:26PM +0100, Christoph Hellwig wrote:
> On Tue, Nov 26, 2019 at 12:26:38PM +0100, Christian Zigotzky wrote:
> > Hello Christoph,
> >
> > The PCI TV card works with your patch! I was able to patch your Git kernel 
> > with the patch above.
> >
> > I haven't found any error messages in the dmesg yet.
> 
> Thanks.  Unfortunately this is a bit of a hack as we need to set
> the mask based on runtime information like the magic FSL PCIe window.
> Let me try to draft something better up, and thanks already for testing
> this one!

Maybe we'll simply force bottom up allocation before calling
swiotlb_init()? Anyway, it's the last memblock allocation.


diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 62f74b1b33bd..771e6cf7e2b9 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -286,14 +286,15 @@ void __init mem_init(void)
/*
 * book3s is limited to 16 page sizes due to encoding this in
 * a 4-bit field for slices.
 */
BUILD_BUG_ON(MMU_PAGE_COUNT > 16);
 
 #ifdef CONFIG_SWIOTLB
+   memblock_set_bottom_up(true);
swiotlb_init(0);
 #endif
 
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
set_max_mapnr(max_pfn);
memblock_free_all();
 
 


-- 
Sincerely yours,
Mike.



Re: [PATCH v11 1/7] mm: ksm: Export ksm_madvise()

2019-11-26 Thread Bharata B Rao
On Tue, Nov 26, 2019 at 07:59:49PM -0800, Hugh Dickins wrote:
> On Mon, 25 Nov 2019, Bharata B Rao wrote:
> 
> > On PEF-enabled POWER platforms that support running of secure guests,
> > secure pages of the guest are represented by device private pages
> > in the host. Such pages needn't participate in KSM merging. This is
> > achieved by using ksm_madvise() call which need to be exported
> > since KVM PPC can be a kernel module.
> > 
> > Signed-off-by: Bharata B Rao 
> > Acked-by: Paul Mackerras 
> > Cc: Andrea Arcangeli 
> > Cc: Hugh Dickins 
> 
> I can say
> Acked-by: Hugh Dickins 
> to this one.
> 
> But not to your 2/7 which actually makes use of it: because sadly it
> needs down_write(&kvm->mm->mmap_sem) for the case when it switches off
> VM_MERGEABLE in vma->vm_flags.  That's frustrating, since I think it's
> the only operation for which down_read() is not good enough.

Oh ok! Thanks for pointing this out.

> 
> I have no idea how contended that mmap_sem is likely to be, nor how
> many to-be-secured pages that vma is likely to contain: you might find
> it okay simply to go with it down_write throughout, or you might want
> to start out with it down_read, and only restart with down_write (then
> perhaps downgrade_write later) when you see VM_MERGEABLE is set.

Using down_write throughout is not easy as we do migrate_vma_pages()
from fault path (->migrate_to_ram()) too. Here we come with down_read
already held.

Starting with down_read and restarting with down_write if VM_MERGEABLE
is set -- this also looks a bit difficult as we will have challenges
with locking order if we release mmap_sem in between and re-acquire.

So I think I will start with down_write in this particular case
and will downgrade_write as soon as ksm_madvise() is complete.

> 
> The crash you got (thanks for the link): that will be because your
> migrate_vma_pages() had already been applied to a page that was
> already being shared via KSM.
> 
> But if these secure pages are expected to be few and far between,
> maybe you'd prefer to keep VM_MERGEABLE, and add per-page checks
> of some kind into mm/ksm.c, to skip over these surprising hybrids.

I did bail out from a few routines in mm/ksm.c with
is_device_private_page(page) check, but that wasn't good enough and
I encountered crashes in different code paths. Guess a bit more
understanding of KSM internals would be required before retrying that.

However since all the pages of the guest except for a few will be turned
into secure pages early during boot, it appears better if secure guests
don't participate in KSM merging at all.

Regards,
Bharata.



Re: [Very RFC 38/46] powerpc/pci-hotplug: Scan the whole bus when using PCI_PROBE_NORMAL

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Currently when using the normal (i.e not building pci_dev's from the DT
> node) probe method we only scan the devfn corresponding to the first child
> of the bridge's DT node. This doesn't make much sense to me, but it seems
> to have worked so far. At a guess it seems to work because in a PCIe
> environment the first downstream child will be at devfn 00.0.
> 
> In any case it's completely broken when no pci_dn is available. Remove
> the PCI_DN checking and scan each of the device number that might be on
> the downstream bus.


Then why not just use pci_scan_child_bus()? Thanks,


> Cc: Benjamin Herrenschmidt 
> Signed-off-by: Oliver O'Halloran 
> ---
> I'm not sure we should be using pci_scan_slot() directly here. Maybe
> there's some insane legacy reason for it.
> ---
>  arch/powerpc/kernel/pci-hotplug.c | 15 ---
>  1 file changed, 4 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/pci-hotplug.c 
> b/arch/powerpc/kernel/pci-hotplug.c
> index d6a67f814983..85299c769768 100644
> --- a/arch/powerpc/kernel/pci-hotplug.c
> +++ b/arch/powerpc/kernel/pci-hotplug.c
> @@ -123,17 +123,10 @@ void pci_hp_add_devices(struct pci_bus *bus)
>   if (mode == PCI_PROBE_DEVTREE) {
>   /* use ofdt-based probe */
>   of_rescan_bus(dn, bus);
> - } else if (mode == PCI_PROBE_NORMAL &&
> -dn->child && PCI_DN(dn->child)) {
> - /*
> -  * Use legacy probe. In the partial hotplug case, we
> -  * probably have grandchildren devices unplugged. So
> -  * we don't check the return value from pci_scan_slot() in
> -  * order for fully rescan all the way down to pick them up.
> -  * They can have been removed during partial hotplug.
> -  */
> - slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
> - pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
> + } else if (mode == PCI_PROBE_NORMAL) {
> + for (slotno = 0; slotno < 255; slotno += 8)
> + pci_scan_slot(bus, slotno);
> +
>   max = bus->busn_res.start;
>   /*
>* Scan bridges that are already configured. We don't touch
> 

-- 
Alexey


Re: [Very RFC 36/46] powernv/npu: Remove open-coded PE lookup for GPU device

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/npu-dma.c | 13 ++---
>  1 file changed, 2 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
> b/arch/powerpc/platforms/powernv/npu-dma.c
> index b95b9e3c4c98..68bfaef44862 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -97,25 +97,16 @@ EXPORT_SYMBOL(pnv_pci_get_npu_dev);
>  static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
> struct pci_dev **gpdev)
>  {
> - struct pnv_phb *phb;
> - struct pci_controller *hose;
>   struct pci_dev *pdev;
>   struct pnv_ioda_pe *pe;
> - struct pci_dn *pdn;
>  
>   pdev = pnv_pci_get_gpu_dev(npe->pdev);
>   if (!pdev)
>   return NULL;
>  
> - pdn = pci_get_pdn(pdev);
> - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
> - return NULL;
> -
> - hose = pci_bus_to_host(pdev->bus);
> - phb = hose->private_data;
> - pe = &phb->ioda.pe_array[pdn->pe_number];
> + pe = pnv_ioda_get_pe(pdev);
>  
> - if (gpdev)
> + if (pe && pdev)


s/pdev/gpdev/



>   *gpdev = pdev;
>  
>   return pe;
> 

-- 
Alexey


Re: [PATCH v2 29/35] powerpc/perf: remove current_is_64bit()

2019-11-26 Thread Christophe Leroy




Le 26/11/2019 à 21:13, Michal Suchanek a écrit :

Since commit ed1cd6deb013 ("powerpc: Activate CONFIG_THREAD_INFO_IN_TASK")
current_is_64bit() is equivalent to !is_32bit_task().
Remove the redundant function.

Link: https://github.com/linuxppc/issues/issues/275
Link: https://lkml.org/lkml/2019/9/12/540

Fixes: linuxppc#275
Suggested-by: Christophe Leroy 
Signed-off-by: Michal Suchanek 


This change is already in powerpc/next, see 
https://github.com/linuxppc/linux/commit/42484d2c0f82b666292faf6668c77b49a3a04bc0


Christophe


---
  arch/powerpc/perf/callchain.c | 17 +
  1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index c84bbd4298a0..35d542515faf 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -284,16 +284,6 @@ static void perf_callchain_user_64(struct 
perf_callchain_entry_ctx *entry,
}
  }
  
-static inline int current_is_64bit(void)

-{
-   /*
-* We can't use test_thread_flag() here because we may be on an
-* interrupt stack, and the thread flags don't get copied over
-* from the thread_info on the main stack to the interrupt stack.
-*/
-   return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
-}
-
  #else  /* CONFIG_PPC64 */
  /*
   * On 32-bit we just access the address and let hash_page create a
@@ -321,11 +311,6 @@ static inline void perf_callchain_user_64(struct 
perf_callchain_entry_ctx *entry
  {
  }
  
-static inline int current_is_64bit(void)

-{
-   return 0;
-}
-
  static inline int valid_user_sp(unsigned long sp, int is_64)
  {
if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
@@ -486,7 +471,7 @@ static void perf_callchain_user_32(struct 
perf_callchain_entry_ctx *entry,
  void
  perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs 
*regs)
  {
-   if (current_is_64bit())
+   if (!is_32bit_task())
perf_callchain_user_64(entry, regs);
else
perf_callchain_user_32(entry, regs);



Re: [Very RFC 35/46] powernv/pci: Remove open-coded PE lookup in pnv_pci_release_device

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 4f38652c7cd7..8525642b1256 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -3562,14 +3562,14 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe 
> *pe)
>  static void pnv_pci_release_device(struct pci_dev *pdev)
>  {
>   struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
>   struct pci_dn *pdn = pci_get_pdn(pdev);
> - struct pnv_ioda_pe *pe;
>  
>   /* The VF PE state is torn down when sriov_disable() is called */
>   if (pdev->is_virtfn)
>   return;
>  
> - if (!pdn || pdn->pe_number == IODA_INVALID_PE)
> + if (WARN_ON(!pe))


Is that WARN_ON because there is always a PE - from upstream bridge or a
reserved one?



>   return;
>  
>   /*
> @@ -3588,7 +3588,6 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
>* be increased on adding devices. It leads to unbalanced PE's device
>* count and eventually make normal PCI hotplug path broken.
>*/
> - pe = &phb->ioda.pe_array[pdn->pe_number];
>   pdn->pe_number = IODA_INVALID_PE;
>  
>   WARN_ON(--pe->device_count < 0);
> 

-- 
Alexey


Re: [Very RFC 34/46] powernv/pci: Remove open-coded PE lookup in pnv_pci_enable_device_hook()

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Signed-off-by: Oliver O'Halloran 



Reviewed-by: Alexey Kardashevskiy 

but better squash it.


> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 7 +--
>  1 file changed, 1 insertion(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 7e88de18ead6..4f38652c7cd7 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -3382,7 +3382,6 @@ static resource_size_t 
> pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
>  static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
>  {
>   struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
> - struct pci_dn *pdn;
>  
>   /* The function is probably called while the PEs have
>* not be created yet. For example, resource reassignment
> @@ -3392,11 +3391,7 @@ static bool pnv_pci_enable_device_hook(struct pci_dev 
> *dev)
>   if (!phb->initialized)
>   return true;
>  
> - pdn = pci_get_pdn(dev);
> - if (!pdn || pdn->pe_number == IODA_INVALID_PE)
> - return false;
> -
> - return true;
> + return !!pnv_ioda_get_pe(dev);
>  }
>  
>  static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
> 

-- 
Alexey


Re: [Very RFC 33/46] powernv/pci: Remove open-coded PE lookup in iommu notifier

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/pci.c | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci.c 
> b/arch/powerpc/platforms/powernv/pci.c
> index 5b1f4677cdce..0eeea8652426 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -943,23 +943,22 @@ static int pnv_tce_iommu_bus_notifier(struct 
> notifier_block *nb,
>  {
>   struct device *dev = data;
>   struct pci_dev *pdev;
> - struct pci_dn *pdn;
>   struct pnv_ioda_pe *pe;
>   struct pnv_phb *phb;
>  
>   switch (action) {
>   case BUS_NOTIFY_ADD_DEVICE:
>   pdev = to_pci_dev(dev);
> - pdn = pci_get_pdn(pdev);
>   phb = pci_bus_to_pnvhb(pdev->bus);
>  
>   WARN_ON_ONCE(!phb);
> - if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb)
> + if (!phb)
>   return 0;

This check is weird - the function does not use @phb anymore, it would
make more sense if pnv_ioda_get_pe() checked phb!=NULL.


>  
> - pe = &phb->ioda.pe_array[pdn->pe_number];
> - if (!pe->table_group.group)
> + pe = pnv_ioda_get_pe(pdev);
> + if (!pe || !pe->table_group.group)
>   return 0;
> +
>   iommu_add_device(&pe->table_group, dev);
>   return 0;
>   case BUS_NOTIFY_DEL_DEVICE:
> 

-- 
Alexey


Re: [Very RFC 32/46] powernv/pci: Remove open-coded PE lookup in iommu_bypass_supported()

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Signed-off-by: Oliver O'Halloran 



Reviewed-by: Alexey Kardashevskiy 

but honestly can be squashed into 31/46 or/and 33/46 or other similar
patches.

> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 98d858999a2d..7e88de18ead6 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1801,13 +1801,11 @@ static bool 
> pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
>   u64 dma_mask)
>  {
>   struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> - struct pci_dn *pdn = pci_get_pdn(pdev);
> - struct pnv_ioda_pe *pe;
> + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
>  
> - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
> + if (WARN_ON(!pe))
>   return false;
>  
> - pe = &phb->ioda.pe_array[pdn->pe_number];
>   if (pe->tce_bypass_enabled) {
>   u64 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
>   if (dma_mask >= top)
> 

-- 
Alexey


Re: [PATCH v3 2/4] powerpc/fadump: reorganize /sys/kernel/fadump_* sysfs files

2019-11-26 Thread Sourabh Jain



On 11/25/19 12:10 AM, Michal Suchánek wrote:
> On Sat, Nov 16, 2019 at 08:07:29PM +0530, Sourabh Jain wrote:
>>
>>
>> On 11/9/19 6:29 PM, Michal Suchánek wrote:
>>> On Sat, Nov 09, 2019 at 05:53:37PM +0530, Sourabh Jain wrote:
 As the number of FADump sysfs files increases it is hard to manage all of
 them inside /sys/kernel directory. It's better to have all the FADump
 related sysfs files in a dedicated directory /sys/kernel/fadump. But in
 order to maintain the backward compatibility the /sys/kernel/fadump_*
 sysfs files are replicated inside /sys/kernel/fadump/ and eventually get
 removed in future.

 As the FADump sysfs files are now part of dedicated directory there is no
 need to prefix their name with fadump_, hence sysfs file names are also
 updated. For example fadump_enabled sysfs file is now referred as enabled.

 Also consolidate ABI documentation for all the FADump sysfs files in a
 single file Documentation/ABI/testing/sysfs-kernel-fadump.

 Signed-off-by: Sourabh Jain 
 ---
  Documentation/ABI/testing/sysfs-kernel-fadump | 41 +++
  arch/powerpc/kernel/fadump.c  | 38 +
  arch/powerpc/platforms/powernv/opal-core.c| 10 +++--
  3 files changed, 86 insertions(+), 3 deletions(-)
  create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump

 diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump 
 b/Documentation/ABI/testing/sysfs-kernel-fadump
 new file mode 100644
 index ..a77f1a5ba389
 --- /dev/null
 +++ b/Documentation/ABI/testing/sysfs-kernel-fadump
 @@ -0,0 +1,41 @@
 +What: /sys/kernel/fadump/*
 +Date: Nov 2019
 +Contact:  linuxppc-dev@lists.ozlabs.org
 +Description:
 +  The /sys/kernel/fadump/* is a collection of FADump sysfs
 +  file provide information about the configuration status
 +  of Firmware Assisted Dump (FADump).
 +
 +What: /sys/kernel/fadump/enabled
 +Date: Nov 2019
 +Contact:  linuxppc-dev@lists.ozlabs.org
 +Description:  read only
 +  Primarily used to identify whether the FADump is enabled in
 +  the kernel or not.
 +User: Kdump service
 +
 +What: /sys/kernel/fadump/registered
 +Date: Nov 2019
 +Contact:  linuxppc-dev@lists.ozlabs.org
 +Description:  read/write
 +  Helps to control the dump collect feature from userspace.
 +  Setting 1 to this file enables the system to collect the
 +  dump and 0 to disable it.
 +User: Kdump service
 +
 +What: /sys/kernel/fadump/release_mem
 +Date: Nov 2019
 +Contact:  linuxppc-dev@lists.ozlabs.org
 +Description:  write only
 +  This is a special sysfs file and only available when
 +  the system is booted to capture the vmcore using FADump.
 +  It is used to release the memory reserved by FADump to
 +  save the crash dump.
 +
 +What: /sys/kernel/fadump/release_opalcore
 +Date: Nov 2019
 +Contact:  linuxppc-dev@lists.ozlabs.org
 +Description:  write only
 +  The sysfs file is available when the system is booted to
 +  collect the dump on OPAL based machine. It is used to release
 +  the memory used to collect the opalcore.
 diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
 index ed59855430b9..a9591def0c84 100644
 --- a/arch/powerpc/kernel/fadump.c
 +++ b/arch/powerpc/kernel/fadump.c
 @@ -1418,6 +1418,9 @@ static int fadump_region_show(struct seq_file *m, 
 void *private)
return 0;
  }
  
 +struct kobject *fadump_kobj;
 +EXPORT_SYMBOL_GPL(fadump_kobj);
 +
  static struct kobj_attribute fadump_release_attr = 
 __ATTR(fadump_release_mem,
0200, NULL,
fadump_release_memory_store);
 @@ -1428,6 +1431,16 @@ static struct kobj_attribute fadump_register_attr = 
 __ATTR(fadump_registered,
0644, fadump_register_show,
fadump_register_store);
  
 +static struct kobj_attribute release_attr = __ATTR(release_mem,
 +  0200, NULL,
 +  fadump_release_memory_store);
 +static struct kobj_attribute enable_attr = __ATTR(enabled,
 +  0444, fadump_enabled_show,
 +  NULL);
 +static struct kobj_attribute register_attr = __ATTR(registered,
 +  

Re: [Very RFC 31/46] powernv/pci: Remove open-coded PE lookup in pnv_pci_ioda_dma_dev_setup()

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Use the helper to look up the pnv_ioda_pe for the device we're configuring DMA
> for. In the VF case there's no need set pdn->pe_number since nothing looks at
> it any more.
> 
> Signed-off-by: Oliver O'Halloran 



Reviewed-by: Alexey Kardashevskiy 



> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index d4b5ee926222..98d858999a2d 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1709,10 +1709,9 @@ int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 
> num_vfs)
>  
>  static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev 
> *pdev)
>  {
> - struct pci_dn *pdn = pci_get_pdn(pdev);
>   struct pnv_ioda_pe *pe;
>  
> - pe = &phb->ioda.pe_array[pdn->pe_number];
> + pe = pnv_ioda_get_pe(pdev);
>   WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
>   pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
>   set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
> 

-- 
Alexey


Re: [Very RFC 30/46] powernv/pci: Remove open-coded PE lookup in PELT-V teardown

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 12 +++-
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 5bd7c1b058da..d4b5ee926222 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -853,11 +853,13 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, 
> struct pnv_ioda_pe *pe)
>  
>   /* Release from all parents PELT-V */
>   while (parent) {
> - struct pci_dn *pdn = pci_get_pdn(parent);
> - if (pdn && pdn->pe_number != IODA_INVALID_PE) {
> - rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
> - pe->pe_number, 
> OPAL_REMOVE_PE_FROM_DOMAIN);
> - /* XXX What to do in case of error ? */

May be print a warning, like a few lines below (in the code, not in the
patch). Not important though if gcc does not complain about an unused
returned value.

Reviewed-by: Alexey Kardashevskiy 




> + struct pnv_ioda_pe *parent_pe = pnv_ioda_get_pe(parent);
> +
> + if (parent_pe) {
> + rc = opal_pci_set_peltv(phb->opal_id,
> + parent_pe->pe_number,
> + pe->pe_number,
> + OPAL_REMOVE_PE_FROM_DOMAIN);
>   }
>   parent = parent->bus->self;
>   }
> 

-- 
Alexey


Re: [Very RFC 29/46] powernv/pci: Remove open-coded PE lookup in PELT-V setup

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 32 +--
>  1 file changed, 24 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 1c90feed233d..5bd7c1b058da 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -760,6 +760,11 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
>   }
>   }
>  
> + /*
> +  * Walk the bridges up to the root. Along the way mark this PE as
> +  * downstream of the bridge PE(s) so that errors upstream errors


Too many "errors" in "errors upstream errors".

Otherwise

Reviewed-by: Alexey Kardashevskiy 




> +  * also cause this PE to be frozen.
> +  */
>   if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS))
>   pdev = pe->pbus->self;
>   else if (pe->flags & PNV_IODA_PE_DEV)
> @@ -768,16 +773,27 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
>   else if (pe->flags & PNV_IODA_PE_VF)
>   pdev = pe->parent_dev;
>  #endif /* CONFIG_PCI_IOV */
> +
>   while (pdev) {
> - struct pci_dn *pdn = pci_get_pdn(pdev);
> - struct pnv_ioda_pe *parent;
> + struct pnv_ioda_pe *parent = pnv_ioda_get_pe(pdev);
>  
> - if (pdn && pdn->pe_number != IODA_INVALID_PE) {
> - parent = &phb->ioda.pe_array[pdn->pe_number];
> - ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add);
> - if (ret)
> - return ret;
> - }
> + /*
> +  * FIXME: This is called from pcibios_setup_bridge(), which is 
> called
> +  * from the bottom (leaf) bridge to the root. This means that 
> this
> +  * doesn't actually setup the PELT-V entries since the PEs for
> +  * the bridges above assigned after this is run for the leaf.
> +  *
> +  * FIXMEFIXME: might not be true since moving PE configuration
> +  * into pcibios_bus_add_device().
> +  */
> + if (!parent)
> + break;
> +
> + WARN_ON(!parent || parent->pe_number == IODA_INVALID_PE);
> +
> + ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add);
> + if (ret)
> + return ret;
>  
>   pdev = pdev->bus->self;
>   }
> 

-- 
Alexey


Re: [Very RFC 28/46] powernv/iov: Move SR-IOV PF state out of pci_dn

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Move the SR-IOV into a platform specific structure. I'm sure stashing all the
> SR-IOV state in pci_dn seemed like a good idea at the time, but it results in 
> a
> lot of powernv specifics being leaked out of the platform directory.
> 
> Moving all the PHB3/4 specific M64 BAR wrangling into a PowerNV specific
> structure helps to clarify the role of pci_dn and ensures that the platform
> specifics stay that way.
> 
> This will make the code easier to understand and modify since we don't need
> to worry so much about PowerNV changes breaking pseries and EEH, and vice-versa.
> 
> Signed-off-by: Oliver O'Halloran 
> ---
> TODO: Remove all the sriov stuff from pci_dn. We can't do that yet because
> the pseries SRIOV support was a giant hack that re-used some of the
> previously powernv specific fields.
> ---
>  arch/powerpc/include/asm/device.h |   3 +
>  arch/powerpc/platforms/powernv/pci-ioda.c | 199 --
>  arch/powerpc/platforms/powernv/pci.h  |  36 
>  3 files changed, 148 insertions(+), 90 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/device.h 
> b/arch/powerpc/include/asm/device.h
> index 266542769e4b..4d8934db7ef5 100644
> --- a/arch/powerpc/include/asm/device.h
> +++ b/arch/powerpc/include/asm/device.h
> @@ -49,6 +49,9 @@ struct dev_archdata {
>  #ifdef CONFIG_CXL_BASE
>   struct cxl_context  *cxl_ctx;
>  #endif
> +#ifdef CONFIG_PCI_IOV
> + void *iov_data;
> +#endif
>  };
>  
>  struct pdev_archdata {
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index a1c9315f3208..1c90feed233d 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -966,14 +966,15 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, 
> struct pnv_ioda_pe *pe)
>  #ifdef CONFIG_PCI_IOV
>  static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>  {
> - struct pci_dn *pdn = pci_get_pdn(dev);
> - int i;
>   struct resource *res, res2;
> + struct pnv_iov_data *iov;
>   resource_size_t size;
>   u16 num_vfs;
> + int i;
>  
>   if (!dev->is_physfn)
>   return -EINVAL;
> + iov = pnv_iov_get(dev);
>  
>   /*
>* "offset" is in VFs.  The M64 windows are sized so that when they
> @@ -983,7 +984,7 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, 
> int offset)
>* separate PE, and changing the IOV BAR start address changes the
>* range of PEs the VFs are in.
>*/
> - num_vfs = pdn->num_vfs;
> + num_vfs = iov->num_vfs;
>   for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
>   res = &dev->resource[i + PCI_IOV_RESOURCES];
>   if (!res->flags || !res->parent)
> @@ -1029,19 +1030,19 @@ static int pnv_pci_vf_resource_shift(struct pci_dev 
> *dev, int offset)
>num_vfs, offset);
>  
>   if (offset < 0) {
> - devm_release_resource(&dev->dev, &pdn->holes[i]);
> - memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
> + devm_release_resource(&dev->dev, &iov->holes[i]);
> + memset(&iov->holes[i], 0, sizeof(iov->holes[i]));
>   }
>  
>   pci_update_resource(dev, i + PCI_IOV_RESOURCES);
>  
>   if (offset > 0) {
> - pdn->holes[i].start = res2.start;
> - pdn->holes[i].end = res2.start + size * offset - 1;
> - pdn->holes[i].flags = IORESOURCE_BUS;
> - pdn->holes[i].name = "pnv_iov_reserved";
> + iov->holes[i].start = res2.start;
> + iov->holes[i].end = res2.start + size * offset - 1;
> + iov->holes[i].flags = IORESOURCE_BUS;
> + iov->holes[i].name = "pnv_iov_reserved";
>   devm_request_resource(&dev->dev, res->parent,
> - &pdn->holes[i]);
> + &iov->holes[i]);
>   }
>   }
>   return 0;
> @@ -1273,37 +1274,37 @@ static void pnv_pci_ioda_setup_PEs(void)
>  #ifdef CONFIG_PCI_IOV
>  static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
>  {
> + struct pnv_iov_data   *iov;
>   struct pnv_phb*phb;
> - struct pci_dn *pdn;
>   inti, j;
>   intm64_bars;
>  
>   phb = pci_bus_to_pnvhb(pdev->bus);
> - pdn = pci_get_pdn(pdev);
> + iov = pnv_iov_get(pdev);
>  
> - if (pdn->m64_single_mode)
> + if (iov->m64_single_mode)
>   m64_bars = num_vfs;
>   else
>   m64_bars = 1;
>  
>   for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
>   for (j = 0; j < m64_bars; j++) {
> - if (pdn->m64_map[j][i] == IODA_INVALID_M64)
> + if (iov->m64_m

Re: [PATCH v11 1/7] mm: ksm: Export ksm_madvise()

2019-11-26 Thread Hugh Dickins
On Mon, 25 Nov 2019, Bharata B Rao wrote:

> On PEF-enabled POWER platforms that support running of secure guests,
> secure pages of the guest are represented by device private pages
> in the host. Such pages needn't participate in KSM merging. This is
> achieved by using ksm_madvise() call which need to be exported
> since KVM PPC can be a kernel module.
> 
> Signed-off-by: Bharata B Rao 
> Acked-by: Paul Mackerras 
> Cc: Andrea Arcangeli 
> Cc: Hugh Dickins 

I can say
Acked-by: Hugh Dickins 
to this one.

But not to your 2/7 which actually makes use of it: because sadly it
needs down_write(&kvm->mm->mmap_sem) for the case when it switches off
VM_MERGEABLE in vma->vm_flags.  That's frustrating, since I think it's
the only operation for which down_read() is not good enough.

I have no idea how contended that mmap_sem is likely to be, nor how
many to-be-secured pages that vma is likely to contain: you might find
it okay simply to go with it down_write throughout, or you might want
to start out with it down_read, and only restart with down_write (then
perhaps downgrade_write later) when you see VM_MERGEABLE is set.

The crash you got (thanks for the link): that will be because your
migrate_vma_pages() had already been applied to a page that was
already being shared via KSM.

But if these secure pages are expected to be few and far between,
maybe you'd prefer to keep VM_MERGEABLE, and add per-page checks
of some kind into mm/ksm.c, to skip over these surprising hybrids.

Hugh

> ---
>  mm/ksm.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/mm/ksm.c b/mm/ksm.c
> index dbee2eb4dd05..e45b02ad3f0b 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -2478,6 +2478,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned 
> long start,
>  
>   return 0;
>  }
> +EXPORT_SYMBOL_GPL(ksm_madvise);
>  
>  int __ksm_enter(struct mm_struct *mm)
>  {
> -- 
> 2.21.0


Re: [Very RFC 27/46] powernv/pci: Clear reserved PE freezes

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> When we scan an empty slot the PHB gets an Unsupported Request from the
> downstream bridge when there's no device present at that BDFN.  Some older
> PHBs (p7-IOC) don't allow further config space accesses while the PE is
> frozen, so clear it here without bothering with the diagnostic log.


This executes when EEH is not enabled (rather unsupported case) and the
patch allegedly extends support of some P7 none of which was ever
supported by the powernv platform, or was/is it? Thanks,


> 
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/pci.c | 13 +
>  1 file changed, 13 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci.c 
> b/arch/powerpc/platforms/powernv/pci.c
> index 36eea4bb514c..5b1f4677cdce 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -642,6 +642,19 @@ static void pnv_pci_config_check_eeh(struct pnv_phb 
> *phb, u16 bdfn)
>   if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
>   fstate == OPAL_EEH_STOPPED_DMA_FREEZE  ||
>   fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) {
> +
> + /*
> +  * Scanning an empty slot will result in a freeze on the 
> reserved PE.
> +  *
> +  * Some old and bad PHBs block config space access to frozen 
> PEs in
> +  * addition to MMIOs, so unfreeze it here.
> +  */
> + if (pe_no == phb->ioda.reserved_pe_idx) {
> + phb->unfreeze_pe(phb, phb->ioda.reserved_pe_idx,
> +  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
> + return;
> + }
> +
>   /*
>* If PHB supports compound PE, freeze it for
>* consistency.
> 

-- 
Alexey


Re: [Very RFC 26/46] powernv/pci: Remove pdn from pnv_pci_cfg_{read|write}

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Remove the use of pci_dn from the low-level config space access functions.
> These are used by the eeh's config ops and the bus config ops that we
> provide to the PCI core.
> 
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/eeh-powernv.c | 14 +++
>  arch/powerpc/platforms/powernv/pci.c | 26 
>  arch/powerpc/platforms/powernv/pci.h |  6 ++---
>  3 files changed, 16 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
> b/arch/powerpc/platforms/powernv/eeh-powernv.c
> index 49a932ff092a..8a73bc7517c5 100644
> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> @@ -331,31 +331,25 @@ static inline bool pnv_eeh_cfg_blocked(struct eeh_dev 
> *edev)
>  static int pnv_eeh_read_config(struct eeh_dev *edev,
>  int where, int size, u32 *val)
>  {
> - struct pci_dn *pdn = eeh_dev_to_pdn(edev);
> -
> - if (!pdn)
> - return PCIBIOS_DEVICE_NOT_FOUND;
> + struct pnv_phb *phb = edev->controller->private_data;
>  
>   if (pnv_eeh_cfg_blocked(edev)) {
>   *val = 0x;
>   return PCIBIOS_SET_FAILED;
>   }
>  
> - return pnv_pci_cfg_read(pdn, where, size, val);
> + return pnv_pci_cfg_read(phb, edev->bdfn, where, size, val);
>  }
>  
>  static int pnv_eeh_write_config(struct eeh_dev *edev,
>   int where, int size, u32 val)
>  {
> - struct pci_dn *pdn = eeh_dev_to_pdn(edev);
> -
> - if (!pdn)
> - return PCIBIOS_DEVICE_NOT_FOUND;
> + struct pnv_phb *phb = edev->controller->private_data;
>  
>   if (pnv_eeh_cfg_blocked(edev))
>   return PCIBIOS_SET_FAILED;
>  
> - return pnv_pci_cfg_write(pdn, where, size, val);
> + return pnv_pci_cfg_write(phb, edev->bdfn, where, size, val);
>  }
>  
>  static struct eeh_pe *pnv_eeh_pe_get_parent(struct pci_dev *pdev)
> diff --git a/arch/powerpc/platforms/powernv/pci.c 
> b/arch/powerpc/platforms/powernv/pci.c
> index 50142ff045ac..36eea4bb514c 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -654,11 +654,9 @@ static void pnv_pci_config_check_eeh(struct pnv_phb 
> *phb, u16 bdfn)
>   }
>  }
>  
> -int pnv_pci_cfg_read(struct pci_dn *pdn,
> +int pnv_pci_cfg_read(struct pnv_phb *phb, u16 bdfn,
>int where, int size, u32 *val)
>  {
> - struct pnv_phb *phb = pdn->phb->private_data;
> - u32 bdfn = (pdn->busno << 8) | pdn->devfn;
>   s64 rc;
>  
>   switch (size) {
> @@ -685,19 +683,16 @@ int pnv_pci_cfg_read(struct pci_dn *pdn,
>   return PCIBIOS_FUNC_NOT_SUPPORTED;
>   }
>  
> - pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
> -  __func__, pdn->busno, pdn->devfn, where, size, *val);
> + pr_devel("%s: bdfn: %x  +%x/%x -> %08x\n",
> +  __func__, bdfn, where, size, *val);
>   return PCIBIOS_SUCCESSFUL;
>  }
>  
> -int pnv_pci_cfg_write(struct pci_dn *pdn,
> +int pnv_pci_cfg_write(struct pnv_phb *phb, u16 bdfn,
> int where, int size, u32 val)
>  {
> - struct pnv_phb *phb = pdn->phb->private_data;
> - u32 bdfn = (pdn->busno << 8) | pdn->devfn;
> -
> - pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
> -  __func__, pdn->busno, pdn->devfn, where, size, val);
> + pr_devel("%s: bdfn: %x +%x/%x -> %08x\n",
> +  __func__, bdfn, where, size, val);
>   switch (size) {
>   case 1:
>   opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
> @@ -753,12 +748,11 @@ static int pnv_pci_read_config(struct pci_bus *bus,
>   if (!pdn)
>   return PCIBIOS_DEVICE_NOT_FOUND;
>  
> - edev = pdn_to_eeh_dev(pdn);
> + edev = pnv_eeh_find_edev(phb, bdfn);
>   if (!pnv_eeh_pre_cfg_check(edev))
>   return PCIBIOS_DEVICE_NOT_FOUND;
>  
> - ret = pnv_pci_cfg_read(pdn, where, size, val);
> - phb = pdn->phb->private_data;
> + ret = pnv_pci_cfg_read(phb, bdfn, where, size, val);
>   if (phb->flags & PNV_PHB_FLAG_EEH && edev) {
>   if (*val == EEH_IO_ERROR_VALUE(size) &&
>   eeh_dev_check_failure(edev))
> @@ -784,11 +778,11 @@ static int pnv_pci_write_config(struct pci_bus *bus,
>   if (!pdn)
>   return PCIBIOS_DEVICE_NOT_FOUND;
>  
> - edev = pdn_to_eeh_dev(pdn);
> + edev = pnv_eeh_find_edev(phb, bdfn);
>   if (!pnv_eeh_pre_cfg_check(edev))
>   return PCIBIOS_DEVICE_NOT_FOUND;
>  
> - ret = pnv_pci_cfg_write(pdn, where, size, val);
> + ret = pnv_pci_cfg_write(phb, bdfn, where, size, val);
>  
>   if (!(phb->flags & PNV_PHB_FLAG_EEH))
>   pnv_pci_config_check_eeh(phb, bdfn);
> diff --git a/arch/powerpc/platforms/powernv/pci.h 
> b/arch/powerpc/platforms/powernv/pci.h
> index b

Re: [Very RFC 22/46] powernv/eeh: Allocate eeh_dev's when needed

2019-11-26 Thread Alexey Kardashevskiy



On 25/11/2019 15:26, Oliver O'Halloran wrote:
> On Mon, Nov 25, 2019 at 2:27 PM Alexey Kardashevskiy  wrote:
>>
>>
>>
>> On 20/11/2019 12:28, Oliver O'Halloran wrote:
>>> Have the PowerNV EEH backend allocate the eeh_dev if needed rather than 
>>> using
>>> the one attached to the pci_dn.
>>
>> So that pci_dn attached one is leaked then?
> 
> Sorta, the eeh_dev attached to the pci_dn is supposed to have the same
> lifetime as the pci_dn it's attached to. Whatever frees the pci_dn
> should also be freeing the eeh_dev, but I'm pretty sure the only
> situation where that actually happens is when removing the pci_dn for
> VFs.


Oh, that's lovely. add_sriov_vf_pdns() calls eeh_dev_init() to allocate
@edev but remove_sriov_vf_pdns() does kfree(edev) by itself.


> It's bad.

No sh*t :)

> 
>>> This gets us most of the way towards decoupling
>>> pci_dn from the PowerNV EEH code.
>>>
>>> Signed-off-by: Oliver O'Halloran 
>>> ---
>>> We should probably be free()ing the eeh_dev somewhere. The pci_dev release
>>> function is the right place for it.
>>> ---
>>>  arch/powerpc/platforms/powernv/eeh-powernv.c | 22 
>>>  1 file changed, 18 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
>>> b/arch/powerpc/platforms/powernv/eeh-powernv.c
>>> index 1cd80b35..7aba18e08996 100644
>>> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
>>> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
>>> @@ -366,10 +366,9 @@ static int pnv_eeh_write_config(struct eeh_dev *edev,
>>>   */
>>>  static struct eeh_dev *pnv_eeh_probe_pdev(struct pci_dev *pdev)
>>>  {
>>> - struct pci_dn *pdn = pci_get_pdn(pdev);
>>> - struct pci_controller *hose = pdn->phb;
>>> - struct pnv_phb *phb = hose->private_data;
>>> - struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
>>> + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
>>> + struct pci_controller *hose = phb->hose;
>>> + struct eeh_dev *edev;
>>>   uint32_t pcie_flags;
>>>   int ret;
>>>   int config_addr = (pdev->bus->number << 8) | (pdev->devfn);
>>> @@ -415,12 +414,27 @@ static struct eeh_dev *pnv_eeh_probe_pdev(struct 
>>> pci_dev *pdev)
>>>   if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
>>>   return NULL;
>>>
>>> + /* otherwise allocate and initialise a new eeh_dev */
>>> + edev = kzalloc(sizeof(*edev), GFP_KERNEL);
>>> + if (!edev) {
>>> + pr_err("%s: out of memory lol\n", __func__);
>>
>> "lol"?
> 
> yeah lol

"unprofessional" is the word for this ;)


> 
> I am pretty sure we do not have to print anything if alloc failed
>> as alloc prints an error anyway. Thanks,
> 
> It does? Neat.

Well, it is this:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst#n878

===
These generic allocation functions all emit a stack dump on failure when
used
without __GFP_NOWARN so there is no use in emitting an additional failure
message when NULL is returned.
===

More than a printk. A small detail though.


-- 
Alexey


Re: [PATCH 00/14] powerpc/vas: Page fault handling for user space NX requests

2019-11-26 Thread Andrew Donnellan

Hi Haren,

On 27/11/19 12:00 pm, Haren Myneni wrote:

Haren Myneni (14):
   powerpc/vas: Describe vas-port and interrupts properties
   Revert "powerpc/powernv: remove the unused vas_win_paste_addr and
 vas_win_id functions"
   powerpc/vas: Define nx_fault_stamp in coprocessor_request_block
   powerpc/vas: Setup IRQ mapping and register port for each window
   powerpc/vas: Setup fault window per VAS instance
   powerpc/VAS: Setup fault handler per VAS instance
   powerpc/vas: Read and process fault CRBs
   powerpc/vas: Take reference to PID and mm for user space windows
   powerpc/vas: Update CSB and notify process for fault CRBs
   powerpc/vas: Print CRB and FIFO values
   powerpc/vas: Do not use default credits for receive window
   powerpc/VAS: Return credits after handling fault
   powerpc/vas: Display process stuck message
   powerpc/vas: Free send window in VAS instance after credits returned


In future, please send the patches in reply to the cover letter (and for 
series that don't have a cover letter, send patch 2 onwards as a reply 
to patch 1).


You may want to consider using git send-email which automates all this 
for you.


Thanks,
--
Andrew Donnellan  OzLabs, ADL Canberra
a...@linux.ibm.com IBM Australia Limited



[PATCH 14/14] powerpc/vas: Free send window in VAS instance after credits returned

2019-11-26 Thread Haren Myneni


NX may be processing requests while trying to close window. Wait until
all credits are returned and then free send window from VAS instance.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-window.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 9ba354c..244952d7 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1319,14 +1319,14 @@ int vas_win_close(struct vas_window *window)
 
unmap_paste_region(window);
 
-   clear_vinst_win(window);
-
poll_window_busy_state(window);
 
unpin_close_window(window);
 
poll_window_credits(window);
 
+   clear_vinst_win(window);
+
poll_window_castout(window);
 
/* if send window, drop reference to matching receive window */
-- 
1.8.3.1





[PATCH 13/14] powerpc/vas: Display process stuck message

2019-11-26 Thread Haren Myneni


A process cannot close the send window until all requests are
processed. This means waiting until the window state is not busy
and the send credits are returned. Display a debug message in case
closing the window takes longer than expected.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-window.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 1c0788c..9ba354c 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1186,6 +1186,7 @@ static void poll_window_credits(struct vas_window *window)
 {
u64 val;
int creds, mode;
+   int count = 0;
 
val = read_hvwc_reg(window, VREG(WINCTL));
if (window->tx_win)
@@ -1204,10 +1205,25 @@ static void poll_window_credits(struct vas_window 
*window)
creds = GET_FIELD(VAS_LRX_WCRED, val);
}
 
+   /*
+* Takes around few microseconds to complete all pending requests
+* and return credits.
+* TODO: Issue CRB Kill to stop all pending requests. Need only
+*   if there is a bug in NX or fault handling in kernel.
+*/
if (creds < window->wcreds_max) {
val = 0;
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(10));
+   count++;
+   /*
+* Process can not close send window until all credits are
+* returned.
+*/
+   if (!(count % 1))
+   pr_debug("%s() pid %d stuck? retries %d\n", __func__,
+   vas_window_pid(window), count);
+
goto retry;
}
 }
@@ -1221,6 +1237,7 @@ static void poll_window_busy_state(struct vas_window 
*window)
 {
int busy;
u64 val;
+   int count = 0;
 
 retry:
val = read_hvwc_reg(window, VREG(WIN_STATUS));
@@ -1229,6 +1246,15 @@ static void poll_window_busy_state(struct vas_window 
*window)
val = 0;
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(5));
+   count++;
+   /*
+* Takes around 5 microseconds to process all pending
+* requests.
+*/
+   if (!(count % 1))
+   pr_debug("%s() pid %d stuck? retries %d\n", __func__,
+   vas_window_pid(window), count);
+
goto retry;
}
 }
-- 
1.8.3.1





[PATCH 12/14] powerpc/VAS: Return credits after handling fault

2019-11-26 Thread Haren Myneni


NX expects OS to return credit for send window after processing each
fault. Also credit has to be returned even for fault window.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-fault.c  |  9 +
 arch/powerpc/platforms/powernv/vas-window.c | 17 +
 arch/powerpc/platforms/powernv/vas.h|  1 +
 3 files changed, 27 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
b/arch/powerpc/platforms/powernv/vas-fault.c
index ad594c8..2a3ee9f 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -244,6 +244,10 @@ static void process_fault_crbs(struct vas_instance *vinst)
memset(fifo, 0, CRB_SIZE);
mutex_unlock(&vinst->mutex);
 
+   /*
+* Return credit for the fault window.
+*/
+   vas_return_credit(vinst->fault_win, 0);
pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d pending 
%d\n",
vinst->vas_id, vinst->fault_fifo, fifo,
vinst->fault_crbs,
@@ -270,6 +274,11 @@ static void process_fault_crbs(struct vas_instance *vinst)
}
 
update_csb(window, crb);
+   /*
+* Return credit for send window after processing
+* fault CRB.
+*/
+   vas_return_credit(window, 1);
} while (true);
 }
 
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index ca208a3..1c0788c 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1323,6 +1323,23 @@ int vas_win_close(struct vas_window *window)
 EXPORT_SYMBOL_GPL(vas_win_close);
 
 /*
+ * Return credit for the given window.
+ */
+void vas_return_credit(struct vas_window *window, bool tx)
+{
+   uint64_t val;
+
+   val = 0ULL;
+   if (tx) { /* send window */
+   val = SET_FIELD(VAS_TX_WCRED, val, 1);
+   write_hvwc_reg(window, VREG(TX_WCRED_ADDER), val);
+   } else {
+   val = SET_FIELD(VAS_LRX_WCRED, val, 1);
+   write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), val);
+   }
+}
+
+/*
  * Return a system-wide unique window id for the window @win.
  */
 u32 vas_win_id(struct vas_window *win)
diff --git a/arch/powerpc/platforms/powernv/vas.h 
b/arch/powerpc/platforms/powernv/vas.h
index 75bea1d..b8b90f3 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -421,6 +421,7 @@ struct vas_winctx {
 extern void vas_wakeup_fault_handler(int virq, void *arg);
 extern int vas_setup_fault_handler(struct vas_instance *vinst);
 extern void vas_cleanup_fault_handler(struct vas_instance *vinst);
+extern void vas_return_credit(struct vas_window *window, bool tx);
 extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
uint32_t pswid);
 
-- 
1.8.3.1





[PATCH 11/14] powerpc/vas: Do not use default credits for receive window

2019-11-26 Thread Haren Myneni


System checkstops if RxFIFO overruns with more requests than the
maximum possible number of CRBs allowed in FIFO at any time. So
max credits value (rxattr.wcreds_max) is set and is passed to
vas_rx_win_open() by the the driver.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-window.c | 4 ++--
 arch/powerpc/platforms/powernv/vas.h| 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index ad3104c..ca208a3 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -782,7 +782,7 @@ static bool rx_win_args_valid(enum vas_cop_type cop,
if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
return false;
 
-   if (attr->wcreds_max > VAS_RX_WCREDS_MAX)
+   if (!attr->wcreds_max)
return false;
 
if (attr->nx_win) {
@@ -888,7 +888,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
rxwin->nx_win = rxattr->nx_win;
rxwin->user_win = rxattr->user_win;
rxwin->cop = cop;
-   rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+   rxwin->wcreds_max = rxattr->wcreds_max;
 
init_winctx_for_rxwin(rxwin, rxattr, &winctx);
init_winctx_regs(rxwin, &winctx);
diff --git a/arch/powerpc/platforms/powernv/vas.h 
b/arch/powerpc/platforms/powernv/vas.h
index 03a1c9f..75bea1d 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -101,11 +101,9 @@
 /*
  * Initial per-process credits.
  * Max send window credits:4K-1 (12-bits in VAS_TX_WCRED)
- * Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED)
  *
  * TODO: Needs tuning for per-process credits
  */
-#define VAS_RX_WCREDS_MAX  ((64 << 10) - 1)
 #define VAS_TX_WCREDS_MAX  ((4 << 10) - 1)
 #define VAS_WCREDS_DEFAULT (1 << 10)
 
-- 
1.8.3.1





[PATCH 10/14] powerpc/vas: Print CRB and FIFO values

2019-11-26 Thread Haren Myneni


Dump FIFO values if the send window could not be found, and print
the CRB for debugging.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-fault.c | 40 ++
 1 file changed, 40 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
b/arch/powerpc/platforms/powernv/vas-fault.c
index dd27649..ad594c8 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -36,6 +36,27 @@ void vas_wakeup_fault_handler(int virq, void *arg)
wake_up(&vinst->fault_wq);
 }
 
+static void dump_crb(struct coprocessor_request_block *crb)
+{
+   struct data_descriptor_entry *dde;
+   struct nx_fault_stamp *nx;
+
+   dde = &crb->source;
+   pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+   be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+   dde->count, dde->index, dde->flags);
+
+   dde = &crb->target;
+   pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+   be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+   dde->count, dde->index, dde->flags);
+
+   nx = &crb->stamp.nx;
+   pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
+   be32_to_cpu(nx->pswid), crb_nx_fault_addr(crb),
+   nx->flags, be32_to_cpu(nx->fault_status));
+}
+
 static void notify_process(pid_t pid, u64 fault_addr)
 {
int rc;
@@ -154,6 +175,23 @@ static void update_csb(struct vas_window *window,
}
 }
 
+static void dump_fifo(struct vas_instance *vinst)
+{
+   int i;
+   unsigned long *fifo = vinst->fault_fifo;
+
+   pr_err("Fault fifo size %d, max crbs %d, crb size %lu\n",
+   vinst->fault_fifo_size,
+   vinst->fault_fifo_size / CRB_SIZE,
+   sizeof(struct coprocessor_request_block));
+
+   pr_err("Fault FIFO Dump:\n");
+   for (i = 0; i < 64; i += 4, fifo += 4) {
+   pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n",
+   i, fifo, *fifo, *(fifo+1), *(fifo+2), *(fifo+3));
+   }
+}
+
 /*
  * Process CRBs that we receive on the fault window.
  */
@@ -211,6 +249,7 @@ static void process_fault_crbs(struct vas_instance *vinst)
vinst->fault_crbs,
atomic_read(&vinst->pending_fault));
 
+   dump_crb(crb);
window = vas_pswid_to_window(vinst, crb_nx_pswid(crb));
 
if (IS_ERR(window)) {
@@ -220,6 +259,7 @@ static void process_fault_crbs(struct vas_instance *vinst)
 * even clean it up (return credit).
 * But we should not get here.
 */
+   dump_fifo(vinst);
pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, 
fault_crbs %d, pending %d bad CRB?\n",
vinst->vas_id, vinst->fault_fifo, fifo,
crb_nx_pswid(crb), vinst->fault_crbs,
-- 
1.8.3.1





[PATCH 09/14] powerpc/vas: Update CSB and notify process for fault CRBs

2019-11-26 Thread Haren Myneni


For each fault CRB, update the fault address in the CRB (fault_storage_addr)
and the translation error status in the CSB so that user space can touch
the fault address and resend the request. If the user space passed an
invalid CSB address, send a SIGSEGV signal to the process.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-fault.c | 121 -
 1 file changed, 120 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
b/arch/powerpc/platforms/powernv/vas-fault.c
index 7a8b2b5..dd27649 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -36,6 +36,124 @@ void vas_wakeup_fault_handler(int virq, void *arg)
wake_up(&vinst->fault_wq);
 }
 
+static void notify_process(pid_t pid, u64 fault_addr)
+{
+   int rc;
+   struct kernel_siginfo info;
+
+   memset(&info, 0, sizeof(info));
+
+   info.si_signo = SIGSEGV;
+   info.si_errno = EFAULT;
+   info.si_code = SEGV_MAPERR;
+
+   info.si_addr = (void *)fault_addr;
+   rcu_read_lock();
+   rc = kill_pid_info(SIGSEGV, &info, find_vpid(pid));
+   rcu_read_unlock();
+
+   pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, pid, rc);
+}
+
+/*
+ * Update the CSB to indicate a translation error.
+ *
+ * If the fault is in the CSB address itself or if we are unable to
+ * update the CSB, send a signal to the process, because we have no
+ * other way of notifying the user process.
+ *
+ * Remaining settings in the CSB are based on wait_for_csb() of
+ * NX-GZIP.
+ */
+static void update_csb(struct vas_window *window,
+   struct coprocessor_request_block *crb)
+{
+   int rc;
+   pid_t pid;
+   int task_exit = 0;
+   void __user *csb_addr;
+   struct task_struct *tsk;
+   struct coprocessor_status_block csb;
+
+   /*
+* NX user space windows can not be opened for task->mm=NULL
+* and faults will not be generated for kernel requests.
+*/
+   if (!window->mm || !window->user_win)
+   return;
+
+   csb_addr = (void *)__be64_to_cpu(crb->csb_addr);
+
+   csb.cc = CSB_CC_TRANSLATION;
+   csb.ce = CSB_CE_TERMINATION;
+   csb.cs = 0;
+   csb.count = 0;
+
+   /*
+* Returns the fault address in CPU format since it is passed with
+* signal. But if the user space expects BE format, need changes.
+* i.e either kernel (here) or user should convert to CPU format.
+* Not both!
+*/
+   csb.address = crb_nx_fault_addr(crb);
+   csb.flags = 0;
+
+   use_mm(window->mm);
+   rc = copy_to_user(csb_addr, &csb, sizeof(csb));
+   /*
+* User space polls on csb.flags (first byte). So add barrier
+* then copy first byte with csb flags update.
+*/
+   smp_mb();
+   if (!rc) {
+   csb.flags = CSB_V;
+   rc = copy_to_user(csb_addr, &csb, sizeof(u8));
+   }
+   unuse_mm(window->mm);
+
+   /* Success */
+   if (!rc)
+   return;
+
+   /*
+* User space passed invalid CSB address, Notify process with
+* SEGV signal.
+*/
+   tsk = get_pid_task(window->pid, PIDTYPE_PID);
+   /*
+* Send window will be closed after processing all NX requests
+* and process exits after closing all windows. In multi-thread
+* applications, thread may not exists, but does not close FD
+* (means send window) upon exit. Parent thread (tgid) can use
+* and close the window later.
+*/
+   if (tsk) {
+   if (tsk->flags & PF_EXITING)
+   task_exit = 1;
+   put_task_struct(tsk);
+   pid = vas_window_pid(window);
+   } else {
+   pid = vas_window_tgid(window);
+
+   rcu_read_lock();
+   tsk = find_task_by_vpid(pid);
+   if (!tsk) {
+   rcu_read_unlock();
+   return;
+   }
+   if (tsk->flags & PF_EXITING)
+   task_exit = 1;
+   rcu_read_unlock();
+   }
+
+   /* Do not notify if the task is exiting. */
+   if (!task_exit) {
+   pr_err("Invalid CSB address 0x%p signalling pid(%d)\n",
+   csb_addr, pid);
+   notify_process(pid, (u64)csb_addr);
+   }
+}
+
 /*
  * Process CRBs that we receive on the fault window.
  */
@@ -97,7 +215,7 @@ static void process_fault_crbs(struct vas_instance *vinst)
 
if (IS_ERR(window)) {
/*
-* What now? We got an interrupt about a specific send
+* We got an interrupt about a specific send
 * window but we can't find that window and we can't
 * even clean it up (return credit).
 

[PATCH 08/14] powerpc/vas: Take reference to PID and mm for user space windows

2019-11-26 Thread Haren Myneni


A process closes its windows after their requests are completed. In
multi-thread applications, a child thread can open a window, but the FD
release will not be called upon its exit. The parent thread will close
the window later upon its own exit.

The parent can also send NX requests with this window and NX can
generate page faults. After kernel handles the page fault, send
signal to process by using PID if CSB address is invalid. Parent
thread will not receive signal since its PID is different from the one
saved in vas_window. So use tgid in case if the task for the pid saved
in window is not running and send signal to its parent.

To prevent the pid from being reused until the window is closed, take
references to the pid and the task's mm.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-debug.c  |  2 +-
 arch/powerpc/platforms/powernv/vas-window.c | 44 ++---
 arch/powerpc/platforms/powernv/vas.h| 14 -
 3 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/vas-debug.c 
b/arch/powerpc/platforms/powernv/vas-debug.c
index 09e63df..ef9a717 100644
--- a/arch/powerpc/platforms/powernv/vas-debug.c
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -38,7 +38,7 @@ static int info_show(struct seq_file *s, void *private)
 
seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
window->tx_win ? "Send" : "Receive");
-   seq_printf(s, "Pid : %d\n", window->pid);
+   seq_printf(s, "Pid : %d\n", vas_window_pid(window));
 
 unlock:
mutex_unlock(&vas_mutex);
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 7fc1542..ad3104c 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -12,6 +12,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include "vas.h"
@@ -887,8 +889,6 @@ struct vas_window *vas_rx_win_open(int vasid, enum 
vas_cop_type cop,
rxwin->user_win = rxattr->user_win;
rxwin->cop = cop;
rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
-   if (rxattr->user_win)
-   rxwin->pid = task_pid_vnr(current);
 
init_winctx_for_rxwin(rxwin, rxattr, &winctx);
init_winctx_regs(rxwin, &winctx);
@@ -1037,7 +1037,6 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
txwin->tx_win = 1;
txwin->rxwin = rxwin;
txwin->nx_win = txwin->rxwin->nx_win;
-   txwin->pid = attr->pid;
txwin->user_win = attr->user_win;
txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
 
@@ -1079,6 +1078,34 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
goto free_window;
}
 
+   if (txwin->user_win) {
+   /*
+* Window opened by child thread may not be closed when
+* it exits. So take reference to its pid and release it
+* when the window is free by parent thread.
+* Acquire a reference to the task's pid to make sure
+* pid will not be re-used.
+*/
+   txwin->pid = get_task_pid(current, PIDTYPE_PID);
+   /*
+* Acquire a reference to the task's mm.
+*/
+   txwin->mm = get_task_mm(current);
+
+   if (txwin->mm) {
+   mmput(txwin->mm);
+   mmgrab(txwin->mm);
+   mm_context_add_copro(txwin->mm);
+   } else {
+   put_pid(txwin->pid);
+   pr_err("VAS: pid(%d): mm_struct is not found\n",
+   current->pid);
+   rc = -EPERM;
+   goto free_window;
+   }
+   txwin->tgid = task_tgid_vnr(current);
+   }
+
set_vinst_win(vinst, txwin);
 
return txwin;
@@ -1277,8 +1304,17 @@ int vas_win_close(struct vas_window *window)
poll_window_castout(window);
 
/* if send window, drop reference to matching receive window */
-   if (window->tx_win)
+   if (window->tx_win) {
+   if (window->user_win) {
+   /* Drop references to pid and mm */
+   put_pid(window->pid);
+   if (window->mm) {
+   mmdrop(window->mm);
+   mm_context_remove_copro(window->mm);
+   }
+   }
put_rx_win(window->rxwin);
+   }
 
vas_window_free(window);
 
diff --git a/arch/powerpc/platforms/powernv/vas.h 
b/arch/powerpc/platforms/powernv/vas.h
index eb929c7..03a1c9f 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -343,7 +343,9 @@ struct vas_window {
bool user_win;  /* True if user s

[PATCH 07/14] powerpc/vas: Read and process fault CRBs

2019-11-26 Thread Haren Myneni


NX pastes the CRB in the fault FIFO and generates an interrupt whenever
it faults on a CRB. The OS reads CRBs from the fault FIFO and processes
them by setting the faulting address in fault_storage_addr in the CRB
and updating the CSB. When the CSB status is changed, the process sends
the NX request again after touching the fault address.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-fault.c  | 81 +
 arch/powerpc/platforms/powernv/vas-window.c | 51 ++
 arch/powerpc/platforms/powernv/vas.h|  3 ++
 3 files changed, 135 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
b/arch/powerpc/platforms/powernv/vas-fault.c
index c6c105c..7a8b2b5 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "vas.h"
@@ -36,6 +37,84 @@ void vas_wakeup_fault_handler(int virq, void *arg)
 }
 
 /*
+ * Process CRBs that we receive on the fault window.
+ */
+static void process_fault_crbs(struct vas_instance *vinst)
+{
+   void *fifo;
+   struct vas_window *window;
+   struct coprocessor_request_block buf;
+   struct coprocessor_request_block *crb;
+   u64 csb_addr;
+
+   crb = &buf;
+
+   /*
+* VAS can interrupt with multiple page faults. So process all
+* valid CRBs within fault FIFO until reaches invalid CRB.
+* For valid CRBs, csb_addr should be valid address points to CSB
+* section within CRB. After reading CRB entry, it is reset with
+* 0's in fault FIFO.
+*
+* In case kernel receives another interrupt with different page
+* fault and is processed by the previous handling, will be returned
+* from this function when it sees invalid CRB (means 0's).
+*/
+   do {
+   mutex_lock(&vinst->mutex);
+
+   /*
+* Advance the fault fifo pointer to next CRB.
+* Use CRB_SIZE rather than sizeof(*crb) since the latter is
+* aligned to CRB_ALIGN (256) but the CRB written to by VAS is
+* only CRB_SIZE in len.
+*/
+   fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE);
+   csb_addr = ((struct coprocessor_request_block *)fifo)->csb_addr;
+
+   /*
+* Return if reached invalid CRB.
+*/
+   if (!csb_addr) {
+   mutex_unlock(&vinst->mutex);
+   return;
+   }
+
+   vinst->fault_crbs++;
+   if (vinst->fault_crbs == vinst->fault_fifo_size/CRB_SIZE)
+   vinst->fault_crbs = 0;
+
+   memcpy(crb, fifo, CRB_SIZE);
+   memset(fifo, 0, CRB_SIZE);
+   mutex_unlock(&vinst->mutex);
+
+   pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d pending 
%d\n",
+   vinst->vas_id, vinst->fault_fifo, fifo,
+   vinst->fault_crbs,
+   atomic_read(&vinst->pending_fault));
+
+   window = vas_pswid_to_window(vinst, crb_nx_pswid(crb));
+
+   if (IS_ERR(window)) {
+   /*
+* What now? We got an interrupt about a specific send
+* window but we can't find that window and we can't
+* even clean it up (return credit).
+* But we should not get here.
+*/
+   pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, 
fault_crbs %d, pending %d bad CRB?\n",
+   vinst->vas_id, vinst->fault_fifo, fifo,
+   crb_nx_pswid(crb), vinst->fault_crbs,
+   atomic_read(&vinst->pending_fault));
+
+   WARN_ON_ONCE(1);
+   return;
+   }
+
+   } while (true);
+}
+
+/*
  * Fault handler thread for each VAS instance and process fault CRBs.
  */
 static int fault_handler_func(void *arg)
@@ -54,6 +133,8 @@ static int fault_handler_func(void *arg)
break;
 
atomic_dec(&vinst->pending_fault);
+   process_fault_crbs(vinst);
+
} while (!kthread_should_stop());
 
return 0;
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 5f1faeb..7fc1542 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1294,3 +1294,54 @@ u32 vas_win_id(struct vas_window *win)
return encode_pswid(win->vinst->vas_id, win->winid);
 }
 EXPORT_SYMBOL_GPL(vas_win_id);
+
+struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+   uint32_t pswid)
+{
+   int winid;
+   struct vas_wind

[PATCH 06/14] powerpc/vas: Setup fault handler per VAS instance

2019-11-26 Thread Haren Myneni


The fault handler is created as a kernel thread for each VAS instance
and is invoked whenever NX generates a page fault. This thread reads
CRBs from the fault FIFO and processes them.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-fault.c | 54 ++
 arch/powerpc/platforms/powernv/vas.c   |  7 
 arch/powerpc/platforms/powernv/vas.h   |  6 
 3 files changed, 67 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
b/arch/powerpc/platforms/powernv/vas-fault.c
index a5e63a5..c6c105c 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "vas.h"
@@ -24,6 +25,54 @@
  */
 #define VAS_FAULT_WIN_FIFO_SIZE(4 << 20)
 
+struct task_struct *fault_handler;
+
+void vas_wakeup_fault_handler(int virq, void *arg)
+{
+   struct vas_instance *vinst = arg;
+
+   atomic_inc(&vinst->pending_fault);
+   wake_up(&vinst->fault_wq);
+}
+
+/*
+ * Fault handler thread for each VAS instance and process fault CRBs.
+ */
+static int fault_handler_func(void *arg)
+{
+   struct vas_instance *vinst = (struct vas_instance *)arg;
+
+   do {
+   if (signal_pending(current))
+   flush_signals(current);
+
+   wait_event_interruptible(vinst->fault_wq,
+   atomic_read(&vinst->pending_fault) ||
+   kthread_should_stop());
+
+   if (kthread_should_stop())
+   break;
+
+   atomic_dec(&vinst->pending_fault);
+   } while (!kthread_should_stop());
+
+   return 0;
+}
+
+/*
+ * Create a thread that processes the fault CRBs.
+ */
+int vas_setup_fault_handler(struct vas_instance *vinst)
+{
+   vinst->fault_handler = kthread_run(fault_handler_func, (void *)vinst,
+   "vas-fault-%u", vinst->vas_id);
+
+   if (IS_ERR(vinst->fault_handler))
+   return PTR_ERR(vinst->fault_handler);
+
+   return 0;
+}
+
 /*
  * Fault window is opened per VAS instance. NX pastes fault CRB in fault
  * FIFO upon page faults.
@@ -102,4 +151,9 @@ int vas_cleanup_fault_window(struct vas_instance *vinst)
 
return rc;
 }
+
+void vas_cleanup_fault_handler(struct vas_instance *vinst)
+{
+   kthread_stop(vinst->fault_handler);
+}
 #endif
diff --git a/arch/powerpc/platforms/powernv/vas.c 
b/arch/powerpc/platforms/powernv/vas.c
index dd0e06c..db2aca4 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -30,6 +30,7 @@ static irqreturn_t vas_irq_handler(int virq, void *data)
struct vas_instance *vinst = data;
 
pr_devel("VAS %d: virq %d\n", vinst->vas_id, virq);
+   vas_wakeup_fault_handler(virq, data);
 
return IRQ_HANDLED;
 }
@@ -54,6 +55,10 @@ static void vas_irq_fault_handle_setup(struct vas_instance 
*vinst)
 * for user space.
 */
rc = vas_setup_fault_window(vinst);
+
+   if (!rc)
+   rc = vas_setup_fault_handler(vinst);
+
if (rc) {
free_irq(vinst->virq, vinst);
vinst->virq = 0;
@@ -129,6 +134,8 @@ static int init_vas_instance(struct platform_device *pdev)
}
}
 
+   init_waitqueue_head(&vinst->fault_wq);
+
pr_devel("Initialized instance [%s, %d] paste_base 0x%llx 
paste_win_id_shift 0x%llx IRQ %d Port 0x%llx\n",
pdev->name, vasid, vinst->paste_base_addr,
vinst->paste_win_id_shift, vinst->virq,
diff --git a/arch/powerpc/platforms/powernv/vas.h 
b/arch/powerpc/platforms/powernv/vas.h
index e23fd69..ee284b3 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -317,6 +317,9 @@ struct vas_instance {
int virq;
int fault_fifo_size;
void *fault_fifo;
+   atomic_t pending_fault;
+   wait_queue_head_t fault_wq;
+   struct task_struct *fault_handler;
struct vas_window *fault_win; /* Fault window */
 
struct mutex mutex;
@@ -414,6 +417,9 @@ struct vas_winctx {
 extern void vas_window_free_dbgdir(struct vas_window *win);
 extern int vas_setup_fault_window(struct vas_instance *vinst);
 extern int vas_cleanup_fault_window(struct vas_instance *vinst);
+extern void vas_wakeup_fault_handler(int virq, void *arg);
+extern int vas_setup_fault_handler(struct vas_instance *vinst);
+extern void vas_cleanup_fault_handler(struct vas_instance *vinst);
 
 static inline void vas_log_write(struct vas_window *win, char *name,
void *regptr, u64 val)
-- 
1.8.3.1





[PATCH 05/14] powerpc/vas: Setup fault window per VAS instance

2019-11-26 Thread Haren Myneni


Set up a fault window for each VAS instance. When NX gets a fault on a
request buffer, it writes fault CRBs into the corresponding fault FIFO
and then sends an interrupt to the OS.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/Makefile |   2 +-
 arch/powerpc/platforms/powernv/vas-fault.c  | 105 
 arch/powerpc/platforms/powernv/vas-window.c |  13 +++-
 arch/powerpc/platforms/powernv/vas.c|  12 
 arch/powerpc/platforms/powernv/vas.h|   6 ++
 5 files changed, 134 insertions(+), 4 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/vas-fault.c

diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index a3ac964..74c2246 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -17,6 +17,6 @@ obj-$(CONFIG_MEMORY_FAILURE)  += opal-memory-errors.o
 obj-$(CONFIG_OPAL_PRD) += opal-prd.o
 obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
 obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
-obj-$(CONFIG_PPC_VAS)  += vas.o vas-window.o vas-debug.o
+obj-$(CONFIG_PPC_VAS)  += vas.o vas-window.o vas-debug.o vas-fault.o
 obj-$(CONFIG_OCXL_BASE)+= ocxl.o
 obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
b/arch/powerpc/platforms/powernv/vas-fault.c
new file mode 100644
index 000..a5e63a5
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * VAS Fault handling.
+ * Copyright 2019, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vas.h"
+
+/*
+ * The maximum FIFO size for fault window can be 8MB
+ * (VAS_RX_FIFO_SIZE_MAX). Using 4MB FIFO since each VAS
+ * instance will be having fault window.
+ * 8MB FIFO can be used if expects more faults for each VAS
+ * instance.
+ */
+#define VAS_FAULT_WIN_FIFO_SIZE(4 << 20)
+
+/*
+ * Fault window is opened per VAS instance. NX pastes fault CRB in fault
+ * FIFO upon page faults.
+ */
+int vas_setup_fault_window(struct vas_instance *vinst)
+{
+   struct vas_rx_win_attr attr;
+
+   vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE;
+   vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL);
+   if (!vinst->fault_fifo) {
+   pr_err("Unable to alloc %d bytes for fault_fifo\n",
+   vinst->fault_fifo_size);
+   return -ENOMEM;
+   }
+
+   vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT);
+
+   attr.rx_fifo_size = vinst->fault_fifo_size;
+   attr.rx_fifo = vinst->fault_fifo;
+
+   /*
+* Max creds is based on number of CRBs can fit in the FIFO.
+* (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds
+* will be 0x since the receive creds field is 16bits wide.
+*/
+   attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE;
+   attr.lnotify_lpid = 0;
+   attr.lnotify_pid = mfspr(SPRN_PID);
+   attr.lnotify_tid = mfspr(SPRN_PID);
+
+   vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT,
+   &attr);
+
+   if (IS_ERR(vinst->fault_win)) {
+   pr_err("VAS: Error %ld opening FaultWin\n",
+   PTR_ERR(vinst->fault_win));
+   kfree(vinst->fault_fifo);
+   return PTR_ERR(vinst->fault_win);
+   }
+
+   pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n",
+   vinst->fault_win->winid, attr.lnotify_lpid,
+   attr.lnotify_pid, attr.lnotify_tid);
+
+   return 0;
+}
+
+/*
+ * We do not remove VAS instances. The following functions are needed
+ * when VAS hotplug is supported.
+ */
+#if 0
+/*
+ * Close the fault window and free the receive FIFO.
+ *
+ * TODO:vas_win_close() will block till pending requests are drained.
+ * The fault thread itself allocates the FIFO, opens the window
+ * and when done, closes the window and frees the FIFO.
+ * Are there any other race condition to watch for here or in
+ * vas_win_close()?
+ *
+ */
+int vas_cleanup_fault_window(struct vas_instance *vinst)
+{
+   int rc;
+
+   rc = vas_win_close(vinst->fault_win);
+   if (rc < 0) {
+   pr_err("VAS Fault handler %d: error %d closing window\n",
+   vinst->vas_id, rc);
+   }
+
+   kfree(vinst->fault_fifo);
+   vinst->fault_fifo = NULL;
+
+   return rc;
+}
+#endif
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index ad6be91..5f1faeb 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -383,7 +383,7 @@ int init_winctx_regs(struct vas_window *window, struct 
vas_winctx *winctx)
init_xlate_regs(window, winctx->u

[PATCH 04/14] powerpc/vas: Setup IRQ mapping and register port for each window

2019-11-26 Thread Haren Myneni


Read the interrupt and port values from the device tree, set up the IRQ
mapping, and register an IRQ for each VAS instance. Set the port value
for each NX window. When NX sees a fault on a CRB, the kernel gets an
interrupt and handles the fault.

IRQ setup and fault handling are needed only for user space send
windows. So for kernel requests, ignore the case where the interrupts
property is not available.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/powernv/vas-window.c | 14 ++
 arch/powerpc/platforms/powernv/vas.c| 68 ++---
 arch/powerpc/platforms/powernv/vas.h|  2 +
 3 files changed, 78 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index ea5ca02..ad6be91 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -758,6 +758,8 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
 
winctx->min_scope = VAS_SCOPE_LOCAL;
winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+   if (rxwin->vinst->virq)
+   winctx->irq_port = rxwin->vinst->irq_port;
 }
 
 static bool rx_win_args_valid(enum vas_cop_type cop,
@@ -959,6 +961,8 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
winctx->tc_mode = txattr->tc_mode;
winctx->min_scope = VAS_SCOPE_LOCAL;
winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+   if (txwin->vinst->virq)
+   winctx->irq_port = txwin->vinst->irq_port;
 
winctx->pswid = 0;
 }
@@ -1050,6 +1054,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
}
} else {
/*
+* Interrupt hanlder setup failed. Means NX can not generate
+* fault for page fault. So not opening for user space tx
+* window.
+*/
+   if (!vinst->virq) {
+   rc = -ENODEV;
+   goto free_window;
+   }
+
+   /*
 * A user mapping must ensure that context switch issues
 * CP_ABORT for this thread.
 */
diff --git a/arch/powerpc/platforms/powernv/vas.c 
b/arch/powerpc/platforms/powernv/vas.c
index ed9cc6d..71bddaa 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -14,6 +14,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 #include "vas.h"
@@ -23,9 +25,33 @@
 
 static DEFINE_PER_CPU(int, cpu_vas_id);
 
+static irqreturn_t vas_irq_handler(int virq, void *data)
+{
+   struct vas_instance *vinst = data;
+
+   pr_devel("VAS %d: virq %d\n", vinst->vas_id, virq);
+
+   return IRQ_HANDLED;
+}
+
+static void vas_irq_fault_handle_setup(struct vas_instance *vinst)
+{
+   int rc;
+   char devname[64];
+
+   snprintf(devname, sizeof(devname), "vas-inst-%d", vinst->vas_id);
+   rc = request_irq(vinst->virq, vas_irq_handler, 0, devname, vinst);
+   if (rc) {
+   pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n",
+   vinst->vas_id, vinst->virq, rc);
+   vinst->virq = 0;
+   }
+}
+
 static int init_vas_instance(struct platform_device *pdev)
 {
-   int rc, cpu, vasid;
+   int rc, cpu, vasid, nresources = 5;
+   uint64_t port;
struct resource *res;
struct vas_instance *vinst;
struct device_node *dn = pdev->dev.of_node;
@@ -36,7 +62,18 @@ static int init_vas_instance(struct platform_device *pdev)
return -ENODEV;
}
 
-   if (pdev->num_resources != 4) {
+   rc = of_property_read_u64(dn, "ibm,vas-port", &port);
+   if (rc) {
+   pr_err("No ibm,vas-port property for %s?\n", pdev->name);
+   /* No interrupts property */
+   nresources = 4;
+   }
+
+   /*
+* interrupts property is available with 'ibm,vas-port' property.
+* 4 Resources and 1 IRQ if interrupts property is available.
+*/
+   if (pdev->num_resources != nresources) {
pr_err("Unexpected DT configuration for [%s, %d]\n",
pdev->name, vasid);
return -ENODEV;
@@ -51,6 +88,7 @@ static int init_vas_instance(struct platform_device *pdev)
mutex_init(&vinst->mutex);
vinst->vas_id = vasid;
vinst->pdev = pdev;
+   vinst->irq_port = port;
 
res = &pdev->resource[0];
vinst->hvwc_bar_start = res->start;
@@ -66,12 +104,23 @@ static int init_vas_instance(struct platform_device *pdev)
pr_err("Bad 'paste_win_id_shift' in DT, %llx\n", res->end);
goto free_vinst;
}
-
vinst->paste_win_id_shift = 63 - res->end;
 
-   pr_devel("Initialized instance [%s, %d], paste_base 0x%llx, "
-   "paste_win_id_shift 0x%llx\n", pdev->name, vasid,

[PATCH 03/14] powerpc/vas: Define nx_fault_stamp in coprocessor_request_block

2019-11-26 Thread Haren Myneni


The kernel sets the fault address and status in the CRB for an NX page
fault on a user space address after processing the page fault. User
space gets the signal, handles the fault mentioned in the CRB by
bringing the page into memory, and sends the NX request again.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/include/asm/icswx.h | 32 +++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h
index 9872f85..c071471 100644
--- a/arch/powerpc/include/asm/icswx.h
+++ b/arch/powerpc/include/asm/icswx.h
@@ -108,6 +108,21 @@ struct data_descriptor_entry {
__be64 address;
 } __packed __aligned(DDE_ALIGN);
 
+/* 4.3.2 NX-stamped Fault CRB */
+
+#define NX_STAMP_ALIGN  (0x10)
+
+#define NX_STAMP_ACCESS_MASK(0x01)
+#define NX_STAMP_ACCESS_READ0
+#define NX_STAMP_ACCESS_WRITE   1
+
+struct nx_fault_stamp {
+   __be64 fault_storage_addr;
+   __be16 reserved;
+   __u8   flags;
+   __u8   fault_status;
+   __be32 pswid;
+} __packed __aligned(NX_STAMP_ALIGN);
 
 /* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
 
@@ -135,11 +150,26 @@ struct coprocessor_request_block {
 
struct coprocessor_completion_block ccb;
 
-   u8 reserved[48];
+   union {
+   struct nx_fault_stamp nx;
+   u8 reserved[16];
+   } stamp;
+
+   u8 reserved[32];
 
struct coprocessor_status_block csb;
 } __packed __aligned(CRB_ALIGN);
 
+#define crb_csb_addr(c)__be64_to_cpu(c->csb_addr)
+#define crb_nx_fault_addr(c)   __be64_to_cpu(c->stamp.nx.fault_storage_addr)
+#define crb_nx_flags(c)c->stamp.nx.flags
+#define crb_nx_fault_status(c) c->stamp.nx.fault_status
+
+static inline uint32_t crb_nx_pswid(struct coprocessor_request_block *crb)
+{
+   return __be32_to_cpu(crb->stamp.nx.pswid);
+}
+
 
 /* RFC02167 Initiate Coprocessor Instructions document
  * Chapter 8.2.1.1.1 RS
-- 
1.8.3.1





Re: [Very RFC 25/46] powernv/pci: Remove pdn from pnv_pci_config_check_eeh()

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Despite the name this function is generic PowerNV PCI code rather than 
> anything
> EEH specific. Convert to take a phb and bdfn rather than a pci_dn.
> 
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/platforms/powernv/pci.c | 32 ++--
>  1 file changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci.c 
> b/arch/powerpc/platforms/powernv/pci.c
> index 6170677bfdc7..50142ff045ac 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -591,9 +591,15 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb 
> *phb, u32 pe_no)
>   spin_unlock_irqrestore(&phb->lock, flags);
>  }
>  
> -static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
> +/*
> + * This, very strangely named, function checks if a config access
> + * caused an EEH and un-freezes the PE if it did. This is mainly
> + * for the !CONFIG_EEH case where nothing is going to un-freeze
> + * it for us.
> + */

Rather than writing a comment like this, simply rename it to
pnv_pci_cfg_check_and_unfreeze() or similar as you are changing
callsites anyway. Thanks,



> +static void pnv_pci_config_check_eeh(struct pnv_phb *phb, u16 bdfn)


>  {
> - struct pnv_phb *phb = pdn->phb->private_data;
> + struct pnv_ioda_pe *ioda_pe;
>   u8  fstate = 0;
>   __be16  pcierr = 0;
>   unsigned int pe_no;
> @@ -604,10 +610,11 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
>* setup that yet. So all ER errors should be mapped to
>* reserved PE.
>*/
> - pe_no = pdn->pe_number;
> - if (pe_no == IODA_INVALID_PE) {
> + ioda_pe = __pnv_ioda_get_pe(phb, bdfn);
> + if (ioda_pe)
> + pe_no = ioda_pe->pe_number;
> + else
>   pe_no = phb->ioda.reserved_pe_idx;
> - }
>  
>   /*
>* Fetch frozen state. If the PHB support compound PE,
> @@ -629,7 +636,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
>   }
>  
>   pr_devel(" -> EEH check, bdfn=%04x PE#%x fstate=%x\n",
> -  (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
> +  bdfn, pe_no, fstate);
>  
>   /* Clear the frozen state if applicable */
>   if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
> @@ -642,6 +649,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
>   if (phb->freeze_pe)
>   phb->freeze_pe(phb, pe_no);
>  
> + /* fish out the EEH log and send an EEH event. */
>   pnv_pci_handle_eeh_config(phb, pe_no);
>   }
>  }
> @@ -735,7 +743,8 @@ static int pnv_pci_read_config(struct pci_bus *bus,
>  int where, int size, u32 *val)
>  {
>   struct pci_dn *pdn;
> - struct pnv_phb *phb;
> + struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
> + u16 bdfn = bus->number << 8 | devfn;
>   struct eeh_dev *edev;
>   int ret;
>  
> @@ -755,7 +764,7 @@ static int pnv_pci_read_config(struct pci_bus *bus,
>   eeh_dev_check_failure(edev))
>  return PCIBIOS_DEVICE_NOT_FOUND;
>   } else {
> - pnv_pci_config_check_eeh(pdn);
> + pnv_pci_config_check_eeh(phb, bdfn);
>   }
>  
>   return ret;
> @@ -766,7 +775,8 @@ static int pnv_pci_write_config(struct pci_bus *bus,
>   int where, int size, u32 val)
>  {
>   struct pci_dn *pdn;
> - struct pnv_phb *phb;
> + struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
> + u16 bdfn = bus->number << 8 | devfn;
>   struct eeh_dev *edev;
>   int ret;
>  
> @@ -779,9 +789,9 @@ static int pnv_pci_write_config(struct pci_bus *bus,
>   return PCIBIOS_DEVICE_NOT_FOUND;
>  
>   ret = pnv_pci_cfg_write(pdn, where, size, val);
> - phb = pdn->phb->private_data;
> +
>   if (!(phb->flags & PNV_PHB_FLAG_EEH))
> - pnv_pci_config_check_eeh(pdn);
> + pnv_pci_config_check_eeh(phb, bdfn);
>  
>   return ret;
>  }
> 

-- 
Alexey


[PATCH 02/14] Revert "powerpc/powernv: remove the unused vas_win_paste_addr and vas_win_id functions"

2019-11-26 Thread Haren Myneni


This reverts commit 452d23c0f6bd97f2fd8a9691fee79b76040a0feb.

User space send windows (NX GZIP compression) need vas_win_paste_addr()
to mmap window paste address and vas_win_id() to get window ID when
window address is given.

Added vas_win_id() and vas_win_paste_addr() with:
commit 61f3cca8cda97 ("powerpc/vas: Define vas_win_id()")
commit 5676be2fb7035 ("powerpc/vas: Define vas_win_paste_addr()")

Signed-off-by: Haren Myneni 
---
 arch/powerpc/include/asm/vas.h  | 10 ++
 arch/powerpc/platforms/powernv/vas-window.c | 19 +++
 arch/powerpc/platforms/powernv/vas.h| 20 
 3 files changed, 49 insertions(+)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index f93e6b0..da0b198 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -163,4 +163,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum 
vas_cop_type cop,
  */
 int vas_paste_crb(struct vas_window *win, int offset, bool re);
 
+/*
+ * Return a system-wide unique id for the VAS window @win.
+ */
+extern u32 vas_win_id(struct vas_window *win);
+
+/*
+ * Return the power bus paste address associated with @win so the caller
+ * can map that address into their address space.
+ */
+extern u64 vas_win_paste_addr(struct vas_window *win);
 #endif /* __ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
b/arch/powerpc/platforms/powernv/vas-window.c
index 0c0d27d..ea5ca02 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -40,6 +40,16 @@ static void compute_paste_address(struct vas_window *window, 
u64 *addr, int *len
pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
 }
 
+u64 vas_win_paste_addr(struct vas_window *win)
+{
+   u64 addr;
+
+   compute_paste_address(win, &addr, NULL);
+
+   return addr;
+}
+EXPORT_SYMBOL(vas_win_paste_addr);
+
 static inline void get_hvwc_mmio_bar(struct vas_window *window,
u64 *start, int *len)
 {
@@ -1254,3 +1264,12 @@ int vas_win_close(struct vas_window *window)
return 0;
 }
 EXPORT_SYMBOL_GPL(vas_win_close);
+
+/*
+ * Return a system-wide unique window id for the window @win.
+ */
+u32 vas_win_id(struct vas_window *win)
+{
+   return encode_pswid(win->vinst->vas_id, win->winid);
+}
+EXPORT_SYMBOL_GPL(vas_win_id);
diff --git a/arch/powerpc/platforms/powernv/vas.h 
b/arch/powerpc/platforms/powernv/vas.h
index 5574aec..9cc5251 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -444,6 +444,26 @@ static inline u64 read_hvwc_reg(struct vas_window *win,
return in_be64(win->hvwc_map+reg);
 }
 
+/*
+ * Encode/decode the Partition Send Window ID (PSWID) for a window in
+ * a way that we can uniquely identify any window in the system. i.e.
+ * we should be able to locate the 'struct vas_window' given the PSWID.
+ *
+ * BitsUsage
+ * 0:7 VAS id (8 bits)
+ * 8:15Unused, 0 (3 bits)
+ * 16:31   Window id (16 bits)
+ */
+static inline u32 encode_pswid(int vasid, int winid)
+{
+   u32 pswid = 0;
+
+   pswid |= vasid << (31 - 7);
+   pswid |= winid;
+
+   return pswid;
+}
+
 static inline void decode_pswid(u32 pswid, int *vasid, int *winid)
 {
if (vasid)
-- 
1.8.3.1





[PATCH 01/14] powerpc/vas: Describe vas-port and interrupts properties

2019-11-26 Thread Haren Myneni
[PATCH 01/14] powerpc/vas: Describe vas-port and interrupts properties

Signed-off-by: Haren Myneni 
---
 Documentation/devicetree/bindings/powerpc/ibm,vas.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/powerpc/ibm,vas.txt 
b/Documentation/devicetree/bindings/powerpc/ibm,vas.txt
index bf11d2f..12de08b 100644
--- a/Documentation/devicetree/bindings/powerpc/ibm,vas.txt
+++ b/Documentation/devicetree/bindings/powerpc/ibm,vas.txt
@@ -11,6 +11,8 @@ Required properties:
   window context start and length, OS/User window context start and length,
   "Paste address" start and length, "Paste window id" start bit and number
   of bits)
+- ibm,vas-port : Port address for the interrupt.
+- interrupts: IRQ value for each VAS instance and level.
 
 Example:
 
@@ -18,5 +20,8 @@ Example:
compatible = "ibm,vas", "ibm,power9-vas";
reg = <0x60191 0x200 0x60190 0x1 
0x8 0x1 0x20 0x10>;
name = "vas";
+   interrupts = <0x1f 0>;
+   interrupt-parent = <&mpic>;
ibm,vas-id = <0x1>;
+   ibm,vas-port = <0x601000100>;
};
-- 
1.8.3.1





[PATCH 00/14] powerpc/vas: Page fault handling for user space NX requests

2019-11-26 Thread Haren Myneni


Applications send compression / decompression requests to NX with
COPY/PASTE instructions. When NX is processing these requests, can hit
fault on the request buffer (not in memory). It issues an interrupt and
pastes fault CRB in fault FIFO. Expects kernel to handle this fault and
return credits for both send and fault windows after processing.

This patch series adds IRQ and fault window setup, and NX fault handling:
- Read IRQ# from "interrupts" property and configure IRQ per VAS instance.
- Set port# for each window to generate an interrupt when noticed fault.
- Set fault window and FIFO on which NX paste fault CRB.
- Setup fault handler (as kernel thread) per VAS instance.
- When receiving an interrupt, Read CRBs from fault FIFO and update
  coprocessor_status_block (CSB) in the corresponding CRB with translation
  failure (CSB_CC_TRANSLATION). After issuing NX requests, process polls
  on CSB address. When it sees translation error, can touch the request
  buffer to bring the page into memory and reissue the NX request.
- If copy_to_user fails on user space CSB address, OS sends SEGV signal.

Tested these patches with NX-GZIP support and will be posting this series
soon.

Patch 2: Revert 452d23c0f6bd97f2 - Using vas_win_id() and vas_win_paste_addr()
Patch 3: Define nx_fault_stamp on which NX writes fault status for the fault
CRB
Patch 4: IRQ and port setup
Patches 5 and 6: Setup fault window and fault handler per each VAS instance.
fault window is used for NX to paste fault CRB in FIFO. A kernel
thread is created to handle faults on each VAS.
Patches 7, 9 and 10: Read and process CRBs from fault FIFO and notify tasks
by updating CSB or through signals.
Patch 8: Reference to pid and mm so that pid is not used until window closed.
Needed for multi-thread applications where a child can open a window
that can be used by the parent later. 
Patches 11 and 12: Return credits for send and fault windows after handling
faults.
Patch 14: Fix closing send window after all credits are returned. This issue
happens only for user space requests. No page faults on kernel
request buffer.


Haren Myneni (14):
  powerpc/vas: Describe vas-port and interrupts properties
  Revert "powerpc/powernv: remove the unused vas_win_paste_addr and
vas_win_id functions"
  powerpc/vas: Define nx_fault_stamp in coprocessor_request_block
  powerpc/vas: Setup IRQ mapping and register port for each window
  powerpc/vas: Setup fault window per VAS instance
  powerpc/VAS: Setup fault handler per VAS instance
  powerpc/vas: Read and process fault CRBs
  powerpc/vas: Take reference to PID and mm for user space windows
  powerpc/vas: Update CSB and notify process for fault CRBs
  powerpc/vas: Print CRB and FIFO values
  powerpc/vas: Do not use default credits for receive window
  powerpc/VAS: Return credits after handling fault
  powerpc/vas: Display process stuck message
  powerpc/vas: Free send window in VAS instance after credits returned

 .../devicetree/bindings/powerpc/ibm,vas.txt|   5 +
 arch/powerpc/include/asm/icswx.h   |  32 +-
 arch/powerpc/include/asm/vas.h |  10 +
 arch/powerpc/platforms/powernv/Makefile|   2 +-
 arch/powerpc/platforms/powernv/vas-debug.c |   2 +-
 arch/powerpc/platforms/powernv/vas-fault.c | 408 +
 arch/powerpc/platforms/powernv/vas-window.c| 192 +-
 arch/powerpc/platforms/powernv/vas.c   |  87 -
 arch/powerpc/platforms/powernv/vas.h   |  54 ++-
 9 files changed, 769 insertions(+), 23 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/vas-fault.c

-- 
1.8.3.1





Re: [Very RFC 24/46] powernv/pci: Make the pre-cfg EEH freeze check use eeh_dev rather than pci_dn

2019-11-26 Thread Alexey Kardashevskiy



On 20/11/2019 12:28, Oliver O'Halloran wrote:
> Squash another usage in preparation for making the config accessors pci_dn.
> 
> Signed-off-by: Oliver O'Halloran 



Reviewed-by: Alexey Kardashevskiy 


> ---
> We might want to move this into eeh-powernv.c
> ---
>  arch/powerpc/platforms/powernv/pci.c | 37 +---
>  1 file changed, 17 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci.c 
> b/arch/powerpc/platforms/powernv/pci.c
> index d36dde9777aa..6170677bfdc7 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -708,30 +708,23 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
>  }
>  
>  #if CONFIG_EEH
> -static bool pnv_pci_cfg_check(struct pci_dn *pdn)
> +bool pnv_eeh_pre_cfg_check(struct eeh_dev *edev)
>  {
> - struct eeh_dev *edev = NULL;
> - struct pnv_phb *phb = pdn->phb->private_data;
> -
> - /* EEH not enabled ? */
> - if (!(phb->flags & PNV_PHB_FLAG_EEH))
> + if (!edev || !edev->pe)
>   return true;
>  
> - /* PE reset or device removed ? */
> - edev = pdn->edev;
> - if (edev) {
> - if (edev->pe &&
> - (edev->pe->state & EEH_PE_CFG_BLOCKED))
> - return false;
> + /* PE in reset? */
> + if (edev->pe->state & EEH_PE_CFG_BLOCKED)
> + return false;
>  
> - if (edev->mode & EEH_DEV_REMOVED)
> - return false;
> - }
> + /* Device removed? */
> + if (edev->mode & EEH_DEV_REMOVED)
> + return false;
>  
>   return true;
>  }
>  #else
> -static inline pnv_pci_cfg_check(struct pci_dn *pdn)
> +static inline pnv_pci_cfg_check(struct eeh_dev *edev)
>  {
>   return true;
>  }
> @@ -743,6 +736,7 @@ static int pnv_pci_read_config(struct pci_bus *bus,
>  {
>   struct pci_dn *pdn;
>   struct pnv_phb *phb;
> + struct eeh_dev *edev;
>   int ret;
>  
>   *val = 0x;
> @@ -750,14 +744,15 @@ static int pnv_pci_read_config(struct pci_bus *bus,
>   if (!pdn)
>   return PCIBIOS_DEVICE_NOT_FOUND;
>  
> - if (!pnv_pci_cfg_check(pdn))
> + edev = pdn_to_eeh_dev(pdn);
> + if (!pnv_eeh_pre_cfg_check(edev))
>   return PCIBIOS_DEVICE_NOT_FOUND;
>  
>   ret = pnv_pci_cfg_read(pdn, where, size, val);
>   phb = pdn->phb->private_data;
> - if (phb->flags & PNV_PHB_FLAG_EEH && pdn->edev) {
> + if (phb->flags & PNV_PHB_FLAG_EEH && edev) {
>   if (*val == EEH_IO_ERROR_VALUE(size) &&
> - eeh_dev_check_failure(pdn->edev))
> + eeh_dev_check_failure(edev))
>  return PCIBIOS_DEVICE_NOT_FOUND;
>   } else {
>   pnv_pci_config_check_eeh(pdn);
> @@ -772,13 +767,15 @@ static int pnv_pci_write_config(struct pci_bus *bus,
>  {
>   struct pci_dn *pdn;
>   struct pnv_phb *phb;
> + struct eeh_dev *edev;
>   int ret;
>  
>   pdn = pci_get_pdn_by_devfn(bus, devfn);
>   if (!pdn)
>   return PCIBIOS_DEVICE_NOT_FOUND;
>  
> - if (!pnv_pci_cfg_check(pdn))
> + edev = pdn_to_eeh_dev(pdn);
> + if (!pnv_eeh_pre_cfg_check(edev))
>   return PCIBIOS_DEVICE_NOT_FOUND;
>  
>   ret = pnv_pci_cfg_write(pdn, where, size, val);
> 

-- 
Alexey


[PATCH v3 2/2] powerpc/kvm/book3e: Replace current->mm by kvm->mm

2019-11-26 Thread Leonardo Bras
Given that in kvm_create_vm() there is:
kvm->mm = current->mm;

And that on every kvm_*_ioctl we have:
if (kvm->mm != current->mm)
return -EIO;

I see no reason to keep using current->mm instead of kvm->mm.

By doing so, we would reduce the use of 'global' variables on code, relying
more in the contents of kvm struct.

Signed-off-by: Leonardo Bras 
---
 arch/powerpc/kvm/booke.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index be9a45874194..fd7bdb4f8f87 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -775,7 +775,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
debug = current->thread.debug;
current->thread.debug = vcpu->arch.dbg_reg;
 
-   vcpu->arch.pgdir = current->mm->pgd;
+   vcpu->arch.pgdir = vcpu->kvm->mm->pgd;
kvmppc_fix_ee_before_entry();
 
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
-- 
2.23.0



[PATCH v3 1/2] powerpc/kvm/book3s: Replace current->mm by kvm->mm

2019-11-26 Thread Leonardo Bras
Given that in kvm_create_vm() there is:
kvm->mm = current->mm;

And that on every kvm_*_ioctl we have:
if (kvm->mm != current->mm)
return -EIO;

I see no reason to keep using current->mm instead of kvm->mm.

By doing so, we would reduce the use of 'global' variables on code, relying
more in the contents of kvm struct.

Signed-off-by: Leonardo Bras 
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  4 ++--
 arch/powerpc/kvm/book3s_64_vio.c| 10 ++
 arch/powerpc/kvm/book3s_hv.c| 10 +-
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d381526c5c9b..6c372f5c61b6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -284,7 +284,7 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, 
unsigned long flags,
/* Protect linux PTE lookup from page table destruction */
rcu_read_lock_sched();  /* this disables preemption too */
ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
-   current->mm->pgd, false, pte_idx_ret);
+   kvm->mm->pgd, false, pte_idx_ret);
rcu_read_unlock_sched();
if (ret == H_TOO_HARD) {
/* this can't happen */
@@ -573,7 +573,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
is_ci = false;
pfn = 0;
page = NULL;
-   mm = current->mm;
+   mm = kvm->mm;
pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 883a66e76638..ee6c103bb7d5 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -253,10 +253,11 @@ static int kvm_spapr_tce_release(struct inode *inode, 
struct file *filp)
}
}
 
+   account_locked_vm(kvm->mm,
+   kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
+
kvm_put_kvm(stt->kvm);
 
-   account_locked_vm(current->mm,
-   kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
call_rcu(&stt->rcu, release_spapr_tce_table);
 
return 0;
@@ -272,6 +273,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 {
struct kvmppc_spapr_tce_table *stt = NULL;
struct kvmppc_spapr_tce_table *siter;
+   struct mm_struct *mm = kvm->mm;
unsigned long npages, size = args->size;
int ret = -ENOMEM;
 
@@ -280,7 +282,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return -EINVAL;
 
npages = kvmppc_tce_pages(size);
-   ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true);
+   ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true);
if (ret)
return ret;
 
@@ -326,7 +328,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 
kfree(stt);
  fail_acct:
-   account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
+   account_locked_vm(mm, kvmppc_stt_pages(npages), false);
return ret;
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ec5c0379296a..d3baa23396e6 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4263,7 +4263,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct 
kvm_vcpu *vcpu)
user_vrsave = mfspr(SPRN_VRSAVE);
 
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
-   vcpu->arch.pgdir = current->mm->pgd;
+   vcpu->arch.pgdir = kvm->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
do {
@@ -4595,14 +4595,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu 
*vcpu)
 
/* Look up the VMA for the start of this memory slot */
hva = memslot->userspace_addr;
-   down_read(&current->mm->mmap_sem);
-   vma = find_vma(current->mm, hva);
+   down_read(&kvm->mm->mmap_sem);
+   vma = find_vma(kvm->mm, hva);
if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
goto up_out;
 
psize = vma_kernel_pagesize(vma);
 
-   up_read(&current->mm->mmap_sem);
+   up_read(&kvm->mm->mmap_sem);
 
/* We can handle 4k, 64k or 16M pages in the VRMA */
if (psize >= 0x100)
@@ -4635,7 +4635,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
return err;
 
  up_out:
-   up_read(&current->mm->mmap_sem);
+   up_read(&kvm->mm->mmap_sem);
goto out_srcu;
 }
 
-- 
2.23.0



[PATCH v3 0/2] Replace current->mm by kvm->mm on powerpc/kvm

2019-11-26 Thread Leonardo Bras
Replace current->mm by kvm->mm on powerpc/kvm

By replacing, we would reduce the use of 'global' current on code,
relying more in the contents of kvm struct.

On code, I found that in kvm_create_vm() there is:
kvm->mm = current->mm;

And that on every kvm_*_ioctl we have tests like that:
if (kvm->mm != current->mm)
return -EIO;

So this change would be safe.

---
Changes since v2:
- Rebased on torvalds/master and updated the remaining patches.

Changes since v1:
- Fixes possible 'use after free' on kvm_spapr_tce_release (from v1)
- Fixes possible 'use after free' on kvm_vm_ioctl_create_spapr_tce
- Fixes undeclared variable error


Leonardo Bras (2):
  powerpc/kvm/book3s: Replace current->mm by kvm->mm
  powerpc/kvm/book3e: Replace current->mm by kvm->mm

 arch/powerpc/kvm/book3s_64_mmu_hv.c |  4 ++--
 arch/powerpc/kvm/book3s_64_vio.c| 10 ++
 arch/powerpc/kvm/book3s_hv.c| 10 +-
 arch/powerpc/kvm/booke.c|  2 +-
 4 files changed, 14 insertions(+), 12 deletions(-)

-- 
2.23.0



Re: [PATCH v2] dma-mapping: treat dev->bus_dma_mask as a DMA limit

2019-11-26 Thread Robin Murphy

On 2019-11-26 6:51 pm, Nicolas Saenz Julienne wrote:

On Mon, 2019-11-25 at 16:33 +, Robin Murphy wrote:

On 25/11/2019 7:44 am, Christoph Hellwig wrote:

On Sat, Nov 23, 2019 at 09:51:08AM -0700, Nathan Chancellor wrote:

Just as an FYI, this introduces a warning on arm32 allyesconfig for me:


I think the dma_limit argument to iommu_dma_alloc_iova should be a u64
and/or we need to use min_t and open code the zero exception.

Robin, Nicolas - any opinions?


Yeah, given that it's always held a mask I'm not entirely sure why it
was ever a dma_addr_t rather than a u64. Unless anyone else is desperate
to do it I'll get a cleanup patch ready for rc1.


Sounds good to me too

Robin, since I started the mess, I'll be happy to do it if it helps offloading
some work from you.


No worries - your change only exposed my original weird decision ;)  On 
second look the patch was literally a trivial one-liner, so I've written 
it up already.


Cheers,
Robin.


Re: [PATCH v2 26/35] powerpc/64: system call: Fix sparse warning about missing declaration

2019-11-26 Thread Luc Van Oostenryck
On Tue, Nov 26, 2019 at 09:13:40PM +0100, Michal Suchanek wrote:
> Sparse warns about missing declarations for these functions:
> 
> +arch/powerpc/kernel/syscall_64.c:108:23: warning: symbol 
> 'syscall_exit_prepare' was not declared. Should it be static?
> +arch/powerpc/kernel/syscall_64.c:18:6: warning: symbol 
> 'system_call_exception' was not declared. Should it be static?
> +arch/powerpc/kernel/syscall_64.c:200:23: warning: symbol 
> 'interrupt_exit_user_prepare' was not declared. Should it be static?
> +arch/powerpc/kernel/syscall_64.c:288:23: warning: symbol 
> 'interrupt_exit_kernel_prepare' was not declared. Should it be static?
> 
> Add declaration for them.

I'm fine with this patch but, just FYI, lately people seem to
prefer to add '__visible' to the function definition instead
of creating such header files.

Best regards,
-- Luc Van Oostenryck


Re: [PATCH net v2 0/4] ibmvnic: Harden device commands and queries

2019-11-26 Thread David Miller
From: Thomas Falcon 
Date: Mon, 25 Nov 2019 17:12:52 -0600

> This patch series fixes some shortcomings with the current
> VNIC device command implementation. The first patch fixes
> the initialization of driver completion structures used
> for device commands. Additionally, all waits for device
> commands are bounded with a timeout in the event that the
> device does not respond or becomes inoperable. Finally,
> serialize queries to retain the integrity of device return
> codes.
> 
> Changes in v2:
> 
>  - included header comment for ibmvnic_wait_for_completion
>  - removed open-coded loop in patch 3/4, suggested by Jakub
>  - ibmvnic_wait_for_completion accepts timeout value in milliseconds
>instead of jiffies
>  - timeout calculations cleaned up and completed before wait loop
>  - included missing mutex_destroy calls, suggested by Jakub
>  - included comment before mutex declaration

Series applied, thanks.


Re: [PATCH v3] libfdt: define INT32_MAX and UINT32_MAX in libfdt_env.h

2019-11-26 Thread Rob Herring
On Wed, Nov 13, 2019 at 04:12:02PM +0900, Masahiro Yamada wrote:
> The DTC v1.5.1 added references to (U)INT32_MAX.
> 
> This is no problem for user-space programs since  defines
> (U)INT32_MAX along with (u)int32_t.
> 
> For the kernel space, libfdt_env.h needs to be adjusted before we
> pull in the changes.
> 
> In the kernel, we usually use s/u32 instead of (u)int32_t for the
> fixed-width types.
> 
> Accordingly, we already have S/U32_MAX for their max values.
> So, we should not add (U)INT32_MAX to  any more.
> 
> Instead, add them to the in-kernel libfdt_env.h to compile the
> latest libfdt.
> 
> Signed-off-by: Masahiro Yamada 
> ---
> 
> My initial plan was to change this in a series of 3 patches
> since it is clean, and reduces the code.
> 
> [1/3] https://lore.kernel.org/patchwork/patch/1147095/
> [2/3] https://lore.kernel.org/patchwork/patch/1147096/
> [3/3] https://lore.kernel.org/patchwork/patch/1147097/
> 
> 1/3 is stuck in the license bikeshed.
> 
> For 2/3, I have not been able to get Ack from Russell.
> 
> So, I chose a straight-forward fixup.
> 
> 
> Changes in v3:
>  - Resend as a single patch
> 
>  arch/arm/boot/compressed/libfdt_env.h | 4 +++-
>  arch/powerpc/boot/libfdt_env.h| 2 ++
>  include/linux/libfdt_env.h| 3 +++
>  3 files changed, 8 insertions(+), 1 deletion(-)

Applied.

Rob


[PATCH v2 35/35] MAINTAINERS: perf: Add pattern that matches ppc perf to the perf entry.

2019-11-26 Thread Michal Suchanek
Signed-off-by: Michal Suchanek 
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 66cc549ac327..853690adb36f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12842,6 +12842,8 @@ F:  arch/*/kernel/*/perf_event*.c
 F: arch/*/kernel/*/*/perf_event*.c
 F: arch/*/include/asm/perf_event.h
 F: arch/*/kernel/perf_callchain.c
+F: arch/*/perf/*
+F: arch/*/perf/*/*
 F: arch/*/events/*
 F: arch/*/events/*/*
 F: tools/perf/
-- 
2.23.0



[PATCH v2 34/35] powerpc/perf: split callchain.c by bitness

2019-11-26 Thread Michal Suchanek
Building callchain.c with !COMPAT proved quite ugly with all the
defines. Splitting out the 32bit and 64bit parts looks better.

No code change intended.

Signed-off-by: Michal Suchanek 
---
v6:
 - move current_is_64bit consolidation to earlier patch
 - move defines to the top of callchain_32.c
 - Makefile cleanup
v8:
 - fix valid_user_sp
---
 arch/powerpc/perf/Makefile   |   5 +-
 arch/powerpc/perf/callchain.c| 362 +--
 arch/powerpc/perf/callchain.h|  20 ++
 arch/powerpc/perf/callchain_32.c | 197 +
 arch/powerpc/perf/callchain_64.c | 178 +++
 5 files changed, 400 insertions(+), 362 deletions(-)
 create mode 100644 arch/powerpc/perf/callchain.h
 create mode 100644 arch/powerpc/perf/callchain_32.c
 create mode 100644 arch/powerpc/perf/callchain_64.c

diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index c155dcbb8691..53d614e98537 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-$(CONFIG_PERF_EVENTS)  += callchain.o perf_regs.o
+obj-$(CONFIG_PERF_EVENTS)  += callchain.o callchain_$(BITS).o perf_regs.o
+ifdef CONFIG_COMPAT
+obj-$(CONFIG_PERF_EVENTS)  += callchain_32.o
+endif
 
 obj-$(CONFIG_PPC_PERF_CTRS)+= core-book3s.o bhrb.o
 obj64-$(CONFIG_PPC_PERF_CTRS)  += ppc970-pmu.o power5-pmu.o \
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index b9fc2f297f30..dd5051015008 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -15,11 +15,9 @@
 #include 
 #include 
 #include 
-#ifdef CONFIG_COMPAT
-#include "../kernel/ppc32.h"
-#endif
 #include 
 
+#include "callchain.h"
 
 /*
  * Is sp valid as the address of the next kernel stack frame after prev_sp?
@@ -102,364 +100,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx 
*entry, struct pt_regs *re
}
 }
 
-static inline int valid_user_sp(unsigned long sp)
-{
-   bool is_64 = !is_32bit_task();
-
-   if (!sp || (sp & (is_64 ? 7 : 3)) || sp > STACK_TOP - (is_64 ? 32 : 16))
-   return 0;
-   return 1;
-}
-
-#ifdef CONFIG_PPC64
-/*
- * On 64-bit we don't want to invoke hash_page on user addresses from
- * interrupt context, so if the access faults, we read the page tables
- * to find which page (if any) is mapped and access it directly.
- */
-static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
-{
-   int ret = -EFAULT;
-   pgd_t *pgdir;
-   pte_t *ptep, pte;
-   unsigned shift;
-   unsigned long addr = (unsigned long) ptr;
-   unsigned long offset;
-   unsigned long pfn, flags;
-   void *kaddr;
-
-   pgdir = current->mm->pgd;
-   if (!pgdir)
-   return -EFAULT;
-
-   local_irq_save(flags);
-   ptep = find_current_mm_pte(pgdir, addr, NULL, &shift);
-   if (!ptep)
-   goto err_out;
-   if (!shift)
-   shift = PAGE_SHIFT;
-
-   /* align address to page boundary */
-   offset = addr & ((1UL << shift) - 1);
-
-   pte = READ_ONCE(*ptep);
-   if (!pte_present(pte) || !pte_user(pte))
-   goto err_out;
-   pfn = pte_pfn(pte);
-   if (!page_is_ram(pfn))
-   goto err_out;
-
-   /* no highmem to worry about here */
-   kaddr = pfn_to_kaddr(pfn);
-   memcpy(buf, kaddr + offset, nb);
-   ret = 0;
-err_out:
-   local_irq_restore(flags);
-   return ret;
-}
-
-static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
-{
-   if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
-   ((unsigned long)ptr & 7))
-   return -EFAULT;
-
-   pagefault_disable();
-   if (!__get_user_inatomic(*ret, ptr)) {
-   pagefault_enable();
-   return 0;
-   }
-   pagefault_enable();
-
-   return read_user_stack_slow(ptr, ret, 8);
-}
-
-/*
- * 64-bit user processes use the same stack frame for RT and non-RT signals.
- */
-struct signal_frame_64 {
-   chardummy[__SIGNAL_FRAMESIZE];
-   struct ucontext uc;
-   unsigned long   unused[2];
-   unsigned inttramp[6];
-   struct siginfo  *pinfo;
-   void*puc;
-   struct siginfo  info;
-   charabigap[288];
-};
-
-static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
-{
-   if (nip == fp + offsetof(struct signal_frame_64, tramp))
-   return 1;
-   if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
-   nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
-   return 1;
-   return 0;
-}
-
-/*
- * Do some sanity checking on the signal frame pointed to by sp.
- * We check the pinfo and puc pointers in the frame.
- */
-static int sane_signal_64_frame(unsigned long sp)
-{
-   struct signal_frame_64 __user *sf;
-   unsigned long pinfo, puc;
-
-   sf = (struct signal_fram

[PATCH v2 33/35] powerpc/64: Make COMPAT user-selectable disabled on littleendian by default.

2019-11-26 Thread Michal Suchanek
On bigendian ppc64 it is common to have 32bit legacy binaries but much
less so on littleendian.

Signed-off-by: Michal Suchanek 
Reviewed-by: Christophe Leroy 
---
v3: make configurable
---
 arch/powerpc/Kconfig | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3e56c9c2f16e..825528db2921 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -266,8 +266,9 @@ config PANIC_TIMEOUT
default 180
 
 config COMPAT
-   bool
-   default y if PPC64
+   bool "Enable support for 32bit binaries"
+   depends on PPC64
+   default y if !CPU_LITTLE_ENDIAN
select COMPAT_BINFMT_ELF
select ARCH_WANT_OLD_COMPAT_IPC
select COMPAT_OLD_SIGACTION
-- 
2.23.0



[PATCH v2 32/35] powerpc/64: make buildable without CONFIG_COMPAT

2019-11-26 Thread Michal Suchanek
There are numerous references to 32bit functions in generic and 64bit
code so ifdef them out.

Signed-off-by: Michal Suchanek 
---
v2:
- fix 32bit ifdef condition in signal.c
- simplify the compat ifdef condition in vdso.c - 64bit is redundant
- simplify the compat ifdef condition in callchain.c - 64bit is redundant
v3:
- use IS_ENABLED and maybe_unused where possible
- do not ifdef declarations
- clean up Makefile
v4:
- further makefile cleanup
- simplify is_32bit_task conditions
- avoid ifdef in condition by using return
v5:
- avoid unreachable code on 32bit
- make is_current_64bit constant on !COMPAT
- add stub perf_callchain_user_32 to avoid some ifdefs
v6:
- consolidate current_is_64bit
v7:
- remove leftover perf_callchain_user_32 stub from previous series version
v8:
- fix build again - too trigger-happy with stub removal
- remove a vdso.c hunk that causes warning according to kbuild test robot
v9:
- removed current_is_64bit in previous patch
v10:
- rebase on top of 70ed86f4de5bd
---
 arch/powerpc/include/asm/thread_info.h | 4 ++--
 arch/powerpc/kernel/Makefile   | 6 +++---
 arch/powerpc/kernel/entry_64.S | 2 ++
 arch/powerpc/kernel/signal.c   | 3 +--
 arch/powerpc/kernel/syscall_64.c   | 6 ++
 arch/powerpc/kernel/vdso.c | 3 ++-
 arch/powerpc/perf/callchain.c  | 8 +++-
 7 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/thread_info.h 
b/arch/powerpc/include/asm/thread_info.h
index 8e1d0195ac36..c128d8a48ea3 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -144,10 +144,10 @@ static inline bool test_thread_local_flags(unsigned int 
flags)
return (ti->local_flags & flags) != 0;
 }
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_COMPAT
 #define is_32bit_task()(test_thread_flag(TIF_32BIT))
 #else
-#define is_32bit_task()(1)
+#define is_32bit_task()(IS_ENABLED(CONFIG_PPC32))
 #endif
 
 #if defined(CONFIG_PPC64)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 45f1d5e54671..35874119b398 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -44,16 +44,16 @@ CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
 endif
 
 obj-y  := cputable.o ptrace.o syscalls.o \
-  irq.o align.o signal_32.o pmc.o vdso.o \
+  irq.o align.o signal_$(BITS).o pmc.o vdso.o \
   process.o systbl.o idle.o \
   signal.o sysfs.o cacheinfo.o time.o \
   prom.o traps.o setup-common.o \
   udbg.o misc.o io.o misc_$(BITS).o \
   of_platform.o prom_parse.o
-obj-$(CONFIG_PPC64)+= setup_64.o sys_ppc32.o \
-  signal_64.o ptrace32.o \
+obj-$(CONFIG_PPC64)+= setup_64.o \
   paca.o nvram_64.o firmware.o note.o \
   syscall_64.o
+obj-$(CONFIG_COMPAT)   += sys_ppc32.o ptrace32.o signal_32.o
 obj-$(CONFIG_VDSO32)   += vdso32/
 obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)   += hw_breakpoint.o
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 00173cc904ef..c339a984958f 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -52,8 +52,10 @@
 SYS_CALL_TABLE:
.tc sys_call_table[TC],sys_call_table
 
+#ifdef CONFIG_COMPAT
 COMPAT_SYS_CALL_TABLE:
.tc compat_sys_call_table[TC],compat_sys_call_table
+#endif
 
 /* This value is used to mark exception frames on the stack. */
 exception_marker:
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 60436432399f..61678cb0e6a1 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -247,7 +247,6 @@ static void do_signal(struct task_struct *tsk)
sigset_t *oldset = sigmask_to_save();
struct ksignal ksig = { .sig = 0 };
int ret;
-   int is32 = is_32bit_task();
 
BUG_ON(tsk != current);
 
@@ -277,7 +276,7 @@ static void do_signal(struct task_struct *tsk)
 
rseq_signal_deliver(&ksig, tsk->thread.regs);
 
-   if (is32) {
+   if (is_32bit_task()) {
if (ksig.ka.sa.sa_flags & SA_SIGINFO)
ret = handle_rt_signal32(&ksig, oldset, tsk);
else
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 62f44c3072f3..783deda66866 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -18,7 +18,6 @@ typedef long (*syscall_fn)(long, long, long, long, long, 
long);
 
 long system_call_exception(long r3, long r4, long r5, long r6, long r7, long 
r8, unsigned long r0, struct pt_regs *regs)
 {
-   unsigned long ti_fla

[PATCH v2 31/35] powerpc/perf: consolidate valid_user_sp

2019-11-26 Thread Michal Suchanek
Merge the 32bit and 64bit version.

Halve the check constants on 32bit.

Use STACK_TOP since it is defined.

Passing is_64 is now redundant since is_32bit_task() is used to
determine which callchain variant should be used. Use STACK_TOP and
is_32bit_task() directly.

This removes a page from the valid 32bit area on 64bit:
 #define TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE))
 #define STACK_TOP_USER32 TASK_SIZE_USER32

Signed-off-by: Michal Suchanek 
---
v8: new patch
v11: simplify by using is_32bit_task()
---
 arch/powerpc/perf/callchain.c | 27 +++
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index c6c4c609cc14..a22a19975a19 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -102,6 +102,15 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx 
*entry, struct pt_regs *re
}
 }
 
+static inline int valid_user_sp(unsigned long sp)
+{
+   bool is_64 = !is_32bit_task();
+
+   if (!sp || (sp & (is_64 ? 7 : 3)) || sp > STACK_TOP - (is_64 ? 32 : 16))
+   return 0;
+   return 1;
+}
+
 #ifdef CONFIG_PPC64
 /*
  * On 64-bit we don't want to invoke hash_page on user addresses from
@@ -165,13 +174,6 @@ static int read_user_stack_64(unsigned long __user *ptr, 
unsigned long *ret)
return read_user_stack_slow(ptr, ret, 8);
 }
 
-static inline int valid_user_sp(unsigned long sp, int is_64)
-{
-   if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
-   return 0;
-   return 1;
-}
-
 /*
  * 64-bit user processes use the same stack frame for RT and non-RT signals.
  */
@@ -230,7 +232,7 @@ static void perf_callchain_user_64(struct 
perf_callchain_entry_ctx *entry,
 
while (entry->nr < entry->max_stack) {
fp = (unsigned long __user *) sp;
-   if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
+   if (!valid_user_sp(sp) || read_user_stack_64(fp, &next_sp))
return;
if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
return;
@@ -279,13 +281,6 @@ static inline void perf_callchain_user_64(struct 
perf_callchain_entry_ctx *entry
 {
 }
 
-static inline int valid_user_sp(unsigned long sp, int is_64)
-{
-   if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
-   return 0;
-   return 1;
-}
-
 #define __SIGNAL_FRAMESIZE32   __SIGNAL_FRAMESIZE
 #define sigcontext32   sigcontext
 #define mcontext32 mcontext
@@ -428,7 +423,7 @@ static void perf_callchain_user_32(struct 
perf_callchain_entry_ctx *entry,
 
while (entry->nr < entry->max_stack) {
fp = (unsigned int __user *) (unsigned long) sp;
-   if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
+   if (!valid_user_sp(sp) || read_user_stack_32(fp, &next_sp))
return;
if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
return;
-- 
2.23.0



[PATCH v2 30/35] powerpc/perf: consolidate read_user_stack_32

2019-11-26 Thread Michal Suchanek
There are two almost identical copies for 32bit and 64bit.

The function is used only in 32bit code which will be split out in next
patch so consolidate to one function.

Signed-off-by: Michal Suchanek 
Reviewed-by: Christophe Leroy 
---
v6: new patch
v8: move the consolidated function out of the ifdef block.
---
 arch/powerpc/perf/callchain.c | 59 +++
 1 file changed, 25 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 35d542515faf..c6c4c609cc14 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -165,22 +165,6 @@ static int read_user_stack_64(unsigned long __user *ptr, 
unsigned long *ret)
return read_user_stack_slow(ptr, ret, 8);
 }
 
-static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
-{
-   if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
-   ((unsigned long)ptr & 3))
-   return -EFAULT;
-
-   pagefault_disable();
-   if (!__get_user_inatomic(*ret, ptr)) {
-   pagefault_enable();
-   return 0;
-   }
-   pagefault_enable();
-
-   return read_user_stack_slow(ptr, ret, 4);
-}
-
 static inline int valid_user_sp(unsigned long sp, int is_64)
 {
if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
@@ -285,25 +269,9 @@ static void perf_callchain_user_64(struct 
perf_callchain_entry_ctx *entry,
 }
 
 #else  /* CONFIG_PPC64 */
-/*
- * On 32-bit we just access the address and let hash_page create a
- * HPTE if necessary, so there is no need to fall back to reading
- * the page tables.  Since this is called at interrupt level,
- * do_page_fault() won't treat a DSI as a page fault.
- */
-static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
+static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 {
-   int rc;
-
-   if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
-   ((unsigned long)ptr & 3))
-   return -EFAULT;
-
-   pagefault_disable();
-   rc = __get_user_inatomic(*ret, ptr);
-   pagefault_enable();
-
-   return rc;
+   return 0;
 }
 
 static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx 
*entry,
@@ -326,6 +294,29 @@ static inline int valid_user_sp(unsigned long sp, int 
is_64)
 
 #endif /* CONFIG_PPC64 */
 
+/*
+ * On 32-bit we just access the address and let hash_page create a
+ * HPTE if necessary, so there is no need to fall back to reading
+ * the page tables.  Since this is called at interrupt level,
+ * do_page_fault() won't treat a DSI as a page fault.
+ */
+static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
+{
+   int rc;
+
+   if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
+   ((unsigned long)ptr & 3))
+   return -EFAULT;
+
+   pagefault_disable();
+   rc = __get_user_inatomic(*ret, ptr);
+   pagefault_enable();
+
+   if (IS_ENABLED(CONFIG_PPC64) && rc)
+   return read_user_stack_slow(ptr, ret, 4);
+   return rc;
+}
+
 /*
  * Layout for non-RT signal frames
  */
-- 
2.23.0



[PATCH v2 29/35] powerpc/perf: remove current_is_64bit()

2019-11-26 Thread Michal Suchanek
Since commit ed1cd6deb013 ("powerpc: Activate CONFIG_THREAD_INFO_IN_TASK")
current_is_64bit() is equivalent to !is_32bit_task().
Remove the redundant function.

Link: https://github.com/linuxppc/issues/issues/275
Link: https://lkml.org/lkml/2019/9/12/540

Fixes: linuxppc#275
Suggested-by: Christophe Leroy 
Signed-off-by: Michal Suchanek 
---
 arch/powerpc/perf/callchain.c | 17 +
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index c84bbd4298a0..35d542515faf 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -284,16 +284,6 @@ static void perf_callchain_user_64(struct 
perf_callchain_entry_ctx *entry,
}
 }
 
-static inline int current_is_64bit(void)
-{
-   /*
-* We can't use test_thread_flag() here because we may be on an
-* interrupt stack, and the thread flags don't get copied over
-* from the thread_info on the main stack to the interrupt stack.
-*/
-   return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
-}
-
 #else  /* CONFIG_PPC64 */
 /*
  * On 32-bit we just access the address and let hash_page create a
@@ -321,11 +311,6 @@ static inline void perf_callchain_user_64(struct 
perf_callchain_entry_ctx *entry
 {
 }
 
-static inline int current_is_64bit(void)
-{
-   return 0;
-}
-
 static inline int valid_user_sp(unsigned long sp, int is_64)
 {
if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
@@ -486,7 +471,7 @@ static void perf_callchain_user_32(struct 
perf_callchain_entry_ctx *entry,
 void
 perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs 
*regs)
 {
-   if (current_is_64bit())
+   if (!is_32bit_task())
perf_callchain_user_64(entry, regs);
else
perf_callchain_user_32(entry, regs);
-- 
2.23.0



[PATCH v2 28/35] powerpc: move common register copy functions from signal_32.c to signal.c

2019-11-26 Thread Michal Suchanek
These functions are required for 64bit as well.

Signed-off-by: Michal Suchanek 
Reviewed-by: Christophe Leroy 
---
 arch/powerpc/kernel/signal.c| 141 
 arch/powerpc/kernel/signal_32.c | 140 ---
 2 files changed, 141 insertions(+), 140 deletions(-)

diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index e6c30cee6abf..60436432399f 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -18,12 +18,153 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 
 #include "signal.h"
 
+#ifdef CONFIG_VSX
+unsigned long copy_fpr_to_user(void __user *to,
+  struct task_struct *task)
+{
+   u64 buf[ELF_NFPREG];
+   int i;
+
+   /* save FPR copy to local buffer then write to the thread_struct */
+   for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+   buf[i] = task->thread.TS_FPR(i);
+   buf[i] = task->thread.fp_state.fpscr;
+   return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_fpr_from_user(struct task_struct *task,
+void __user *from)
+{
+   u64 buf[ELF_NFPREG];
+   int i;
+
+   if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+   return 1;
+   for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+   task->thread.TS_FPR(i) = buf[i];
+   task->thread.fp_state.fpscr = buf[i];
+
+   return 0;
+}
+
+unsigned long copy_vsx_to_user(void __user *to,
+  struct task_struct *task)
+{
+   u64 buf[ELF_NVSRHALFREG];
+   int i;
+
+   /* save FPR copy to local buffer then write to the thread_struct */
+   for (i = 0; i < ELF_NVSRHALFREG; i++)
+   buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+   return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_vsx_from_user(struct task_struct *task,
+void __user *from)
+{
+   u64 buf[ELF_NVSRHALFREG];
+   int i;
+
+   if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+   return 1;
+   for (i = 0; i < ELF_NVSRHALFREG ; i++)
+   task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+   return 0;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+unsigned long copy_ckfpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+   u64 buf[ELF_NFPREG];
+   int i;
+
+   /* save FPR copy to local buffer then write to the thread_struct */
+   for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+   buf[i] = task->thread.TS_CKFPR(i);
+   buf[i] = task->thread.ckfp_state.fpscr;
+   return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_ckfpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+   u64 buf[ELF_NFPREG];
+   int i;
+
+   if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+   return 1;
+   for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+   task->thread.TS_CKFPR(i) = buf[i];
+   task->thread.ckfp_state.fpscr = buf[i];
+
+   return 0;
+}
+
+unsigned long copy_ckvsx_to_user(void __user *to,
+ struct task_struct *task)
+{
+   u64 buf[ELF_NVSRHALFREG];
+   int i;
+
+   /* save FPR copy to local buffer then write to the thread_struct */
+   for (i = 0; i < ELF_NVSRHALFREG; i++)
+   buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
+   return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_ckvsx_from_user(struct task_struct *task,
+ void __user *from)
+{
+   u64 buf[ELF_NVSRHALFREG];
+   int i;
+
+   if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+   return 1;
+   for (i = 0; i < ELF_NVSRHALFREG ; i++)
+   task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+   return 0;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#else
+inline unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+   return __copy_to_user(to, task->thread.fp_state.fpr,
+ ELF_NFPREG * sizeof(double));
+}
+
+inline unsigned long copy_fpr_from_user(struct task_struct *task,
+   void __user *from)
+{
+   return __copy_from_user(task->thread.fp_state.fpr, from,
+ ELF_NFPREG * sizeof(double));
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+inline unsigned long copy_ckfpr_to_user(void __user *to,
+struct task_struct *task)
+{
+   return __copy_to_user(to, task->thread.ckfp_state.fpr,
+ ELF_NFPREG * sizeof(double));
+}
+

[PATCH v2 27/35] powerpc: Add back __ARCH_WANT_SYS_LLSEEK macro

2019-11-26 Thread Michal Suchanek
This partially reverts commit caf6f9c8a326 ("asm-generic: Remove
unneeded __ARCH_WANT_SYS_LLSEEK macro")

When CONFIG_COMPAT is disabled on ppc64 the kernel does not build.

There is resistance to both removing the llseek syscall from the 64bit
syscall tables and building the llseek interface unconditionally.

Link: https://lore.kernel.org/lkml/20190828151552.ga16...@infradead.org/
Link: https://lore.kernel.org/lkml/20190829214319.498c7de2@naga/

Signed-off-by: Michal Suchanek 
Reviewed-by: Arnd Bergmann 
---
 arch/powerpc/include/asm/unistd.h | 1 +
 fs/read_write.c   | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/unistd.h 
b/arch/powerpc/include/asm/unistd.h
index b0720c7c3fcf..700fcdac2e3c 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -31,6 +31,7 @@
 #define __ARCH_WANT_SYS_SOCKETCALL
 #define __ARCH_WANT_SYS_FADVISE64
 #define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
 #define __ARCH_WANT_SYS_OLD_UNAME
diff --git a/fs/read_write.c b/fs/read_write.c
index 5bbf587f5bc1..89aa2701dbeb 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -331,7 +331,8 @@ COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, 
compat_off_t, offset, unsigned i
 }
 #endif
 
-#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT)
+#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \
+   defined(__ARCH_WANT_SYS_LLSEEK)
 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
unsigned long, offset_low, loff_t __user *, result,
unsigned int, whence)
-- 
2.23.0



[PATCH v2 26/35] powerpc/64: system call: Fix sparse warning about missing declaration

2019-11-26 Thread Michal Suchanek
Sparse warns about missing declarations for these functions:

+arch/powerpc/kernel/syscall_64.c:108:23: warning: symbol 
'syscall_exit_prepare' was not declared. Should it be static?
+arch/powerpc/kernel/syscall_64.c:18:6: warning: symbol 'system_call_exception' 
was not declared. Should it be static?
+arch/powerpc/kernel/syscall_64.c:200:23: warning: symbol 
'interrupt_exit_user_prepare' was not declared. Should it be static?
+arch/powerpc/kernel/syscall_64.c:288:23: warning: symbol 
'interrupt_exit_kernel_prepare' was not declared. Should it be static?

Add declaration for them.

Signed-off-by: Michal Suchanek 
---
 arch/powerpc/include/asm/asm-prototypes.h | 6 ++
 arch/powerpc/kernel/syscall_64.c  | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h 
b/arch/powerpc/include/asm/asm-prototypes.h
index 399ca63196e4..841746357833 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -96,6 +96,12 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, 
fd_set __user *exp, s
 unsigned long __init early_init(unsigned long dt_ptr);
 void __init machine_init(u64 dt_ptr);
 #endif
+#ifdef CONFIG_PPC64
+long system_call_exception(long r3, long r4, long r5, long r6, long r7, long 
r8, unsigned long r0, struct pt_regs *regs);
+notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs 
*regs);
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, 
unsigned long msr);
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, 
unsigned long msr);
+#endif
 
 long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
  u32 len_high, u32 len_low);
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index d00cfc4a39a9..62f44c3072f3 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -1,4 +1,5 @@
 #include 
+#include 
 #include 
 #include 
 #include 
-- 
2.23.0



[PATCH v2 25/35] powerpc/64s/exception: remove lite interrupt return

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

The difference between lite and regular returns is that the regular case
restores all NVGPRs, whereas lite skips that. This is quite clumsy
though, most interrupts want the NVGPRs saved for debugging, not to
modify in the caller, so the NVGPRs restore is not necessary most of
the time. Restore NVGPRs explicitly for one case that requires it,
and move everything else over to avoiding the restore unless the
interrupt return demands it (e.g., handling a signal).

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/entry_64.S   |  4 
 arch/powerpc/kernel/exceptions-64s.S | 21 +++--
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index b2e68f5ca8f7..00173cc904ef 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -452,10 +452,6 @@ _GLOBAL(fast_interrupt_return)
 
.balign IFETCH_ALIGN_BYTES
 _GLOBAL(interrupt_return)
-   REST_NVGPRS(r1)
-
-   .balign IFETCH_ALIGN_BYTES
-_GLOBAL(interrupt_return_lite)
ld  r4,_MSR(r1)
andi.   r0,r4,MSR_PR
beq kernel_interrupt_return
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 269edd1460be..1bccc869ebd3 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1507,7 +1507,7 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
RUNLATCH_ON
addir3,r1,STACK_FRAME_OVERHEAD
bl  do_IRQ
-   b   interrupt_return_lite
+   b   interrupt_return
 
GEN_KVM hardware_interrupt
 
@@ -1694,7 +1694,7 @@ EXC_COMMON_BEGIN(decrementer_common)
RUNLATCH_ON
addir3,r1,STACK_FRAME_OVERHEAD
bl  timer_interrupt
-   b   interrupt_return_lite
+   b   interrupt_return
 
GEN_KVM decrementer
 
@@ -1785,7 +1785,7 @@ EXC_COMMON_BEGIN(doorbell_super_common)
 #else
bl  unknown_exception
 #endif
-   b   interrupt_return_lite
+   b   interrupt_return
 
GEN_KVM doorbell_super
 
@@ -2183,7 +2183,7 @@ EXC_COMMON_BEGIN(h_doorbell_common)
 #else
bl  unknown_exception
 #endif
-   b   interrupt_return_lite
+   b   interrupt_return
 
GEN_KVM h_doorbell
 
@@ -2213,7 +2213,7 @@ EXC_COMMON_BEGIN(h_virt_irq_common)
RUNLATCH_ON
addir3,r1,STACK_FRAME_OVERHEAD
bl  do_IRQ
-   b   interrupt_return_lite
+   b   interrupt_return
 
GEN_KVM h_virt_irq
 
@@ -2260,7 +2260,7 @@ EXC_COMMON_BEGIN(performance_monitor_common)
RUNLATCH_ON
addir3,r1,STACK_FRAME_OVERHEAD
bl  performance_monitor_exception
-   b   interrupt_return_lite
+   b   interrupt_return
 
GEN_KVM performance_monitor
 
@@ -3013,7 +3013,7 @@ do_hash_page:
 cmpdi  r3,0/* see if __hash_page succeeded */
 
/* Success */
-   beq interrupt_return_lite   /* Return from exception on success */
+   beq interrupt_return/* Return from exception on success */
 
/* Error */
blt-13f
@@ -3027,10 +3027,11 @@ do_hash_page:
 handle_page_fault:
 11:andis.  r0,r5,DSISR_DABRMATCH@h
bne-handle_dabr_fault
+   bl  save_nvgprs
addir3,r1,STACK_FRAME_OVERHEAD
bl  do_page_fault
cmpdi   r3,0
-   beq+interrupt_return_lite
+   beq+interrupt_return
mr  r5,r3
addir3,r1,STACK_FRAME_OVERHEAD
ld  r4,_DAR(r1)
@@ -3045,9 +3046,9 @@ handle_dabr_fault:
bl  do_break
/*
 * do_break() may have changed the NV GPRS while handling a breakpoint.
-* If so, we need to restore them with their updated values. Don't use
-* interrupt_return_lite here.
+* If so, we need to restore them with their updated values.
 */
+   REST_NVGPRS(r1)
b   interrupt_return
 
 
-- 
2.23.0



[PATCH v2 23/35] powerpc/64: system call implement the bulk of the logic in C

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

System call entry and particularly exit code is beyond the limit of what
is reasonable to implement in asm.

This conversion moves all conditional branches out of the asm code,
except for the case that all GPRs should be restored at exit.

Null syscall test is about 5% faster after this patch, because the exit
work is handled under local_irq_disable, and the hard mask and pending
interrupt replay is handled after that, which avoids games with MSR.

Signed-off-by: Nicholas Piggin 
[ms: add endian conversion for dtl_idx]
Signed-off-by: Michal Suchanek 

v3:
- Fix !KUAP build [mpe]
- Fix BookE build/boot [mpe]
- Don't trace irqs with MSR[RI]=0
- Don't allow syscall_exit_prepare to be ftraced, because function
  graph tracing which traces exits barfs after the IRQ state is
  prepared for kernel exit.
- Fix BE syscall table to use normal function descriptors now that they
  are called from C.
- Comment syscall_exit_prepare.
---
 arch/powerpc/include/asm/asm-prototypes.h |  11 -
 .../powerpc/include/asm/book3s/64/kup-radix.h |  14 +-
 arch/powerpc/include/asm/cputime.h|  24 ++
 arch/powerpc/include/asm/hw_irq.h |   4 +
 arch/powerpc/include/asm/ptrace.h |   3 +
 arch/powerpc/include/asm/signal.h |   3 +
 arch/powerpc/include/asm/switch_to.h  |   5 +
 arch/powerpc/include/asm/time.h   |   3 +
 arch/powerpc/kernel/Makefile  |   3 +-
 arch/powerpc/kernel/entry_64.S| 337 +++---
 arch/powerpc/kernel/signal.h  |   2 -
 arch/powerpc/kernel/syscall_64.c  | 195 ++
 arch/powerpc/kernel/systbl.S  |   9 +-
 13 files changed, 300 insertions(+), 313 deletions(-)
 create mode 100644 arch/powerpc/kernel/syscall_64.c

diff --git a/arch/powerpc/include/asm/asm-prototypes.h 
b/arch/powerpc/include/asm/asm-prototypes.h
index 8561498e653c..399ca63196e4 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -103,14 +103,6 @@ long sys_switch_endian(void);
 notrace unsigned int __check_irq_replay(void);
 void notrace restore_interrupts(void);
 
-/* ptrace */
-long do_syscall_trace_enter(struct pt_regs *regs);
-void do_syscall_trace_leave(struct pt_regs *regs);
-
-/* process */
-void restore_math(struct pt_regs *regs);
-void restore_tm_state(struct pt_regs *regs);
-
 /* prom_init (OpenFirmware) */
 unsigned long __init prom_init(unsigned long r3, unsigned long r4,
   unsigned long pp,
@@ -121,9 +113,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned 
long r4,
 void __init early_setup(unsigned long dt_ptr);
 void early_setup_secondary(void);
 
-/* time */
-void accumulate_stolen_time(void);
-
 /* misc runtime */
 extern u64 __bswapdi2(u64);
 extern s64 __lshrdi3(s64, int);
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h 
b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index f254de956d6a..07058edc5970 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -3,6 +3,7 @@
 #define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
 
 #include 
+#include 
 
 #define AMR_KUAP_BLOCK_READUL(0x4000)
 #define AMR_KUAP_BLOCK_WRITE   UL(0x8000)
@@ -56,7 +57,14 @@
 
 #ifdef CONFIG_PPC_KUAP
 
-#include 
+#include 
+#include 
+
+static inline void kuap_check_amr(void)
+{
+   if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && 
mmu_has_feature(MMU_FTR_RADIX_KUAP))
+   WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
+}
 
 /*
  * We support individually allowing read or write, but we don't support nesting
@@ -101,6 +109,10 @@ static inline bool bad_kuap_fault(struct pt_regs *regs, 
bool is_write)
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : 
AMR_KUAP_BLOCK_READ)),
"Bug: %s fault blocked by AMR!", is_write ? "Write" : 
"Read");
 }
+#else /* CONFIG_PPC_KUAP */
+static inline void kuap_check_amr(void)
+{
+}
 #endif /* CONFIG_PPC_KUAP */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/cputime.h 
b/arch/powerpc/include/asm/cputime.h
index 2431b4ada2fa..c43614cffaac 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -60,6 +60,30 @@ static inline void arch_vtime_task_switch(struct task_struct 
*prev)
 }
 #endif
 
+static inline void account_cpu_user_entry(void)
+{
+   unsigned long tb = mftb();
+   struct cpu_accounting_data *acct = get_accounting(current);
+
+   acct->utime += (tb - acct->starttime_user);
+   acct->starttime = tb;
+}
+static inline void account_cpu_user_exit(void)
+{
+   unsigned long tb = mftb();
+   struct cpu_accounting_data *acct = get_accounting(current);
+
+   acct->stime += (tb - acct->starttime);
+   acct->starttime_user = tb;
+}
+
 #endif /* __KERNEL__ */
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+static inline void account_cpu_

[PATCH v2 24/35] powerpc/64s: interrupt return in C

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

Implement the bulk of interrupt return logic in C. The asm return code
must handle a few cases: restoring full GPRs, and emulating stack store.

The asm return code is moved into 64e for now. The new logic has made
allowance for 64e, but I don't have a full environment that works well
to test it, and even booting in emulated qemu is not great for stress
testing. 64e shouldn't be too far off working with this, given a bit
more testing and auditing of the logic.

This is slightly faster on a POWER9 (page fault speed increases about
1.1%), probably due to reduced mtmsrd.

Signed-off-by: Nicholas Piggin 
[ms: Move the FP restore functions to restore_math. They are not used
anywhere else and when restore_math is not built gcc warns about them
being unused.
Add asm/context_tracking.h include to exceptions-64e.S for SCHEDULE_USER
definition.]
Signed-off-by: Michal Suchanek 
---
 .../powerpc/include/asm/book3s/64/kup-radix.h |  10 +
 arch/powerpc/include/asm/switch_to.h  |   6 +
 arch/powerpc/kernel/entry_64.S| 475 --
 arch/powerpc/kernel/exceptions-64e.S  | 255 +-
 arch/powerpc/kernel/exceptions-64s.S  | 119 ++---
 arch/powerpc/kernel/process.c |  89 ++--
 arch/powerpc/kernel/syscall_64.c  | 157 +-
 arch/powerpc/kernel/vector.S  |   2 +-
 8 files changed, 623 insertions(+), 490 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h 
b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index 07058edc5970..762afbed4762 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -60,6 +60,12 @@
 #include 
 #include 
 
+static inline void kuap_restore_amr(struct pt_regs *regs)
+{
+   if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
+   mtspr(SPRN_AMR, regs->kuap);
+}
+
 static inline void kuap_check_amr(void)
 {
if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && 
mmu_has_feature(MMU_FTR_RADIX_KUAP))
@@ -110,6 +116,10 @@ static inline bool bad_kuap_fault(struct pt_regs *regs, 
bool is_write)
"Bug: %s fault blocked by AMR!", is_write ? "Write" : 
"Read");
 }
 #else /* CONFIG_PPC_KUAP */
+static inline void kuap_restore_amr(struct pt_regs *regs)
+{
+}
+
 static inline void kuap_check_amr(void)
 {
 }
diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 476008bc3d08..b867b58b1093 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -23,7 +23,13 @@ extern void switch_booke_debug_regs(struct debug_reg 
*new_debug);
 
 extern int emulate_altivec(struct pt_regs *);
 
+#ifdef CONFIG_PPC_BOOK3S_64
 void restore_math(struct pt_regs *regs);
+#else
+static inline void restore_math(struct pt_regs *regs)
+{
+}
+#endif
 
 void restore_tm_state(struct pt_regs *regs);
 
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 15bc2a872a76..b2e68f5ca8f7 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -16,6 +16,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -279,7 +280,7 @@ flush_count_cache:
  * state of one is saved on its kernel stack.  Then the state
  * of the other is restored from its kernel stack.  The memory
  * management hardware is updated to the second process's state.
- * Finally, we can return to the second process, via ret_from_except.
+ * Finally, we can return to the second process, via interrupt_return.
  * On entry, r3 points to the THREAD for the current task, r4
  * points to the THREAD for the new task.
  *
@@ -433,408 +434,150 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
addir1,r1,SWITCH_FRAME_SIZE
blr
 
-   .align  7
-_GLOBAL(ret_from_except)
-   ld  r11,_TRAP(r1)
-   andi.   r0,r11,1
-   bne ret_from_except_lite
-   REST_NVGPRS(r1)
-
-_GLOBAL(ret_from_except_lite)
+#ifdef CONFIG_PPC_BOOK3S
/*
-* Disable interrupts so that current_thread_info()->flags
-* can't change between when we test it and when we return
-* from the interrupt.
-*/
-#ifdef CONFIG_PPC_BOOK3E
-   wrteei  0
-#else
-   li  r10,MSR_RI
-   mtmsrd  r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
+* If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
+* touched, AMR not set, no exit work created, then this can be used.
+*/
+   .balign IFETCH_ALIGN_BYTES
+_GLOBAL(fast_interrupt_return)
+   ld  r4,_MSR(r1)
+   andi.   r0,r4,MSR_PR
+   bne .Lfast_user_interrupt_return
+   andi.   r0,r4,MSR_RI
+   bne+.Lfast_kernel_interrupt_return
+   addir3,r1,STACK_FRAME_OVERHEAD
+   bl  unrecoverable_exception
+   b   . /* should not get here */
 
-   ld  r9, PACA_THREAD_INFO(r13)
-   ld  r3,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3E

[PATCH v2 22/35] powerpc/64: system call remove non-volatile GPR save optimisation

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

powerpc has an optimisation where interrupts avoid saving the
non-volatile (or callee saved) registers to the interrupt stack frame if
they are not required.

Two problems with this are that an interrupt does not always know
whether it will need non-volatiles; and if it does need them, they can
only be saved from the entry-scoped asm code (because we don't control
what the C compiler does with these registers).

system calls are the most difficult: some system calls always require
all registers (e.g., fork, to copy regs into the child).  Sometimes
registers are only required under certain conditions (e.g., tracing,
signal delivery). These cases require ugly logic in the call chains
(e.g., ppc_fork), and require a lot of logic to be implemented in asm.

So remove the optimisation for system calls, and always save NVGPRs on
entry. Modern high performance CPUs are not so sensitive, because the
stores are dense in cache and can be hidden by other expensive work in
the syscall path -- the null syscall selftests benchmark on POWER9 is
not slowed (124.40ns before and 123.64ns after, i.e., within the noise).

Other interrupts retain the NVGPR optimisation for now.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/entry_64.S   | 72 +---
 arch/powerpc/kernel/syscalls/syscall.tbl | 22 +---
 2 files changed, 28 insertions(+), 66 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6467bdab8d40..5a3e0b5c9ad1 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -98,13 +98,14 @@ END_BTB_FLUSH_SECTION
std r11,_XER(r1)
std r11,_CTR(r1)
std r9,GPR13(r1)
+   SAVE_NVGPRS(r1)
mflrr10
/*
 * This clears CR0.SO (bit 28), which is the error indication on
 * return from this system call.
 */
rldimi  r2,r11,28,(63-28)
-   li  r11,0xc01
+   li  r11,0xc00
std r10,_LINK(r1)
std r11,_TRAP(r1)
std r3,ORIG_GPR3(r1)
@@ -323,7 +324,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 /* Traced system call support */
 .Lsyscall_dotrace:
-   bl  save_nvgprs
addir3,r1,STACK_FRAME_OVERHEAD
bl  do_syscall_trace_enter
 
@@ -408,7 +408,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mtmsrd  r10,1
 #endif /* CONFIG_PPC_BOOK3E */
 
-   bl  save_nvgprs
addir3,r1,STACK_FRAME_OVERHEAD
bl  do_syscall_trace_leave
b   ret_from_except
@@ -442,62 +441,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 _ASM_NOKPROBE_SYMBOL(system_call_common);
 _ASM_NOKPROBE_SYMBOL(system_call_exit);
 
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
-   ld  r11,_TRAP(r1)
-   andi.   r0,r11,1
-   beqlr-
-   SAVE_NVGPRS(r1)
-   clrrdi  r0,r11,1
-   std r0,_TRAP(r1)
-   blr
-_ASM_NOKPROBE_SYMBOL(save_nvgprs);
-
-   
-/*
- * The sigsuspend and rt_sigsuspend system calls can call do_signal
- * and thus put the process into the stopped state where we might
- * want to examine its user state with ptrace.  Therefore we need
- * to save all the nonvolatile registers (r14 - r31) before calling
- * the C code.  Similarly, fork, vfork and clone need the full
- * register state on the stack so that it can be copied to the child.
- */
-
-_GLOBAL(ppc_fork)
-   bl  save_nvgprs
-   bl  sys_fork
-   b   .Lsyscall_exit
-
-_GLOBAL(ppc_vfork)
-   bl  save_nvgprs
-   bl  sys_vfork
-   b   .Lsyscall_exit
-
-_GLOBAL(ppc_clone)
-   bl  save_nvgprs
-   bl  sys_clone
-   b   .Lsyscall_exit
-
-_GLOBAL(ppc_clone3)
-   bl  save_nvgprs
-   bl  sys_clone3
-   b   .Lsyscall_exit
-
-_GLOBAL(ppc32_swapcontext)
-   bl  save_nvgprs
-   bl  compat_sys_swapcontext
-   b   .Lsyscall_exit
-
-_GLOBAL(ppc64_swapcontext)
-   bl  save_nvgprs
-   bl  sys_swapcontext
-   b   .Lsyscall_exit
-
-_GLOBAL(ppc_switch_endian)
-   bl  save_nvgprs
-   bl  sys_switch_endian
-   b   .Lsyscall_exit
-
 _GLOBAL(ret_from_fork)
bl  schedule_tail
REST_NVGPRS(r1)
@@ -516,6 +459,17 @@ _GLOBAL(ret_from_kernel_thread)
li  r3,0
b   .Lsyscall_exit
 
+/* Save non-volatile GPRs, if not already saved. */
+_GLOBAL(save_nvgprs)
+   ld  r11,_TRAP(r1)
+   andi.   r0,r11,1
+   beqlr-
+   SAVE_NVGPRS(r1)
+   clrrdi  r0,r11,1
+   std r0,_TRAP(r1)
+   blr
+_ASM_NOKPROBE_SYMBOL(save_nvgprs);
+
 #ifdef CONFIG_PPC_BOOK3S_64
 
 #define FLUSH_COUNT_CACHE  \
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl 
b/arch/powerpc/kernel/syscalls/syscall.tbl
index 43f736ed47f2..d899bcb5343e 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -9,7 +9,9 @@
 

[PATCH v2 21/35] powerpc/64s/exception: soft nmi interrupt should not use ret_from_except

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

The soft nmi handler does not reconcile interrupt state, so it should
not return via the normal ret_from_except path. Return like other NMIs,
using the EXCEPTION_RESTORE_REGS macro.

This becomes important when the scv interrupt is implemented, which
must handle soft-masked interrupts that have r13 set to something other
than the PACA -- returning to kernel in this case must restore r13.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 38bc66b95516..af1264cd005f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -2740,7 +2740,11 @@ EXC_COMMON_BEGIN(soft_nmi_common)
bl  save_nvgprs
addir3,r1,STACK_FRAME_OVERHEAD
bl  soft_nmi_interrupt
-   b   ret_from_except
+   /* Clear MSR_RI before setting SRR0 and SRR1. */
+   li  r9,0
+   mtmsrd  r9,1
+   EXCEPTION_RESTORE_REGS hsrr=0
+   RFI_TO_KERNEL
 
 #endif /* CONFIG_PPC_WATCHDOG */
 
-- 
2.23.0



[PATCH v2 20/35] powerpc/64s/exception: only test KVM in SRR interrupts when PR KVM is supported

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

Apart from SRESET, MCE, and syscall (hcall variant), the SRR type
interrupts are not escalated to hypervisor mode, so delivered to the OS.

When running PR KVM, the OS is the hypervisor, and the guest runs with
MSR[PR]=1, so these interrupts must test if a guest was running when
interrupted. These tests are required at the real-mode entry points
because the PR KVM host runs with LPCR[AIL]=0.

In HV KVM and nested HV KVM, the guest always receives these interrupts,
so there is no need for the host to make this test. So remove the tests
if PR KVM is not configured.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 65 ++--
 1 file changed, 62 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 2f50587392aa..38bc66b95516 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -214,9 +214,36 @@ do_define_int n
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
- * If hv is possible, interrupts come into to the hv version
- * of the kvmppc_interrupt code, which then jumps to the PR handler,
- * kvmppc_interrupt_pr, if the guest is a PR guest.
+ * All interrupts which set HSRR registers, as well as SRESET and MCE and
+ * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
+ * so they all generally need to test whether they were taken in guest context.
+ *
+ * Note: SRESET and MCE may also be sent to the guest by the hypervisor, and be
+ * taken with MSR[HV]=0.
+ *
+ * Interrupts which set SRR registers (with the above exceptions) do not
+ * elevate to MSR[HV]=1 mode, though most can be taken when running with
+ * MSR[HV]=1  (e.g., bare metal kernel and userspace). So these interrupts do
+ * not need to test whether a guest is running because they get delivered to
+ * the guest directly, including nested HV KVM guests.
+ *
+ * The exception is PR KVM, where the guest runs with MSR[PR]=1 and the host
+ * runs with MSR[HV]=0, so the host takes all interrupts on behalf of the
+ * guest. PR KVM runs with LPCR[AIL]=0 which causes interrupts to always be
+ * delivered to the real-mode entry point, therefore such interrupts only test
+ * KVM in their real mode handlers, and only when PR KVM is possible.
+ *
+ * Interrupts that are taken in MSR[HV]=0 and escalate to MSR[HV]=1 are always
+ * delivered in real-mode when the MMU is in hash mode because the MMU
+ * registers are not set appropriately to translate host addresses. In nested
+ * radix mode these can be delivered in virt-mode as the host translations are
+ * used implicitly (see: effective LPID, effective PID).
+ */
+
+/*
+ * If an interrupt is taken while a guest is running, it is immediately routed
+ * to KVM to handle. If both HV and PR KVM are possible, KVM interrupts go first
+ * to kvmppc_interrupt_hv, which handles the PR guest case.
  */
 #define kvmppc_interrupt kvmppc_interrupt_hv
 #else
@@ -1258,8 +1285,10 @@ INT_DEFINE_BEGIN(data_access)
IVEC=0x300
IDAR=1
IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_SKIP=1
IKVM_REAL=1
+#endif
 INT_DEFINE_END(data_access)
 
 EXC_REAL_BEGIN(data_access, 0x300, 0x80)
@@ -1306,8 +1335,10 @@ INT_DEFINE_BEGIN(data_access_slb)
IAREA=PACA_EXSLB
IRECONCILE=0
IDAR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_SKIP=1
IKVM_REAL=1
+#endif
 INT_DEFINE_END(data_access_slb)
 
 EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
@@ -1357,7 +1388,9 @@ INT_DEFINE_BEGIN(instruction_access)
IISIDE=1
IDAR=1
IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
+#endif
 INT_DEFINE_END(instruction_access)
 
 EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
@@ -1396,7 +1429,9 @@ INT_DEFINE_BEGIN(instruction_access_slb)
IRECONCILE=0
IISIDE=1
IDAR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
+#endif
 INT_DEFINE_END(instruction_access_slb)
 
 EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
@@ -1488,7 +1523,9 @@ INT_DEFINE_BEGIN(alignment)
IVEC=0x600
IDAR=1
IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
+#endif
 INT_DEFINE_END(alignment)
 
 EXC_REAL_BEGIN(alignment, 0x600, 0x100)
@@ -1518,7 +1555,9 @@ EXC_COMMON_BEGIN(alignment_common)
  */
 INT_DEFINE_BEGIN(program_check)
IVEC=0x700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
+#endif
 INT_DEFINE_END(program_check)
 
 EXC_REAL_BEGIN(program_check, 0x700, 0x100)
@@ -1581,7 +1620,9 @@ EXC_COMMON_BEGIN(program_check_common)
 INT_DEFINE_BEGIN(fp_unavailable)
IVEC=0x800
IRECONCILE=0
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
+#endif
 INT_DEFINE_END(fp_unavailable)
 
 EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
@@ -1643,7 +1684,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 INT_DEFINE_BEGIN(decrementer)
IVEC=0x900

[PATCH v2 19/35] powerpc/64s/exception: add more comments for interrupt handlers

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

A few of the non-standard handlers are left uncommented. Some more
description could be added to some.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 391 ---
 1 file changed, 353 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index ef37d0ab6594..2f50587392aa 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -121,26 +121,26 @@ name:
 /*
  * Interrupt code generation macros
  */
-#define IVEC   .L_IVEC_\name\()
-#define IHSRR  .L_IHSRR_\name\()
-#define IHSRR_IF_HVMODE.L_IHSRR_IF_HVMODE_\name\()
-#define IAREA  .L_IAREA_\name\()
-#define IVIRT  .L_IVIRT_\name\()
-#define IISIDE .L_IISIDE_\name\()
-#define IDAR   .L_IDAR_\name\()
-#define IDSISR .L_IDSISR_\name\()
-#define ISET_RI.L_ISET_RI_\name\()
-#define IBRANCH_TO_COMMON  .L_IBRANCH_TO_COMMON_\name\()
-#define IREALMODE_COMMON   .L_IREALMODE_COMMON_\name\()
-#define IMASK  .L_IMASK_\name\()
-#define IKVM_SKIP  .L_IKVM_SKIP_\name\()
-#define IKVM_REAL  .L_IKVM_REAL_\name\()
+#define IVEC   .L_IVEC_\name\()/* Interrupt vector address */
+#define IHSRR  .L_IHSRR_\name\()   /* Sets SRR or HSRR registers */
+#define IHSRR_IF_HVMODE.L_IHSRR_IF_HVMODE_\name\() /* HSRR if HV else 
SRR */
+#define IAREA  .L_IAREA_\name\()   /* PACA save area */
+#define IVIRT  .L_IVIRT_\name\()   /* Has virt mode entry point */
+#define IISIDE .L_IISIDE_\name\()  /* Uses SRR0/1 not DAR/DSISR */
+#define IDAR   .L_IDAR_\name\()/* Uses DAR (or SRR0) */
+#define IDSISR .L_IDSISR_\name\()  /* Uses DSISR (or SRR1) */
+#define ISET_RI.L_ISET_RI_\name\() /* Run common code w/ 
MSR[RI]=1 */
+#define IBRANCH_TO_COMMON  .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch 
to common */
+#define IREALMODE_COMMON   .L_IREALMODE_COMMON_\name\() /* Common runs in 
realmode */
+#define IMASK  .L_IMASK_\name\()   /* IRQ soft-mask bit */
+#define IKVM_SKIP  .L_IKVM_SKIP_\name\()   /* Generate KVM skip handler */
+#define IKVM_REAL  .L_IKVM_REAL_\name\()   /* Real entry tests KVM */
 #define __IKVM_REAL(name)  .L_IKVM_REAL_ ## name
-#define IKVM_VIRT  .L_IKVM_VIRT_\name\()
-#define ISTACK .L_ISTACK_\name\()
+#define IKVM_VIRT  .L_IKVM_VIRT_\name\()   /* Virt entry tests KVM */
+#define ISTACK .L_ISTACK_\name\()  /* Set regular kernel stack */
 #define __ISTACK(name) .L_ISTACK_ ## name
-#define IRECONCILE .L_IRECONCILE_\name\()
-#define IKUAP  .L_IKUAP_\name\()
+#define IRECONCILE .L_IRECONCILE_\name\()  /* Do RECONCILE_IRQ_STATE */
+#define IKUAP  .L_IKUAP_\name\()   /* Do KUAP lock */
 
 #define INT_DEFINE_BEGIN(n)\
 .macro int_define_ ## n name
@@ -759,6 +759,39 @@ __start_interrupts:
 EXC_VIRT_NONE(0x4000, 0x100)
 
 
+/**
+ * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI).
+ * This is a non-maskable, asynchronous interrupt always taken in real-mode.
+ * It is caused by:
+ * - Wake from power-saving state, on powernv.
+ * - An NMI from another CPU, triggered by firmware or hypercall.
+ * - As crash/debug signal injected from BMC, firmware or hypervisor.
+ *
+ * Handling:
+ * Power-save wakeup is the only performance critical path, so this is
+ * determined quickly as possible first. In this case volatile registers
+ * can be discarded and SPRs like CFAR don't need to be read.
+ *
+ * If not a powersave wakeup, then it's run as a regular interrupt, however
+ * it uses its own stack and PACA save area to preserve the regular kernel
+ * environment for debugging.
+ *
+ * This interrupt is not maskable, so triggering it when MSR[RI] is clear,
+ * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely
+ * correct to switch to virtual mode to run the regular interrupt handler
+ * because it might be interrupted when the MMU is in a bad state (e.g., SLB
+ * is clear).
+ *
+ * FWNMI:
+ * PAPR specifies a "fwnmi" facility which sends the sreset to a different
+ * entry point with a different register set up. Some hypervisors will
+ * send the sreset to 0x100 in the guest if it is not fwnmi capable.
+ *
+ * KVM:
+ * Unlike most SRR interrupts, this may be taken by the host while executing
+ * in a guest, so a KVM test is required. KVM will pull the CPU out of guest
+ * mode and then raise the sreset.
+ */
 INT_DEFINE_BEGIN(system_reset)
IVEC=0x100
IAREA=PACA_EXNMI
@@ -834,6 +867,7 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
  * Vectors for the FWNMI option.  Share common code.
  */
 TRAMP_REAL_BEGIN(system_reset_fwnmi)
+   /* XXX: fwnmi guest could run a nested/PR guest, so why no test?  */
__IKVM_REAL(system_reset

[PATCH v2 18/35] powerpc/64s/exception: Clean up SRR specifiers

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

Remove more magic numbers and replace with nicely named bools.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 68 +---
 1 file changed, 32 insertions(+), 36 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 9494403b9586..ef37d0ab6594 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -105,11 +105,6 @@ name:
ori reg,reg,(ABS_ADDR(label))@l;\
addis   reg,reg,(ABS_ADDR(label))@h
 
-/* Exception register prefixes */
-#define EXC_HV_OR_STD  2 /* depends on HVMODE */
-#define EXC_HV 1
-#define EXC_STD0
-
 /*
  * Branch to label using its 0xC000 address. This results in instruction
  * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
@@ -128,6 +123,7 @@ name:
  */
 #define IVEC   .L_IVEC_\name\()
 #define IHSRR  .L_IHSRR_\name\()
+#define IHSRR_IF_HVMODE.L_IHSRR_IF_HVMODE_\name\()
 #define IAREA  .L_IAREA_\name\()
 #define IVIRT  .L_IVIRT_\name\()
 #define IISIDE .L_IISIDE_\name\()
@@ -159,7 +155,10 @@ do_define_int n
.error "IVEC not defined"
.endif
.ifndef IHSRR
-   IHSRR=EXC_STD
+   IHSRR=0
+   .endif
+   .ifndef IHSRR_IF_HVMODE
+   IHSRR_IF_HVMODE=0
.endif
.ifndef IAREA
IAREA=PACA_EXGEN
@@ -257,7 +256,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld  r9,IAREA+EX_R9(r13)
ld  r10,IAREA+EX_R10(r13)
/* HSRR variants have the 0x2 bit added to their trap number */
-   .if IHSRR == EXC_HV_OR_STD
+   .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
ori r12,r12,(IVEC + 0x2)
FTR_SECTION_ELSE
@@ -278,7 +277,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld  r10,IAREA+EX_R10(r13)
ld  r11,IAREA+EX_R11(r13)
ld  r12,IAREA+EX_R12(r13)
-   .if IHSRR == EXC_HV_OR_STD
+   .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
b   kvmppc_skip_Hinterrupt
FTR_SECTION_ELSE
@@ -403,7 +402,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
stw r10,IAREA+EX_DSISR(r13)
.endif
 
-   .if IHSRR == EXC_HV_OR_STD
+   .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
@@ -485,7 +484,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
.abort "Bad maskable vector"
.endif
 
-   .if IHSRR == EXC_HV_OR_STD
+   .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
bne masked_Hinterrupt
FTR_SECTION_ELSE
@@ -618,12 +617,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
  * Restore all registers including H/SRR0/1 saved in a stack frame of a
  * standard exception.
  */
-.macro EXCEPTION_RESTORE_REGS hsrr
+.macro EXCEPTION_RESTORE_REGS hsrr=0
/* Move original SRR0 and SRR1 into the respective regs */
ld  r9,_MSR(r1)
-   .if \hsrr == EXC_HV_OR_STD
-   .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS"
-   .endif
.if \hsrr
mtspr   SPRN_HSRR1,r9
.else
@@ -898,7 +894,7 @@ EXC_COMMON_BEGIN(system_reset_common)
ld  r10,SOFTE(r1)
stb r10,PACAIRQSOFTMASK(r13)
 
-   EXCEPTION_RESTORE_REGS EXC_STD
+   EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
 
GEN_KVM system_reset
@@ -952,7 +948,7 @@ TRAMP_REAL_BEGIN(machine_check_fwnmi)
lhz r12,PACA_IN_MCE(r13);   \
subir12,r12,1;  \
sth r12,PACA_IN_MCE(r13);   \
-   EXCEPTION_RESTORE_REGS EXC_STD
+   EXCEPTION_RESTORE_REGS
 
 EXC_COMMON_BEGIN(machine_check_early_common)
/*
@@ -1321,7 +1317,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 INT_DEFINE_BEGIN(hardware_interrupt)
IVEC=0x500
-   IHSRR=EXC_HV_OR_STD
+   IHSRR_IF_HVMODE=1
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
@@ -1490,7 +1486,7 @@ EXC_COMMON_BEGIN(decrementer_common)
 
 INT_DEFINE_BEGIN(hdecrementer)
IVEC=0x980
-   IHSRR=EXC_HV
+   IHSRR=1
ISTACK=0
IRECONCILE=0
IKVM_REAL=1
@@ -1732,7 +1728,7 @@ EXC_COMMON_BEGIN(single_step_common)
 
 INT_DEFINE_BEGIN(h_data_storage)
IVEC=0xe00
-   IHSRR=EXC_HV
+   IHSRR=1
IDAR=1
IDSISR=1
IKVM_SKIP=1
@@ -1764,7 +1760,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
 
 INT_DEFINE_BEGIN(h_instr_storage)
IVEC=0xe20
-   IHSRR=EXC_HV
+   IHSRR=1
IKVM_REAL=1
IKVM_VIRT=1
 INT_DEFINE_END(h_instr_storage)
@@ -1787,7 +1783,7 @@ EXC_COMMON_BEGIN(h_instr_storage_common)
 
 INT_DEFINE_BEGIN(emulation_assist)
   

[PATCH v2 17/35] powerpc/64s/exception: re-inline some handlers

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

The reduction in interrupt entry size allows some handlers to be
re-inlined.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 7a234e6d7bf5..9494403b9586 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1186,7 +1186,7 @@ INT_DEFINE_BEGIN(data_access)
 INT_DEFINE_END(data_access)
 
 EXC_REAL_BEGIN(data_access, 0x300, 0x80)
-   GEN_INT_ENTRY data_access, virt=0, ool=1
+   GEN_INT_ENTRY data_access, virt=0
 EXC_REAL_END(data_access, 0x300, 0x80)
 EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
GEN_INT_ENTRY data_access, virt=1
@@ -1216,7 +1216,7 @@ INT_DEFINE_BEGIN(data_access_slb)
 INT_DEFINE_END(data_access_slb)
 
 EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
-   GEN_INT_ENTRY data_access_slb, virt=0, ool=1
+   GEN_INT_ENTRY data_access_slb, virt=0
 EXC_REAL_END(data_access_slb, 0x380, 0x80)
 EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
GEN_INT_ENTRY data_access_slb, virt=1
@@ -1472,7 +1472,7 @@ INT_DEFINE_BEGIN(decrementer)
 INT_DEFINE_END(decrementer)
 
 EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
-   GEN_INT_ENTRY decrementer, virt=0, ool=1
+   GEN_INT_ENTRY decrementer, virt=0
 EXC_REAL_END(decrementer, 0x900, 0x80)
 EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
GEN_INT_ENTRY decrementer, virt=1
-- 
2.23.0



[PATCH v2 16/35] powerpc/64s/exception: hdecrementer avoid touching the stack

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

The hdec interrupt handler is reported to sometimes fire in Linux if
KVM leaves it pending after a guest exits. This is harmless, so there
is a no-op handler for it.

The interrupt handler currently uses the regular kernel stack. Change
this to avoid touching the stack entirely.

This should be the last place where the regular Linux stack can be
accessed with asynchronous interrupts (including PMI) soft-masked.
It might be possible to take advantage of this invariant, e.g., to
context switch the kernel stack SLB entry without clearing MSR[EE].

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/time.h  |  1 -
 arch/powerpc/kernel/exceptions-64s.S | 25 -
 arch/powerpc/kernel/time.c   |  9 -
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 08dbe3e6831c..e0107495c4de 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -24,7 +24,6 @@ extern struct clock_event_device decrementer_clockevent;
 
 
 extern void generic_calibrate_decr(void);
-extern void hdec_interrupt(struct pt_regs *regs);
 
 /* Some sane defaults: 125 MHz timebase, 1GHz processor */
 extern unsigned long ppc_proc_freq;
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 9fa71d51ecf4..7a234e6d7bf5 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1491,6 +1491,8 @@ EXC_COMMON_BEGIN(decrementer_common)
 INT_DEFINE_BEGIN(hdecrementer)
IVEC=0x980
IHSRR=EXC_HV
+   ISTACK=0
+   IRECONCILE=0
IKVM_REAL=1
IKVM_VIRT=1
 INT_DEFINE_END(hdecrementer)
@@ -1502,11 +1504,24 @@ EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
GEN_INT_ENTRY hdecrementer, virt=1
 EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
 EXC_COMMON_BEGIN(hdecrementer_common)
-   GEN_COMMON hdecrementer
-   bl  save_nvgprs
-   addir3,r1,STACK_FRAME_OVERHEAD
-   bl  hdec_interrupt
-   b   ret_from_except
+   __GEN_COMMON_ENTRY hdecrementer
+   /*
+* Hypervisor decrementer interrupts not caught by the KVM test
+* shouldn't occur but are sometimes left pending on exit from a KVM
+* guest.  We don't need to do anything to clear them, as they are
+* edge-triggered.
+*
+* Be careful to avoid touching the kernel stack.
+*/
+   ld  r10,PACA_EXGEN+EX_CTR(r13)
+   mtctr   r10
+   mtcrf   0x80,r9
+   ld  r9,PACA_EXGEN+EX_R9(r13)
+   ld  r10,PACA_EXGEN+EX_R10(r13)
+   ld  r11,PACA_EXGEN+EX_R11(r13)
+   ld  r12,PACA_EXGEN+EX_R12(r13)
+   ld  r13,PACA_EXGEN+EX_R13(r13)
+   HRFI_TO_KERNEL
 
GEN_KVM hdecrementer
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 694522308cd5..bebc8c440289 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -663,15 +663,6 @@ void timer_broadcast_interrupt(void)
 }
 #endif
 
-/*
- * Hypervisor decrementer interrupts shouldn't occur but are sometimes
- * left pending on exit from a KVM guest.  We don't need to do anything
- * to clear them, as they are edge-triggered.
- */
-void hdec_interrupt(struct pt_regs *regs)
-{
-}
-
 #ifdef CONFIG_SUSPEND
 static void generic_suspend_disable_irqs(void)
 {
-- 
2.23.0



[PATCH v2 15/35] powerpc/64s/exception: trim unused arguments from KVMTEST macro

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index abf26db36427..9fa71d51ecf4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -224,7 +224,7 @@ do_define_int n
 #define kvmppc_interrupt kvmppc_interrupt_pr
 #endif
 
-.macro KVMTEST name, hsrr, n
+.macro KVMTEST name
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi   r10,0
bne \name\()_kvm
@@ -293,7 +293,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 .endm
 
 #else
-.macro KVMTEST name, hsrr, n
+.macro KVMTEST name
 .endm
 .macro GEN_KVM name
 .endm
@@ -437,7 +437,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 DEFINE_FIXED_SYMBOL(\name\()_common_real)
 \name\()_common_real:
.if IKVM_REAL
-   KVMTEST \name IHSRR IVEC
+   KVMTEST \name
.endif
 
ld  r10,PACAKMSR(r13)   /* get MSR value for kernel */
@@ -460,7 +460,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
 DEFINE_FIXED_SYMBOL(\name\()_common_virt)
 \name\()_common_virt:
.if IKVM_VIRT
-   KVMTEST \name IHSRR IVEC
+   KVMTEST \name
 1:
.endif
.endif /* IVIRT */
@@ -1595,7 +1595,7 @@ INT_DEFINE_END(system_call)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
-   KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to 
system_call_kvm */
+   KVMTEST system_call /* uses r10, branch to system_call_kvm */
mfctr   r9
 #else
mr  r9,r13
-- 
2.23.0



[PATCH v2 14/35] powerpc/64s/exception: remove the SPR saving patch code macros

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

These are used infrequently enough they don't provide much help, so
inline them.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 82 ++--
 1 file changed, 28 insertions(+), 54 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 716a95ba814f..abf26db36427 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -110,46 +110,6 @@ name:
 #define EXC_HV 1
 #define EXC_STD0
 
-/*
- * PPR save/restore macros used in exceptions-64s.S
- * Used for P7 or later processors
- */
-#define SAVE_PPR(area, ra) \
-BEGIN_FTR_SECTION_NESTED(940)  \
-   ld  ra,area+EX_PPR(r13);/* Read PPR from paca */\
-   std ra,_PPR(r1);\
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
-
-#define RESTORE_PPR_PACA(area, ra) \
-BEGIN_FTR_SECTION_NESTED(941)  \
-   ld  ra,area+EX_PPR(r13);\
-   mtspr   SPRN_PPR,ra;\
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
-
-/*
- * Get an SPR into a register if the CPU has the given feature
- */
-#define OPT_GET_SPR(ra, spr, ftr)  \
-BEGIN_FTR_SECTION_NESTED(943)  \
-   mfspr   ra,spr; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Set an SPR from a register if the CPU has the given feature
- */
-#define OPT_SET_SPR(ra, spr, ftr)  \
-BEGIN_FTR_SECTION_NESTED(943)  \
-   mtspr   spr,ra; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Save a register to the PACA if the CPU has the given feature
- */
-#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr)  \
-BEGIN_FTR_SECTION_NESTED(943)  \
-   std ra,offset(r13); \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
 /*
  * Branch to label using its 0xC000 address. This results in instruction
  * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
@@ -278,18 +238,18 @@ do_define_int n
cmpwi   r10,KVM_GUEST_MODE_SKIP
beq 89f
.else
-BEGIN_FTR_SECTION_NESTED(947)
+BEGIN_FTR_SECTION
ld  r10,IAREA+EX_CFAR(r13)
std r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.endif
 
ld  r10,PACA_EXGEN+EX_CTR(r13)
mtctr   r10
-BEGIN_FTR_SECTION_NESTED(948)
+BEGIN_FTR_SECTION
ld  r10,IAREA+EX_PPR(r13)
std r10,HSTATE_PPR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld  r11,IAREA+EX_R11(r13)
ld  r12,IAREA+EX_R12(r13)
std r12,HSTATE_SCRATCH0(r13)
@@ -386,10 +346,14 @@ 
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
SET_SCRATCH0(r13)   /* save r13 */
GET_PACA(r13)
std r9,IAREA+EX_R9(r13) /* save r9 */
-   OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+   mfspr   r9,SPRN_PPR
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
std r10,IAREA+EX_R10(r13)   /* save r10 - r12 */
-   OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+   mfspr   r10,SPRN_CFAR
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.if \ool
.if !\virt
b   tramp_real_\name
@@ -402,8 +366,12 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
.endif
 
-   OPT_SAVE_REG_TO_PACA(IAREA+EX_PPR, r9, CPU_FTR_HAS_PPR)
-   OPT_SAVE_REG_TO_PACA(IAREA+EX_CFAR, r10, CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+   std r9,IAREA+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+   std r10,IAREA+EX_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
INTERRUPT_TO_KERNEL
mfctr   r10
std r10,IAREA+EX_CTR(r13)
@@ -558,7 +526,10 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
.endif
beq 101f/* if from kernel mode  */
ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
-   SAVE_PPR(IAREA, r9)
+BEGIN_FTR_SECTION
+   ld  r9,IAREA+EX_PPR(r13)/* Read PPR from paca   */
+   std r9,_PPR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 101:
.else
.if IKUAP
@@ -598,10 +569,10 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
std r10,_DSISR(r1)
  

[PATCH v2 13/35] powerpc/64s/exception: remove confusing IEARLY option

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

Replace IEARLY=1 and IEARLY=2 with IBRANCH_COMMON, which controls if
the entry code branches to a common handler; and IREALMODE_COMMON,
which controls whether the common handler should remain in real mode.

These special cases no longer avoid loading the SRR registers, there
is no point as most of them load the registers immediately anyway.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 48 ++--
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 7db76e7be0aa..716a95ba814f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -174,7 +174,8 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define IDAR   .L_IDAR_\name\()
 #define IDSISR .L_IDSISR_\name\()
 #define ISET_RI.L_ISET_RI_\name\()
-#define IEARLY .L_IEARLY_\name\()
+#define IBRANCH_TO_COMMON  .L_IBRANCH_TO_COMMON_\name\()
+#define IREALMODE_COMMON   .L_IREALMODE_COMMON_\name\()
 #define IMASK  .L_IMASK_\name\()
 #define IKVM_SKIP  .L_IKVM_SKIP_\name\()
 #define IKVM_REAL  .L_IKVM_REAL_\name\()
@@ -218,8 +219,15 @@ do_define_int n
.ifndef ISET_RI
ISET_RI=1
.endif
-   .ifndef IEARLY
-   IEARLY=0
+   .ifndef IBRANCH_TO_COMMON
+   IBRANCH_TO_COMMON=1
+   .endif
+   .ifndef IREALMODE_COMMON
+   IREALMODE_COMMON=0
+   .else
+   .if ! IBRANCH_TO_COMMON
+   .error "IREALMODE_COMMON=1 but IBRANCH_TO_COMMON=0"
+   .endif
.endif
.ifndef IMASK
IMASK=0
@@ -353,6 +361,11 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
  */
 
 .macro GEN_BRANCH_TO_COMMON name, virt
+   .if IREALMODE_COMMON
+   LOAD_HANDLER(r10, \name\()_common)
+   mtctr   r10
+   bctr
+   .else
.if \virt
 #ifndef CONFIG_RELOCATABLE
b   \name\()_common_virt
@@ -366,6 +379,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
mtctr   r10
bctr
.endif
+   .endif
 .endm
 
 .macro GEN_INT_ENTRY name, virt, ool=0
@@ -421,11 +435,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
stw r10,IAREA+EX_DSISR(r13)
.endif
 
-   .if IEARLY == 2
-   /* nothing more */
-   .elseif IEARLY
-   BRANCH_TO_C000(r11, \name\()_common)
-   .else
.if IHSRR == EXC_HV_OR_STD
BEGIN_FTR_SECTION
mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
@@ -441,6 +450,8 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
mfspr   r11,SPRN_SRR0   /* save SRR0 */
mfspr   r12,SPRN_SRR1   /* and SRR1 */
.endif
+
+   .if IBRANCH_TO_COMMON
GEN_BRANCH_TO_COMMON \name \virt
.endif
 
@@ -926,6 +937,7 @@ INT_DEFINE_BEGIN(machine_check_early)
IVEC=0x200
IAREA=PACA_EXMC
IVIRT=0 /* no virt entry point */
+   IREALMODE_COMMON=1
/*
 * MSR_RI is not enabled, because PACA_EXMC is being used, so a
 * nested machine check corrupts it. machine_check_common enables
@@ -933,7 +945,6 @@ INT_DEFINE_BEGIN(machine_check_early)
 */
ISET_RI=0
ISTACK=0
-   IEARLY=1
IDAR=1
IDSISR=1
IRECONCILE=0
@@ -973,9 +984,6 @@ TRAMP_REAL_BEGIN(machine_check_fwnmi)
EXCEPTION_RESTORE_REGS EXC_STD
 
 EXC_COMMON_BEGIN(machine_check_early_common)
-   mfspr   r11,SPRN_SRR0
-   mfspr   r12,SPRN_SRR1
-
/*
 * Switch to mc_emergency stack and handle re-entrancy (we limit
 * the nested MCE upto level 4 to avoid stack overflow).
@@ -1822,7 +1830,7 @@ EXC_COMMON_BEGIN(emulation_assist_common)
 INT_DEFINE_BEGIN(hmi_exception_early)
IVEC=0xe60
IHSRR=EXC_HV
-   IEARLY=1
+   IREALMODE_COMMON=1
ISTACK=0
IRECONCILE=0
IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
@@ -1842,8 +1850,6 @@ EXC_REAL_END(hmi_exception, 0xe60, 0x20)
 EXC_VIRT_NONE(0x4e60, 0x20)
 
 EXC_COMMON_BEGIN(hmi_exception_early_common)
-   mfspr   r11,SPRN_HSRR0  /* Save HSRR0 */
-   mfspr   r12,SPRN_HSRR1  /* Save HSRR1 */
mr  r10,r1  /* Save r1 */
ld  r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subir1,r1,INT_FRAME_SIZE/* alloc stack frame*/
@@ -2169,29 +2175,23 @@ EXC_VIRT_NONE(0x5400, 0x100)
 INT_DEFINE_BEGIN(denorm_exception)
IVEC=0x1500
IHSRR=EXC_HV
-   IEARLY=2
+   IBRANCH_TO_COMMON=0
IKVM_REAL=1
 INT_DEFINE_END(denorm_exception)
 
 EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100)
GEN_INT_ENTRY denorm_exception, virt=0
 #ifdef CONFIG_PPC_DENORMALISATION
-   mfspr   r10,SPRN_HSRR1
-   andis.  r10,r10,(H

[PATCH v2 12/35] powerpc/64s/exception: move KVM test to common code

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

This allows more code to be moved out of unrelocated regions. The system
call KVMTEST is changed to be open-coded and remain in the tramp area to
avoid having to move it to entry_64.S. The custom nature of the system
call entry code means the hcall case can be made more streamlined than
regular interrupt handlers.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S| 239 
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  11 --
 arch/powerpc/kvm/book3s_segment.S   |   7 -
 3 files changed, 119 insertions(+), 138 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index fbc3fbb293f7..7db76e7be0aa 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -44,7 +44,6 @@
  * EXC_VIRT_BEGIN/END  - virt (AIL), unrelocated exception vectors
  * TRAMP_REAL_BEGIN- real, unrelocated helpers (virt may call these)
  * TRAMP_VIRT_BEGIN- virt, unreloc helpers (in practice, real can use)
- * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated
  * EXC_COMMON  - After switching to virtual, relocated mode.
  */
 
@@ -74,13 +73,6 @@ name:
 #define TRAMP_VIRT_BEGIN(name) \
FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
 
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define TRAMP_KVM_BEGIN(name)  \
-   TRAMP_VIRT_BEGIN(name)
-#else
-#define TRAMP_KVM_BEGIN(name)
-#endif
-
 #define EXC_REAL_NONE(start, size) \
FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, 
exc_real_##start##_##unused, start, size); \
FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, 
exc_real_##start##_##unused, start, size)
@@ -271,6 +263,9 @@ do_define_int n
 .endm
 
 .macro GEN_KVM name
+   .balign IFETCH_ALIGN_BYTES
+\name\()_kvm:
+
.if IKVM_SKIP
cmpwi   r10,KVM_GUEST_MODE_SKIP
beq 89f
@@ -281,13 +276,18 @@ BEGIN_FTR_SECTION_NESTED(947)
 END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
.endif
 
+   ld  r10,PACA_EXGEN+EX_CTR(r13)
+   mtctr   r10
 BEGIN_FTR_SECTION_NESTED(948)
ld  r10,IAREA+EX_PPR(r13)
std r10,HSTATE_PPR(r13)
 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
-   ld  r10,IAREA+EX_R10(r13)
+   ld  r11,IAREA+EX_R11(r13)
+   ld  r12,IAREA+EX_R12(r13)
std r12,HSTATE_SCRATCH0(r13)
sldir12,r9,32
+   ld  r9,IAREA+EX_R9(r13)
+   ld  r10,IAREA+EX_R10(r13)
/* HSRR variants have the 0x2 bit added to their trap number */
.if IHSRR == EXC_HV_OR_STD
BEGIN_FTR_SECTION
@@ -300,29 +300,16 @@ 
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.else
ori r12,r12,(IVEC)
.endif
-
-#ifdef CONFIG_RELOCATABLE
-   /*
-* KVM requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
-* outside the head section. CONFIG_RELOCATABLE KVM expects CTR
-* to be saved in HSTATE_SCRATCH1.
-*/
-   ld  r9,IAREA+EX_CTR(r13)
-   std r9,HSTATE_SCRATCH1(r13)
-   __LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
-   mtctr   r9
-   ld  r9,IAREA+EX_R9(r13)
-   bctr
-#else
-   ld  r9,IAREA+EX_R9(r13)
b   kvmppc_interrupt
-#endif
-
 
.if IKVM_SKIP
 89:mtocrf  0x80,r9
+   ld  r10,PACA_EXGEN+EX_CTR(r13)
+   mtctr   r10
ld  r9,IAREA+EX_R9(r13)
ld  r10,IAREA+EX_R10(r13)
+   ld  r11,IAREA+EX_R11(r13)
+   ld  r12,IAREA+EX_R12(r13)
.if IHSRR == EXC_HV_OR_STD
BEGIN_FTR_SECTION
b   kvmppc_skip_Hinterrupt
@@ -407,11 +394,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
mfctr   r10
std r10,IAREA+EX_CTR(r13)
mfcrr9
-
-   .if (!\virt && IKVM_REAL) || (\virt && IKVM_VIRT)
-   KVMTEST \name IHSRR IVEC
-   .endif
-
std r11,IAREA+EX_R11(r13)
std r12,IAREA+EX_R12(r13)
 
@@ -475,6 +457,10 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
 .macro __GEN_COMMON_ENTRY name
 DEFINE_FIXED_SYMBOL(\name\()_common_real)
 \name\()_common_real:
+   .if IKVM_REAL
+   KVMTEST \name IHSRR IVEC
+   .endif
+
ld  r10,PACAKMSR(r13)   /* get MSR value for kernel */
/* MSR[RI] is clear iff using SRR regs */
.if IHSRR == EXC_HV_OR_STD
@@ -487,9 +473,17 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
mtmsrd  r10
 
.if IVIRT
+   .if IKVM_VIRT
+   b   1f /* skip the virt test coming from real */
+   .endif
+
.balign IFETCH_ALIGN_BYTES
 DEFINE_FIXED_SYMBOL(\name\()_common_virt)
 \name\()_common_virt:
+   .if IKVM_VIRT
+   KVMTEST \name IHSRR IVEC
+1:
+   .endif
.endif /* IVIRT */
 .endm
 
@@ -848,8 +842,6 @@ END_FTR_SECTIO

[PATCH v2 11/35] powerpc/64s/exception: move soft-mask test to common code

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

As well as moving code out of the unrelocated vectors, this allows the
masked handlers to be moved to common code, and allows the soft_nmi
handler to be generated more like a regular handler.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 106 +--
 1 file changed, 49 insertions(+), 57 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 5803ce3b9404..fbc3fbb293f7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -411,36 +411,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.if (!\virt && IKVM_REAL) || (\virt && IKVM_VIRT)
KVMTEST \name IHSRR IVEC
.endif
-   .if IMASK
-   lbz r10,PACAIRQSOFTMASK(r13)
-   andi.   r10,r10,IMASK
-   /* Associate vector numbers with bits in paca->irq_happened */
-   .if IVEC == 0x500 || IVEC == 0xea0
-   li  r10,PACA_IRQ_EE
-   .elseif IVEC == 0x900
-   li  r10,PACA_IRQ_DEC
-   .elseif IVEC == 0xa00 || IVEC == 0xe80
-   li  r10,PACA_IRQ_DBELL
-   .elseif IVEC == 0xe60
-   li  r10,PACA_IRQ_HMI
-   .elseif IVEC == 0xf00
-   li  r10,PACA_IRQ_PMI
-   .else
-   .abort "Bad maskable vector"
-   .endif
-
-   .if IHSRR == EXC_HV_OR_STD
-   BEGIN_FTR_SECTION
-   bne masked_Hinterrupt
-   FTR_SECTION_ELSE
-   bne masked_interrupt
-   ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-   .elseif IHSRR
-   bne masked_Hinterrupt
-   .else
-   bne masked_interrupt
-   .endif
-   .endif
 
std r11,IAREA+EX_R11(r13)
std r12,IAREA+EX_R12(r13)
@@ -524,6 +494,37 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
 .endm
 
 .macro __GEN_COMMON_BODY name
+   .if IMASK
+   lbz r10,PACAIRQSOFTMASK(r13)
+   andi.   r10,r10,IMASK
+   /* Associate vector numbers with bits in paca->irq_happened */
+   .if IVEC == 0x500 || IVEC == 0xea0
+   li  r10,PACA_IRQ_EE
+   .elseif IVEC == 0x900
+   li  r10,PACA_IRQ_DEC
+   .elseif IVEC == 0xa00 || IVEC == 0xe80
+   li  r10,PACA_IRQ_DBELL
+   .elseif IVEC == 0xe60
+   li  r10,PACA_IRQ_HMI
+   .elseif IVEC == 0xf00
+   li  r10,PACA_IRQ_PMI
+   .else
+   .abort "Bad maskable vector"
+   .endif
+
+   .if IHSRR == EXC_HV_OR_STD
+   BEGIN_FTR_SECTION
+   bne masked_Hinterrupt
+   FTR_SECTION_ELSE
+   bne masked_interrupt
+   ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+   .elseif IHSRR
+   bne masked_Hinterrupt
+   .else
+   bne masked_interrupt
+   .endif
+   .endif
+
.if ISTACK
andi.   r10,r12,MSR_PR  /* See if coming from user  */
mr  r10,r1  /* Save r1  */
@@ -2343,18 +2344,10 @@ EXC_VIRT_NONE(0x5800, 0x100)
 
 #ifdef CONFIG_PPC_WATCHDOG
 
-#define MASKED_DEC_HANDLER_LABEL 3f
-
-#define MASKED_DEC_HANDLER(_H) \
-3: /* soft-nmi */  \
-   std r12,PACA_EXGEN+EX_R12(r13); \
-   GET_SCRATCH0(r10);  \
-   std r10,PACA_EXGEN+EX_R13(r13); \
-   mfspr   r11,SPRN_SRR0;  /* save SRR0 */ \
-   mfspr   r12,SPRN_SRR1;  /* and SRR1 */  \
-   LOAD_HANDLER(r10, soft_nmi_common); \
-   mtctr   r10;\
-   bctr
+INT_DEFINE_BEGIN(soft_nmi)
+   IVEC=0x900
+   ISTACK=0
+INT_DEFINE_END(soft_nmi)
 
 /*
  * Branch to soft_nmi_interrupt using the emergency stack. The emergency
@@ -2366,19 +2359,16 @@ EXC_VIRT_NONE(0x5800, 0x100)
  * and run it entirely with interrupts hard disabled.
  */
 EXC_COMMON_BEGIN(soft_nmi_common)
+   mfspr   r11,SPRN_SRR0
mr  r10,r1
ld  r1,PACAEMERGSP(r13)
subir1,r1,INT_FRAME_SIZE
-   __ISTACK(decrementer)=0
-   __GEN_COMMON_BODY decrementer
+   __GEN_COMMON_BODY soft_nmi
bl  save_nvgprs
addir3,r1,STACK_FRAME_OVERHEAD
bl  soft_nmi_interrupt
b   ret_from_except
 
-#else /* CONFIG_PPC_WATCHDOG */
-#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
-#define MASKED_DEC_HANDLER(_H)
 #endif /* CONFIG_PPC_WATCHDOG */
 
 /*
@@ -2397,7 +2387,6 @@ masked_Hinterrupt:
.else
 mas

[PATCH v2 10/35] powerpc/64s/exception: move real->virt switch into the common handler

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

The real mode interrupt entry points currently use rfid to branch to
the common handler in virtual mode. This is a significant amount of
code, and forces other code (notably the KVM test) to live in the
real mode handler.

In the interest of minimising the amount of code that runs unrelocated,
move the switch to virt mode into the common code, and do it with
mtmsrd, which avoids clobbering SRRs (although the post-KVMTEST
performance of real-mode interrupt handlers is not a big concern these
days).

This requires CTR to always be saved (real-mode needs to reach 0xc...)
but that's not a huge impact these days. It could be optimized away in
future.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h |   4 -
 arch/powerpc/kernel/exceptions-64s.S | 251 ++-
 2 files changed, 109 insertions(+), 146 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 33f4f72eb035..47bd4ea0837d 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -33,11 +33,7 @@
 #include 
 
 /* PACA save area size in u64 units (exgen, exmc, etc) */
-#if defined(CONFIG_RELOCATABLE)
 #define EX_SIZE10
-#else
-#define EX_SIZE9
-#endif
 
 /*
  * maximum recursive depth of MCE exceptions
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index b8588618cdc3..5803ce3b9404 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -32,16 +32,10 @@
 #define EX_CCR 52
 #define EX_CFAR56
 #define EX_PPR 64
-#if defined(CONFIG_RELOCATABLE)
 #define EX_CTR 72
 .if EX_SIZE != 10
.error "EX_SIZE is wrong"
 .endif
-#else
-.if EX_SIZE != 9
-   .error "EX_SIZE is wrong"
-.endif
-#endif
 
 /*
  * Following are fixed section helper macros.
@@ -124,22 +118,6 @@ name:
 #define EXC_HV 1
 #define EXC_STD0
 
-#if defined(CONFIG_RELOCATABLE)
-/*
- * If we support interrupts with relocation on AND we're a relocatable kernel,
- * we need to use CTR to get to the 2nd level handler.  So, save/restore it
- * when required.
- */
-#define SAVE_CTR(reg, area)mfctr   reg ;   std reg,area+EX_CTR(r13)
-#define GET_CTR(reg, area) ld  reg,area+EX_CTR(r13)
-#define RESTORE_CTR(reg, area) ld  reg,area+EX_CTR(r13) ; mtctr reg
-#else
-/* ...else CTR is unused and in register. */
-#define SAVE_CTR(reg, area)
-#define GET_CTR(reg, area) mfctr   reg
-#define RESTORE_CTR(reg, area)
-#endif
-
 /*
  * PPR save/restore macros used in exceptions-64s.S
  * Used for P7 or later processors
@@ -199,6 +177,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define IVEC   .L_IVEC_\name\()
 #define IHSRR  .L_IHSRR_\name\()
 #define IAREA  .L_IAREA_\name\()
+#define IVIRT  .L_IVIRT_\name\()
 #define IISIDE .L_IISIDE_\name\()
 #define IDAR   .L_IDAR_\name\()
 #define IDSISR .L_IDSISR_\name\()
@@ -232,6 +211,9 @@ do_define_int n
.ifndef IAREA
IAREA=PACA_EXGEN
.endif
+   .ifndef IVIRT
+   IVIRT=1
+   .endif
.ifndef IISIDE
IISIDE=0
.endif
@@ -325,7 +307,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
 * outside the head section. CONFIG_RELOCATABLE KVM expects CTR
 * to be saved in HSTATE_SCRATCH1.
 */
-   mfctr   r9
+   ld  r9,IAREA+EX_CTR(r13)
std r9,HSTATE_SCRATCH1(r13)
__LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
mtctr   r9
@@ -362,101 +344,6 @@ 
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
 .endm
 #endif
 
-.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri
-   ld  r10,PACAKMSR(r13)   /* get MSR value for kernel */
-   .if ! \set_ri
-   xorir10,r10,MSR_RI  /* Clear MSR_RI */
-   .endif
-   .if \hsrr == EXC_HV_OR_STD
-   BEGIN_FTR_SECTION
-   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
-   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
-   mtspr   SPRN_HSRR1,r10
-   FTR_SECTION_ELSE
-   mfspr   r11,SPRN_SRR0   /* save SRR0 */
-   mfspr   r12,SPRN_SRR1   /* and SRR1 */
-   mtspr   SPRN_SRR1,r10
-   ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-   .elseif \hsrr
-   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
-   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
-   mtspr   SPRN_HSRR1,r10
-   .else
-   mfspr   r11,SPRN_SRR0   /* save SRR0 */
-   mfspr   r12,SPRN_SRR1   /* and SRR1 */
-   mtspr   SPRN_SRR1,r10
-   .endif
-   LOAD_HANDLER(r10, \label\())
-   .if \hsrr == EXC_HV_OR_STD
-   BEGIN_FTR_SECTION
-   mtspr   SPRN_HSRR0,r10
-   HRFI_TO_KERNEL
-   FTR_SECTION_ELSE
-   mtspr   SPRN_SRR0,

[PATCH v2 09/35] powerpc/64s/exception: Add ISIDE option

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

Rather than using DAR=2 to select the i-side registers, add an
explicit option.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 23 ---
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index bef0c2eee7dc..b8588618cdc3 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -199,6 +199,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define IVEC   .L_IVEC_\name\()
 #define IHSRR  .L_IHSRR_\name\()
 #define IAREA  .L_IAREA_\name\()
+#define IISIDE .L_IISIDE_\name\()
 #define IDAR   .L_IDAR_\name\()
 #define IDSISR .L_IDSISR_\name\()
 #define ISET_RI.L_ISET_RI_\name\()
@@ -231,6 +232,9 @@ do_define_int n
.ifndef IAREA
IAREA=PACA_EXGEN
.endif
+   .ifndef IISIDE
+   IISIDE=0
+   .endif
.ifndef IDAR
IDAR=0
.endif
@@ -542,7 +546,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
 */
GET_SCRATCH0(r10)
std r10,IAREA+EX_R13(r13)
-   .if IDAR == 1
+   .if IDAR && !IISIDE
.if IHSRR
mfspr   r10,SPRN_HDAR
.else
@@ -550,7 +554,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
std r10,IAREA+EX_DAR(r13)
.endif
-   .if IDSISR == 1
+   .if IDSISR && !IISIDE
.if IHSRR
mfspr   r10,SPRN_HDSISR
.else
@@ -625,16 +629,18 @@ 
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
+
.if IDAR
-   .if IDAR == 2
+   .if IISIDE
ld  r10,_NIP(r1)
.else
ld  r10,IAREA+EX_DAR(r13)
.endif
std r10,_DAR(r1)
.endif
+
.if IDSISR
-   .if IDSISR == 2
+   .if IISIDE
ld  r10,_MSR(r1)
lis r11,DSISR_SRR1_MATCH_64S@h
and r10,r10,r11
@@ -643,6 +649,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
std r10,_DSISR(r1)
.endif
+
 BEGIN_FTR_SECTION_NESTED(66)
ld  r10,IAREA+EX_CFAR(r13)
std r10,ORIG_GPR3(r1)
@@ -1311,8 +1318,9 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 INT_DEFINE_BEGIN(instruction_access)
IVEC=0x400
-   IDAR=2
-   IDSISR=2
+   IISIDE=1
+   IDAR=1
+   IDSISR=1
IKVM_REAL=1
 INT_DEFINE_END(instruction_access)
 
@@ -1341,7 +1349,8 @@ INT_DEFINE_BEGIN(instruction_access_slb)
IVEC=0x480
IAREA=PACA_EXSLB
IRECONCILE=0
-   IDAR=2
+   IISIDE=1
+   IDAR=1
IKVM_REAL=1
 INT_DEFINE_END(instruction_access_slb)
 
-- 
2.23.0



[PATCH v2 08/35] powerpc/64s/exception: Remove old INT_KVM_HANDLER

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 55 +---
 1 file changed, 26 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index f318869607db..bef0c2eee7dc 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -266,15 +266,6 @@ do_define_int n
.endif
 .endm
 
-.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
-   TRAMP_KVM_BEGIN(\name\()_kvm)
-   KVM_HANDLER \vec, \hsrr, \area, \skip
-.endm
-
-.macro GEN_KVM name
-   KVM_HANDLER IVEC, IHSRR, IAREA, IKVM_SKIP
-.endm
-
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
@@ -293,35 +284,35 @@ do_define_int n
bne \name\()_kvm
 .endm
 
-.macro KVM_HANDLER vec, hsrr, area, skip
-   .if \skip
+.macro GEN_KVM name
+   .if IKVM_SKIP
cmpwi   r10,KVM_GUEST_MODE_SKIP
beq 89f
.else
 BEGIN_FTR_SECTION_NESTED(947)
-   ld  r10,\area+EX_CFAR(r13)
+   ld  r10,IAREA+EX_CFAR(r13)
std r10,HSTATE_CFAR(r13)
 END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
.endif
 
 BEGIN_FTR_SECTION_NESTED(948)
-   ld  r10,\area+EX_PPR(r13)
+   ld  r10,IAREA+EX_PPR(r13)
std r10,HSTATE_PPR(r13)
 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
-   ld  r10,\area+EX_R10(r13)
+   ld  r10,IAREA+EX_R10(r13)
std r12,HSTATE_SCRATCH0(r13)
sldir12,r9,32
/* HSRR variants have the 0x2 bit added to their trap number */
-   .if \hsrr == EXC_HV_OR_STD
+   .if IHSRR == EXC_HV_OR_STD
BEGIN_FTR_SECTION
-   ori r12,r12,(\vec + 0x2)
+   ori r12,r12,(IVEC + 0x2)
FTR_SECTION_ELSE
-   ori r12,r12,(\vec)
+   ori r12,r12,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-   .elseif \hsrr
-   ori r12,r12,(\vec + 0x2)
+   .elseif IHSRR
+   ori r12,r12,(IVEC+ 0x2)
.else
-   ori r12,r12,(\vec)
+   ori r12,r12,(IVEC)
.endif
 
 #ifdef CONFIG_RELOCATABLE
@@ -334,25 +325,25 @@ 
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r9,HSTATE_SCRATCH1(r13)
__LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
mtctr   r9
-   ld  r9,\area+EX_R9(r13)
+   ld  r9,IAREA+EX_R9(r13)
bctr
 #else
-   ld  r9,\area+EX_R9(r13)
+   ld  r9,IAREA+EX_R9(r13)
b   kvmppc_interrupt
 #endif
 
 
-   .if \skip
+   .if IKVM_SKIP
 89:mtocrf  0x80,r9
-   ld  r9,\area+EX_R9(r13)
-   ld  r10,\area+EX_R10(r13)
-   .if \hsrr == EXC_HV_OR_STD
+   ld  r9,IAREA+EX_R9(r13)
+   ld  r10,IAREA+EX_R10(r13)
+   .if IHSRR == EXC_HV_OR_STD
BEGIN_FTR_SECTION
b   kvmppc_skip_Hinterrupt
FTR_SECTION_ELSE
b   kvmppc_skip_interrupt
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-   .elseif \hsrr
+   .elseif IHSRR
b   kvmppc_skip_Hinterrupt
.else
b   kvmppc_skip_interrupt
@@ -363,7 +354,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
 #else
 .macro KVMTEST name, hsrr, n
 .endm
-.macro KVM_HANDLER name, vec, hsrr, area, skip
+.macro GEN_KVM name
 .endm
 #endif
 
@@ -1640,6 +1631,12 @@ EXC_VIRT_NONE(0x4b00, 0x100)
  * without saving, though xer is not a good idea to use, as hardware may
  * interpret some bits so it may be costly to change them.
  */
+INT_DEFINE_BEGIN(system_call)
+   IVEC=0xc00
+   IKVM_REAL=1
+   IKVM_VIRT=1
+INT_DEFINE_END(system_call)
+
 .macro SYSTEM_CALL virt
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
@@ -1733,7 +1730,7 @@ TRAMP_KVM_BEGIN(system_call_kvm)
SET_SCRATCH0(r10)
std r9,PACA_EXGEN+EX_R9(r13)
mfcrr9
-   KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
+   GEN_KVM system_call
 #endif
 
 
-- 
2.23.0



[PATCH v2 07/35] powerpc/64s/exception: Remove old INT_COMMON macro

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 51 +---
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index ba2dcd91aaaf..f318869607db 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -591,8 +591,8 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
  * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
  * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
  */
-.macro INT_COMMON vec, area, stack, kaup, reconcile, dar, dsisr
-   .if \stack
+.macro GEN_COMMON name
+   .if ISTACK
andi.   r10,r12,MSR_PR  /* See if coming from user  */
mr  r10,r1  /* Save r1  */
subir1,r1,INT_FRAME_SIZE/* alloc frame on kernel stack  */
@@ -609,54 +609,54 @@ 
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r0,GPR0(r1) /* save r0 in stackframe*/
std r10,GPR1(r1)/* save r1 in stackframe*/
 
-   .if \stack
-   .if \kaup
+   .if ISTACK
+   .if IKUAP
kuap_save_amr_and_lock r9, r10, cr1, cr0
.endif
beq 101f/* if from kernel mode  */
ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
-   SAVE_PPR(\area, r9)
+   SAVE_PPR(IAREA, r9)
 101:
.else
-   .if \kaup
+   .if IKUAP
kuap_save_amr_and_lock r9, r10, cr1
.endif
.endif
 
/* Save original regs values from save area to stack frame. */
-   ld  r9,\area+EX_R9(r13) /* move r9, r10 to stackframe   */
-   ld  r10,\area+EX_R10(r13)
+   ld  r9,IAREA+EX_R9(r13) /* move r9, r10 to stackframe   */
+   ld  r10,IAREA+EX_R10(r13)
std r9,GPR9(r1)
std r10,GPR10(r1)
-   ld  r9,\area+EX_R11(r13)/* move r11 - r13 to stackframe */
-   ld  r10,\area+EX_R12(r13)
-   ld  r11,\area+EX_R13(r13)
+   ld  r9,IAREA+EX_R11(r13)/* move r11 - r13 to stackframe */
+   ld  r10,IAREA+EX_R12(r13)
+   ld  r11,IAREA+EX_R13(r13)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
-   .if \dar
-   .if \dar == 2
+   .if IDAR
+   .if IDAR == 2
ld  r10,_NIP(r1)
.else
-   ld  r10,\area+EX_DAR(r13)
+   ld  r10,IAREA+EX_DAR(r13)
.endif
std r10,_DAR(r1)
.endif
-   .if \dsisr
-   .if \dsisr == 2
+   .if IDSISR
+   .if IDSISR == 2
ld  r10,_MSR(r1)
lis r11,DSISR_SRR1_MATCH_64S@h
and r10,r10,r11
.else
-   lwz r10,\area+EX_DSISR(r13)
+   lwz r10,IAREA+EX_DSISR(r13)
.endif
std r10,_DSISR(r1)
.endif
 BEGIN_FTR_SECTION_NESTED(66)
-   ld  r10,\area+EX_CFAR(r13)
+   ld  r10,IAREA+EX_CFAR(r13)
std r10,ORIG_GPR3(r1)
 END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
-   GET_CTR(r10, \area)
+   GET_CTR(r10, IAREA)
std r10,_CTR(r1)
std r2,GPR2(r1) /* save r2 in stackframe*/
SAVE_4GPRS(3, r1)   /* save r3 - r6 in stackframe   */
@@ -668,26 +668,22 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
mfspr   r11,SPRN_XER/* save XER in stackframe   */
std r10,SOFTE(r1)
std r11,_XER(r1)
-   li  r9,(\vec)+1
+   li  r9,(IVEC)+1
std r9,_TRAP(r1)/* set trap number  */
li  r10,0
ld  r11,exception_marker@toc(r2)
std r10,RESULT(r1)  /* clear regs->result   */
std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame   */
 
-   .if \stack
+   .if ISTACK
ACCOUNT_STOLEN_TIME
.endif
 
-   .if \reconcile
+   .if IRECONCILE
RECONCILE_IRQ_STATE(r10, r11)
.endif
 .endm
 
-.macro GEN_COMMON name
-   INT_COMMON IVEC, IAREA, ISTACK, IKUAP, IRECONCILE, IDAR, IDSISR
-.endm
-
 /*
  * Restore all registers including H/SRR0/1 saved in a stack frame of a
  * standard exception.
@@ -2400,7 +2396,8 @@ EXC_COMMON_BEGIN(soft_nmi_common)
mr  r10,r1
ld  r1,PACAEMERGSP(r13)
subir1,r1,INT_FRAME_SIZE
-   INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0
+   __ISTACK(decrementer)=0
+   GEN_COMMON decrementer
bl  save_nvgprs
addir3,r1,STACK_FRAME_OVERHEAD
bl  soft_nmi_interrupt
-- 
2.23.0



[PATCH v2 06/35] powerpc/64s/exception: Remove old INT_ENTRY macro

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 68 
 1 file changed, 30 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index b5decc9a0cbf..ba2dcd91aaaf 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -482,13 +482,13 @@ 
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
  * - Fall through and continue executing in real, unrelocated mode.
  *   This is done if early=2.
  */
-.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, 
ri=1, dar=0, dsisr=0, bitmask=0, kvm=0
+.macro GEN_INT_ENTRY name, virt, ool=0
SET_SCRATCH0(r13)   /* save r13 */
GET_PACA(r13)
-   std r9,\area\()+EX_R9(r13)  /* save r9 */
+   std r9,IAREA+EX_R9(r13) /* save r9 */
OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
HMT_MEDIUM
-   std r10,\area\()+EX_R10(r13)/* save r10 - r12 */
+   std r10,IAREA+EX_R10(r13)   /* save r10 - r12 */
OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
.if \ool
.if !\virt
@@ -502,47 +502,47 @@ 
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
.endif
 
-   OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
-   OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+   OPT_SAVE_REG_TO_PACA(IAREA+EX_PPR, r9, CPU_FTR_HAS_PPR)
+   OPT_SAVE_REG_TO_PACA(IAREA+EX_CFAR, r10, CPU_FTR_CFAR)
INTERRUPT_TO_KERNEL
-   SAVE_CTR(r10, \area\())
+   SAVE_CTR(r10, IAREA)
mfcrr9
-   .if \kvm
-   KVMTEST \name \hsrr \vec
+   .if (!\virt && IKVM_REAL) || (\virt && IKVM_VIRT)
+   KVMTEST \name IHSRR IVEC
.endif
-   .if \bitmask
+   .if IMASK
lbz r10,PACAIRQSOFTMASK(r13)
-   andi.   r10,r10,\bitmask
+   andi.   r10,r10,IMASK
/* Associate vector numbers with bits in paca->irq_happened */
-   .if \vec == 0x500 || \vec == 0xea0
+   .if IVEC == 0x500 || IVEC == 0xea0
li  r10,PACA_IRQ_EE
-   .elseif \vec == 0x900
+   .elseif IVEC == 0x900
li  r10,PACA_IRQ_DEC
-   .elseif \vec == 0xa00 || \vec == 0xe80
+   .elseif IVEC == 0xa00 || IVEC == 0xe80
li  r10,PACA_IRQ_DBELL
-   .elseif \vec == 0xe60
+   .elseif IVEC == 0xe60
li  r10,PACA_IRQ_HMI
-   .elseif \vec == 0xf00
+   .elseif IVEC == 0xf00
li  r10,PACA_IRQ_PMI
.else
.abort "Bad maskable vector"
.endif
 
-   .if \hsrr == EXC_HV_OR_STD
+   .if IHSRR == EXC_HV_OR_STD
BEGIN_FTR_SECTION
bne masked_Hinterrupt
FTR_SECTION_ELSE
bne masked_interrupt
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-   .elseif \hsrr
+   .elseif IHSRR
bne masked_Hinterrupt
.else
bne masked_interrupt
.endif
.endif
 
-   std r11,\area\()+EX_R11(r13)
-   std r12,\area\()+EX_R12(r13)
+   std r11,IAREA+EX_R11(r13)
+   std r12,IAREA+EX_R12(r13)
 
/*
 * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
@@ -550,47 +550,39 @@ 
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
 * not recoverable if they are live.
 */
GET_SCRATCH0(r10)
-   std r10,\area\()+EX_R13(r13)
-   .if \dar == 1
-   .if \hsrr
+   std r10,IAREA+EX_R13(r13)
+   .if IDAR == 1
+   .if IHSRR
mfspr   r10,SPRN_HDAR
.else
mfspr   r10,SPRN_DAR
.endif
-   std r10,\area\()+EX_DAR(r13)
+   std r10,IAREA+EX_DAR(r13)
.endif
-   .if \dsisr == 1
-   .if \hsrr
+   .if IDSISR == 1
+   .if IHSRR
mfspr   r10,SPRN_HDSISR
.else
mfspr   r10,SPRN_DSISR
.endif
-   stw r10,\area\()+EX_DSISR(r13)
+   stw r10,IAREA+EX_DSISR(r13)
.endif
 
-   .if \early == 2
+   .if IEARLY == 2
/* nothing more */
-   .elseif \early
+   .elseif IEARLY
mfctr   r10 /* save ctr, even for !RELOCATABLE */
BRANCH_TO_C000(r11, \name\()_common)
.elseif !\virt
-   INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
+   INT_SAVE_SRR_AND_JUMP \name\()_common, IHSRR, ISET_RI
.else
-   INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr
+   INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, IHSRR
.endif
.if \ool
.popsection

[PATCH v2 05/35] powerpc/64s/exception: Move all interrupt handlers to new style code gen macros

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

Aside from label names and BUG line numbers, the only generated code
change is an additional HMI KVM handler, emitted for the "late" KVM
handler, because early and late HMI generation is achieved by defining
two different interrupt types.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 556 ---
 1 file changed, 418 insertions(+), 138 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 828fa4df15cf..b5decc9a0cbf 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -206,8 +206,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define IMASK  .L_IMASK_\name\()
 #define IKVM_SKIP  .L_IKVM_SKIP_\name\()
 #define IKVM_REAL  .L_IKVM_REAL_\name\()
+#define __IKVM_REAL(name)  .L_IKVM_REAL_ ## name
 #define IKVM_VIRT  .L_IKVM_VIRT_\name\()
 #define ISTACK .L_ISTACK_\name\()
+#define __ISTACK(name) .L_ISTACK_ ## name
 #define IRECONCILE .L_IRECONCILE_\name\()
 #define IKUAP  .L_IKUAP_\name\()
 
@@ -570,7 +572,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
/* nothing more */
.elseif \early
mfctr   r10 /* save ctr, even for !RELOCATABLE */
-   BRANCH_TO_C000(r11, \name\()_early_common)
+   BRANCH_TO_C000(r11, \name\()_common)
.elseif !\virt
INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
.else
@@ -843,6 +845,19 @@ __start_interrupts:
 EXC_VIRT_NONE(0x4000, 0x100)
 
 
+INT_DEFINE_BEGIN(system_reset)
+   IVEC=0x100
+   IAREA=PACA_EXNMI
+   /*
+* MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
+* being used, so a nested NMI exception would corrupt it.
+*/
+   ISET_RI=0
+   ISTACK=0
+   IRECONCILE=0
+   IKVM_REAL=1
+INT_DEFINE_END(system_reset)
+
 EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
 #ifdef CONFIG_PPC_P7_NAP
/*
@@ -880,11 +895,8 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
 
-   INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1
+   GEN_INT_ENTRY system_reset, virt=0
/*
-* MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
-* being used, so a nested NMI exception would corrupt it.
-*
 * In theory, we should not enable relocation here if it was disabled
 * in SRR1, because the MMU may not be configured to support it (e.g.,
 * SLB may have been cleared). In practice, there should only be a few
@@ -893,7 +905,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 */
 EXC_REAL_END(system_reset, 0x100, 0x100)
 EXC_VIRT_NONE(0x4100, 0x100)
-INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0
+TRAMP_KVM_BEGIN(system_reset_kvm)
+   GEN_KVM system_reset
 
 #ifdef CONFIG_PPC_P7_NAP
 TRAMP_REAL_BEGIN(system_reset_idle_wake)
@@ -908,8 +921,8 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
  * Vectors for the FWNMI option.  Share common code.
  */
 TRAMP_REAL_BEGIN(system_reset_fwnmi)
-   /* See comment at system_reset exception, don't turn on RI */
-   INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0
+   __IKVM_REAL(system_reset)=0
+   GEN_INT_ENTRY system_reset, virt=0
 
 #endif /* CONFIG_PPC_PSERIES */
 
@@ -929,7 +942,7 @@ EXC_COMMON_BEGIN(system_reset_common)
mr  r10,r1
ld  r1,PACA_NMI_EMERG_SP(r13)
subir1,r1,INT_FRAME_SIZE
-   INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0
+   GEN_COMMON system_reset
bl  save_nvgprs
/*
 * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
@@ -971,23 +984,46 @@ EXC_COMMON_BEGIN(system_reset_common)
RFI_TO_USER_OR_KERNEL
 
 
-EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
-   INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, 
dsisr=1
+INT_DEFINE_BEGIN(machine_check_early)
+   IVEC=0x200
+   IAREA=PACA_EXMC
/*
 * MSR_RI is not enabled, because PACA_EXMC is being used, so a
 * nested machine check corrupts it. machine_check_common enables
 * MSR_RI.
 */
+   ISET_RI=0
+   ISTACK=0
+   IEARLY=1
+   IDAR=1
+   IDSISR=1
+   IRECONCILE=0
+   IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+INT_DEFINE_END(machine_check_early)
+
+INT_DEFINE_BEGIN(machine_check)
+   IVEC=0x200
+   IAREA=PACA_EXMC
+   ISET_RI=0
+   IDAR=1
+   IDSISR=1
+   IKVM_SKIP=1
+   IKVM_REAL=1
+INT_DEFINE_END(machine_check)
+
+EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
+   GEN_INT_ENTRY machine_check_early, virt=0
 EXC_REAL_END(machine_check, 0x200, 0x100)
 EXC_VIRT_NONE(0x4200, 0x100)
 
 #ifdef CONFIG_PPC_PSERIES
 TRAMP_REAL_BEGIN(machine_check_fwnmi)
/* See comment at machine_check exception, don't turn on RI */
-   INT_HANDLER machine_check, 0x200, ear

[PATCH v2 04/35] powerpc/64s/exception: Expand EXC_COMMON and EXC_COMMON_ASYNC macros

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

These don't provide a large amount of code sharing. Removing them
makes code easier to shuffle around. For example, some of the common
instructions will be moved into the common code gen macro.

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 160 ---
 1 file changed, 117 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 0e39e98ef719..828fa4df15cf 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -757,28 +757,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 #define FINISH_NAP
 #endif
 
-#define EXC_COMMON(name, realvec, hdlr)
\
-   EXC_COMMON_BEGIN(name); \
-   INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
-   bl  save_nvgprs;\
-   addir3,r1,STACK_FRAME_OVERHEAD; \
-   bl  hdlr;   \
-   b   ret_from_except
-
-/*
- * Like EXC_COMMON, but for exceptions that can occur in the idle task and
- * therefore need the special idle handling (finish nap and runlatch)
- */
-#define EXC_COMMON_ASYNC(name, realvec, hdlr)  \
-   EXC_COMMON_BEGIN(name); \
-   INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
-   FINISH_NAP; \
-   RUNLATCH_ON;\
-   addir3,r1,STACK_FRAME_OVERHEAD; \
-   bl  hdlr;   \
-   b   ret_from_except_lite
-
-
 /*
  * There are a few constraints to be concerned with.
  * - Real mode exceptions code/data must be located at their physical location.
@@ -1349,7 +1327,13 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, 
bitmask=IRQS_DISABLED, kvm=1
 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
 INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
+EXC_COMMON_BEGIN(hardware_interrupt_common)
+   INT_COMMON 0x500, PACA_EXGEN, 1, 1, 1, 0, 0
+   FINISH_NAP
+   RUNLATCH_ON
+   addir3,r1,STACK_FRAME_OVERHEAD
+   bl  do_IRQ
+   b   ret_from_except_lite
 
 
 EXC_REAL_BEGIN(alignment, 0x600, 0x100)
@@ -1455,7 +1439,13 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
 EXC_VIRT_END(decrementer, 0x4900, 0x80)
 INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
+EXC_COMMON_BEGIN(decrementer_common)
+   INT_COMMON 0x900, PACA_EXGEN, 1, 1, 1, 0, 0
+   FINISH_NAP
+   RUNLATCH_ON
+   addir3,r1,STACK_FRAME_OVERHEAD
+   bl  timer_interrupt
+   b   ret_from_except_lite
 
 
 EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
@@ -1465,7 +1455,12 @@ EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
 EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
 INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
+EXC_COMMON_BEGIN(hdecrementer_common)
+   INT_COMMON 0x980, PACA_EXGEN, 1, 1, 1, 0, 0
+   bl  save_nvgprs
+   addir3,r1,STACK_FRAME_OVERHEAD
+   bl  hdec_interrupt
+   b   ret_from_except
 
 
 EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
@@ -1475,11 +1470,17 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED
 EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
 INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(doorbell_super_common)
+   INT_COMMON 0xa00, PACA_EXGEN, 1, 1, 1, 0, 0
+   FINISH_NAP
+   RUNLATCH_ON
+   addir3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_DOORBELL
-EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
+   bl  doorbell_exception
 #else
-EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
+   bl  unknown_exception
 #endif
+   b   ret_from_except_lite
 
 
 EXC_REAL_NONE(0xb00, 0x100)
@@ -1623,7 +1624,12 @@ EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
INT_HANDLER single_step, 0xd00, virt=1
 EXC_VIRT_END(single_step, 0x4d00, 0x100)
 INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON(single_step_common, 0xd00, single_step_exception)
+EXC_COMMON_BEGIN(single_step_common)
+   INT_COMMON 0xd00, PACA_EXGEN, 1, 1, 1, 0, 0
+   bl  save_nvgprs
+   ad

[PATCH v2 03/35] powerpc/64s/exception: Add GEN_KVM macro that uses INT_DEFINE parameters

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 591ae2a73e18..0e39e98ef719 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -204,6 +204,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define ISET_RI.L_ISET_RI_\name\()
 #define IEARLY .L_IEARLY_\name\()
 #define IMASK  .L_IMASK_\name\()
+#define IKVM_SKIP  .L_IKVM_SKIP_\name\()
 #define IKVM_REAL  .L_IKVM_REAL_\name\()
 #define IKVM_VIRT  .L_IKVM_VIRT_\name\()
 #define ISTACK .L_ISTACK_\name\()
@@ -243,6 +244,9 @@ do_define_int n
.ifndef IMASK
IMASK=0
.endif
+   .ifndef IKVM_SKIP
+   IKVM_SKIP=0
+   .endif
.ifndef IKVM_REAL
IKVM_REAL=0
.endif
@@ -265,6 +269,10 @@ do_define_int n
KVM_HANDLER \vec, \hsrr, \area, \skip
 .endm
 
+.macro GEN_KVM name
+   KVM_HANDLER IVEC, IHSRR, IAREA, IKVM_SKIP
+.endm
+
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
@@ -1226,6 +1234,7 @@ INT_DEFINE_BEGIN(data_access)
IVEC=0x300
IDAR=1
IDSISR=1
+   IKVM_SKIP=1
IKVM_REAL=1
 INT_DEFINE_END(data_access)
 
@@ -1235,7 +1244,8 @@ EXC_REAL_END(data_access, 0x300, 0x80)
 EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
GEN_INT_ENTRY data_access, virt=1
 EXC_VIRT_END(data_access, 0x4300, 0x80)
-INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
+TRAMP_KVM_BEGIN(data_access_kvm)
+   GEN_KVM data_access
 EXC_COMMON_BEGIN(data_access_common)
GEN_COMMON data_access
ld  r4,_DAR(r1)
-- 
2.23.0



[PATCH v2 02/35] powerpc/64s/exception: Add GEN_COMMON macro that uses INT_DEFINE parameters

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 24 +---
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index e6ad6e6cf65e..591ae2a73e18 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -206,6 +206,9 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define IMASK  .L_IMASK_\name\()
 #define IKVM_REAL  .L_IKVM_REAL_\name\()
 #define IKVM_VIRT  .L_IKVM_VIRT_\name\()
+#define ISTACK .L_ISTACK_\name\()
+#define IRECONCILE .L_IRECONCILE_\name\()
+#define IKUAP  .L_IKUAP_\name\()
 
 #define INT_DEFINE_BEGIN(n)\
 .macro int_define_ ## n name
@@ -246,6 +249,15 @@ do_define_int n
.ifndef IKVM_VIRT
IKVM_VIRT=0
.endif
+   .ifndef ISTACK
+   ISTACK=1
+   .endif
+   .ifndef IRECONCILE
+   IRECONCILE=1
+   .endif
+   .ifndef IKUAP
+   IKUAP=1
+   .endif
 .endm
 
 .macro INT_KVM_HANDLER name, vec, hsrr, area, skip
@@ -670,6 +682,10 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
.endif
 .endm
 
+.macro GEN_COMMON name
+   INT_COMMON IVEC, IAREA, ISTACK, IKUAP, IRECONCILE, IDAR, IDSISR
+.endm
+
 /*
  * Restore all registers including H/SRR0/1 saved in a stack frame of a
  * standard exception.
@@ -1221,13 +1237,7 @@ EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
 EXC_VIRT_END(data_access, 0x4300, 0x80)
 INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
 EXC_COMMON_BEGIN(data_access_common)
-   /*
-* Here r13 points to the paca, r9 contains the saved CR,
-* SRR0 and SRR1 are saved in r11 and r12,
-* r9 - r13 are saved in paca->exgen.
-* EX_DAR and EX_DSISR have saved DAR/DSISR
-*/
-   INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1
+   GEN_COMMON data_access
ld  r4,_DAR(r1)
ld  r5,_DSISR(r1)
 BEGIN_MMU_FTR_SECTION
-- 
2.23.0



[PATCH v2 01/35] powerpc/64s/exception: Introduce INT_DEFINE parameter block for code generation

2019-11-26 Thread Michal Suchanek
From: Nicholas Piggin 

The code generation macro arguments are difficult to read, and
defaults can't easily be used.

This introduces a block where parameters can be set for interrupt
handler code generation by the subsequent macros, and adds the first
generation macro for interrupt entry.

One interrupt handler is converted to the new macros to demonstrate
the change, the rest will be converted all at once.

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 77 ++--
 1 file changed, 73 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index d0018dd17e0a..e6ad6e6cf65e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -193,6 +193,61 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr   reg;\
bctr
 
+/*
+ * Interrupt code generation macros
+ */
+#define IVEC   .L_IVEC_\name\()
+#define IHSRR  .L_IHSRR_\name\()
+#define IAREA  .L_IAREA_\name\()
+#define IDAR   .L_IDAR_\name\()
+#define IDSISR .L_IDSISR_\name\()
+#define ISET_RI.L_ISET_RI_\name\()
+#define IEARLY .L_IEARLY_\name\()
+#define IMASK  .L_IMASK_\name\()
+#define IKVM_REAL  .L_IKVM_REAL_\name\()
+#define IKVM_VIRT  .L_IKVM_VIRT_\name\()
+
+#define INT_DEFINE_BEGIN(n)\
+.macro int_define_ ## n name
+
+#define INT_DEFINE_END(n)  \
+.endm ;
\
+int_define_ ## n n ;   \
+do_define_int n
+
+.macro do_define_int name
+   .ifndef IVEC
+   .error "IVEC not defined"
+   .endif
+   .ifndef IHSRR
+   IHSRR=EXC_STD
+   .endif
+   .ifndef IAREA
+   IAREA=PACA_EXGEN
+   .endif
+   .ifndef IDAR
+   IDAR=0
+   .endif
+   .ifndef IDSISR
+   IDSISR=0
+   .endif
+   .ifndef ISET_RI
+   ISET_RI=1
+   .endif
+   .ifndef IEARLY
+   IEARLY=0
+   .endif
+   .ifndef IMASK
+   IMASK=0
+   .endif
+   .ifndef IKVM_REAL
+   IKVM_REAL=0
+   .endif
+   .ifndef IKVM_VIRT
+   IKVM_VIRT=0
+   .endif
+.endm
+
 .macro INT_KVM_HANDLER name, vec, hsrr, area, skip
TRAMP_KVM_BEGIN(\name\()_kvm)
KVM_HANDLER \vec, \hsrr, \area, \skip
@@ -474,7 +529,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
 */
GET_SCRATCH0(r10)
std r10,\area\()+EX_R13(r13)
-   .if \dar
+   .if \dar == 1
.if \hsrr
mfspr   r10,SPRN_HDAR
.else
@@ -482,7 +537,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
std r10,\area\()+EX_DAR(r13)
.endif
-   .if \dsisr
+   .if \dsisr == 1
.if \hsrr
mfspr   r10,SPRN_HDSISR
.else
@@ -506,6 +561,14 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
 .endm
 
+.macro GEN_INT_ENTRY name, virt, ool=0
+   .if ! \virt
+   INT_HANDLER \name, IVEC, \ool, IEARLY, \virt, IHSRR, IAREA, 
ISET_RI, IDAR, IDSISR, IMASK, IKVM_REAL
+   .else
+   INT_HANDLER \name, IVEC, \ool, IEARLY, \virt, IHSRR, IAREA, 
ISET_RI, IDAR, IDSISR, IMASK, IKVM_VIRT
+   .endif
+.endm
+
 /*
  * On entry r13 points to the paca, r9-r13 are saved in the paca,
  * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
@@ -1143,12 +1206,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
bl  unrecoverable_exception
b   .
 
+INT_DEFINE_BEGIN(data_access)
+   IVEC=0x300
+   IDAR=1
+   IDSISR=1
+   IKVM_REAL=1
+INT_DEFINE_END(data_access)
 
 EXC_REAL_BEGIN(data_access, 0x300, 0x80)
-   INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
+   GEN_INT_ENTRY data_access, virt=0, ool=1
 EXC_REAL_END(data_access, 0x300, 0x80)
 EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
-   INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
+   GEN_INT_ENTRY data_access, virt=1
 EXC_VIRT_END(data_access, 0x4300, 0x80)
 INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
 EXC_COMMON_BEGIN(data_access_common)
-- 
2.23.0



[PATCH v2 00/35] exception cleanup, syscall in C and !COMPAT

2019-11-26 Thread Michal Suchanek
Hello,

This is a merge of https://patchwork.ozlabs.org/cover/1162376/ (except two
last experimental patches) and
https://patchwork.ozlabs.org/patch/1162079/ rebased on top of master.

There was minor conflict in Makefile in the latter series.

Refreshed the patchset to fix build error on ppc32 and ppc64e.

Thanks

Michal

Michal Suchanek (10):
  powerpc/64: system call: Fix sparse warning about missing declaration
  powerpc: Add back __ARCH_WANT_SYS_LLSEEK macro
  powerpc: move common register copy functions from signal_32.c to
signal.c
  powerpc/perf: remove current_is_64bit()
  powerpc/perf: consolidate read_user_stack_32
  powerpc/perf: consolidate valid_user_sp
  powerpc/64: make buildable without CONFIG_COMPAT
  powerpc/64: Make COMPAT user-selectable disabled on littleendian by
default.
  powerpc/perf: split callchain.c by bitness
  MAINTAINERS: perf: Add pattern that matches ppc perf to the perf
entry.

Nicholas Piggin (25):
  powerpc/64s/exception: Introduce INT_DEFINE parameter block for code
generation
  powerpc/64s/exception: Add GEN_COMMON macro that uses INT_DEFINE
parameters
  powerpc/64s/exception: Add GEN_KVM macro that uses INT_DEFINE
parameters
  powerpc/64s/exception: Expand EXC_COMMON and EXC_COMMON_ASYNC macros
  powerpc/64s/exception: Move all interrupt handlers to new style code
gen macros
  powerpc/64s/exception: Remove old INT_ENTRY macro
  powerpc/64s/exception: Remove old INT_COMMON macro
  powerpc/64s/exception: Remove old INT_KVM_HANDLER
  powerpc/64s/exception: Add ISIDE option
  powerpc/64s/exception: move real->virt switch into the common handler
  powerpc/64s/exception: move soft-mask test to common code
  powerpc/64s/exception: move KVM test to common code
  powerpc/64s/exception: remove confusing IEARLY option
  powerpc/64s/exception: remove the SPR saving patch code macros
  powerpc/64s/exception: trim unused arguments from KVMTEST macro
  powerpc/64s/exception: hdecrementer avoid touching the stack
  powerpc/64s/exception: re-inline some handlers
  powerpc/64s/exception: Clean up SRR specifiers
  powerpc/64s/exception: add more comments for interrupt handlers
  powerpc/64s/exception: only test KVM in SRR interrupts when PR KVM is
supported
  powerpc/64s/exception: soft nmi interrupt should not use
ret_from_except
  powerpc/64: system call remove non-volatile GPR save optimisation
  powerpc/64: system call implement the bulk of the logic in C
  powerpc/64s: interrupt return in C
  powerpc/64s/exception: remove lite interrupt return

 MAINTAINERS   |2 +
 arch/powerpc/Kconfig  |5 +-
 arch/powerpc/include/asm/asm-prototypes.h |   17 +-
 .../powerpc/include/asm/book3s/64/kup-radix.h |   24 +-
 arch/powerpc/include/asm/cputime.h|   24 +
 arch/powerpc/include/asm/exception-64s.h  |4 -
 arch/powerpc/include/asm/hw_irq.h |4 +
 arch/powerpc/include/asm/ptrace.h |3 +
 arch/powerpc/include/asm/signal.h |3 +
 arch/powerpc/include/asm/switch_to.h  |   11 +
 arch/powerpc/include/asm/thread_info.h|4 +-
 arch/powerpc/include/asm/time.h   |4 +-
 arch/powerpc/include/asm/unistd.h |1 +
 arch/powerpc/kernel/Makefile  |9 +-
 arch/powerpc/kernel/entry_64.S|  880 ++--
 arch/powerpc/kernel/exceptions-64e.S  |  255 ++-
 arch/powerpc/kernel/exceptions-64s.S  | 1937 -
 arch/powerpc/kernel/process.c |   89 +-
 arch/powerpc/kernel/signal.c  |  144 +-
 arch/powerpc/kernel/signal.h  |2 -
 arch/powerpc/kernel/signal_32.c   |  140 --
 arch/powerpc/kernel/syscall_64.c  |  349 +++
 arch/powerpc/kernel/syscalls/syscall.tbl  |   22 +-
 arch/powerpc/kernel/systbl.S  |9 +-
 arch/powerpc/kernel/time.c|9 -
 arch/powerpc/kernel/vdso.c|3 +-
 arch/powerpc/kernel/vector.S  |2 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |   11 -
 arch/powerpc/kvm/book3s_segment.S |7 -
 arch/powerpc/perf/Makefile|5 +-
 arch/powerpc/perf/callchain.c |  387 +---
 arch/powerpc/perf/callchain.h |   20 +
 arch/powerpc/perf/callchain_32.c  |  197 ++
 arch/powerpc/perf/callchain_64.c  |  178 ++
 fs/read_write.c   |3 +-
 35 files changed, 2799 insertions(+), 1965 deletions(-)
 create mode 100644 arch/powerpc/kernel/syscall_64.c
 create mode 100644 arch/powerpc/perf/callchain.h
 create mode 100644 arch/powerpc/perf/callchain_32.c
 create mode 100644 arch/powerpc/perf/callchain_64.c

-- 
2.23.0



Re: [PATCH v2] of: unittest: fix memory leak in attach_node_and_children

2019-11-26 Thread Rob Herring
On Tue, 26 Nov 2019 02:48:04 +0100, Erhard Furtner wrote:
> In attach_node_and_children memory is allocated for full_name via
> kasprintf. If the condition of the 1st if is not met the function
> returns early without freeing the memory. Add a kfree() to fix that.
> 
> This has been detected with kmemleak:
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=205327
> 
> It looks like the leak was introduced by this commit:
> Fixes: 5babefb7f7ab ("of: unittest: allow base devicetree to have symbol 
> metadata")
> 
> Signed-off-by: Erhard Furtner 
> Reviewed-by: Michael Ellerman 
> Reviewed-by: Tyrel Datwyler 
> ---
> Changes in v2:
>   - Make the commit message more clearer.
> 
>  drivers/of/unittest.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 

Applied, thanks.

Rob


Re: [PATCH v2] dma-mapping: treat dev->bus_dma_mask as a DMA limit

2019-11-26 Thread Nicolas Saenz Julienne
On Mon, 2019-11-25 at 16:33 +, Robin Murphy wrote:
> On 25/11/2019 7:44 am, Christoph Hellwig wrote:
> > On Sat, Nov 23, 2019 at 09:51:08AM -0700, Nathan Chancellor wrote:
> > > Just as an FYI, this introduces a warning on arm32 allyesconfig for me:
> > 
> > I think the dma_limit argument to iommu_dma_alloc_iova should be a u64
> > and/or we need to use min_t and open code the zero exception.
> > 
> > Robin, Nicolas - any opinions?
> 
> Yeah, given that it's always held a mask I'm not entirely sure why it 
> was ever a dma_addr_t rather than a u64. Unless anyone else is desperate 
> to do it I'll get a cleanup patch ready for rc1.

Sounds good to me too

Robin, since I started the mess, I'll be happy to do it if it helps offloading
some work from you.

Regards,
Nicolas



signature.asc
Description: This is a digitally signed message part


Re: [PATCH 1/1] powerpc/kvm/book3s: Fixes possible 'use after release' of kvm

2019-11-26 Thread Sean Christopherson
On Tue, Nov 26, 2019 at 02:52:12PM -0300, Leonardo Bras wrote:
> Fixes a possible 'use after free' of kvm variable.
> It does use mutex_unlock(&kvm->lock) after possible freeing a variable
> with kvm_put_kvm(kvm).

Moving the calls to kvm_put_kvm() to the end of the functions doesn't
actually fix a use-after-free.  In these flows, the reference being
released is a borrowed reference that KVM takes on behalf of the entity it
is creating, e.g. device, vcpu, or spapr tce.  The caller of these create
helpers must also hold its own reference to @kvm on top of the borrowed
reference, i.e. these kvm_put_kvm() calls will never free @kvm (assuming
there are no refcounting bugs elsewhere in KVM).

If one these kvm_put_kvm() calls did unexpectedly free @kvm (due to a bug
somewhere else), KVM would still hit a use-after-free scenario as the
caller still thinks @kvm is valid.  Currently, this would only happen on a
subsequent ioctl() on the caller's file descriptor (which holds a pointer
to @kvm), as the callers of these functions don't directly dereference
@kvm after the functions return.  But, not dereferencing @kvm isn't deliberate
or functionally required, it's just how the code happens to be written.

The intent of adding kvm_put_kvm_no_destroy() was primarily to document
that under no circumstance should the to-be-put reference be the *last*
reference to @kvm.  Moving the call to kvm_put_kvm{_no_destroy}() doesn't
change that.

> Signed-off-by: Leonardo Bras 
> ---
>  arch/powerpc/kvm/book3s_64_vio.c | 3 +--
>  virt/kvm/kvm_main.c  | 8 
>  2 files changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c 
> b/arch/powerpc/kvm/book3s_64_vio.c
> index 5834db0a54c6..a402ead833b6 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -316,14 +316,13 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  
>   if (ret >= 0)
>   list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> - else
> - kvm_put_kvm(kvm);
>  
>   mutex_unlock(&kvm->lock);
>  
>   if (ret >= 0)
>   return ret;
>  
> + kvm_put_kvm(kvm);
>   kfree(stt);
>   fail_acct:
>   account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 13efc291b1c7..f37089b60d09 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2744,10 +2744,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, 
> u32 id)
>   /* Now it's all set up, let userspace reach it */
>   kvm_get_kvm(kvm);
>   r = create_vcpu_fd(vcpu);
> - if (r < 0) {
> - kvm_put_kvm(kvm);
> + if (r < 0)
>   goto unlock_vcpu_destroy;
> - }
>  
>   kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
>  
> @@ -2771,6 +2769,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, 
> u32 id)
>   mutex_lock(&kvm->lock);
>   kvm->created_vcpus--;
>   mutex_unlock(&kvm->lock);
> + if (r < 0)
> + kvm_put_kvm(kvm);
>   return r;
>  }
>  
> @@ -3183,10 +3183,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
>   kvm_get_kvm(kvm);
>   ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | 
> O_CLOEXEC);
>   if (ret < 0) {
> - kvm_put_kvm(kvm);
>   mutex_lock(&kvm->lock);
>   list_del(&dev->vm_node);
>   mutex_unlock(&kvm->lock);
> + kvm_put_kvm(kvm);
>   ops->destroy(dev);
>   return ret;
>   }
> -- 
> 2.23.0
> 


[PATCH 1/1] powerpc/kvm/book3s: Fixes possible 'use after release' of kvm

2019-11-26 Thread Leonardo Bras
Fixes a possible 'use after free' of the kvm variable.
It uses mutex_unlock(&kvm->lock) after possibly freeing the variable
with kvm_put_kvm(kvm).

Signed-off-by: Leonardo Bras 
---
 arch/powerpc/kvm/book3s_64_vio.c | 3 +--
 virt/kvm/kvm_main.c  | 8 
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 5834db0a54c6..a402ead833b6 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -316,14 +316,13 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 
if (ret >= 0)
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
-   else
-   kvm_put_kvm(kvm);
 
mutex_unlock(&kvm->lock);
 
if (ret >= 0)
return ret;
 
+   kvm_put_kvm(kvm);
kfree(stt);
  fail_acct:
account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 13efc291b1c7..f37089b60d09 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2744,10 +2744,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 
id)
/* Now it's all set up, let userspace reach it */
kvm_get_kvm(kvm);
r = create_vcpu_fd(vcpu);
-   if (r < 0) {
-   kvm_put_kvm(kvm);
+   if (r < 0)
goto unlock_vcpu_destroy;
-   }
 
kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
 
@@ -2771,6 +2769,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 
id)
mutex_lock(&kvm->lock);
kvm->created_vcpus--;
mutex_unlock(&kvm->lock);
+   if (r < 0)
+   kvm_put_kvm(kvm);
return r;
 }
 
@@ -3183,10 +3183,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
kvm_get_kvm(kvm);
ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | 
O_CLOEXEC);
if (ret < 0) {
-   kvm_put_kvm(kvm);
mutex_lock(&kvm->lock);
list_del(&dev->vm_node);
mutex_unlock(&kvm->lock);
+   kvm_put_kvm(kvm);
ops->destroy(dev);
return ret;
}
-- 
2.23.0



[PATCH v2 1/2] powerpc/hw_breakpoints: Rewrite 8xx breakpoints to allow any address range size.

2019-11-26 Thread Christophe Leroy
Unlike standard powerpc, Powerpc 8xx doesn't have SPRN_DABR, but
it has a breakpoint support based on a set of comparators which
allow more flexibility.

Commit 4ad8622dc548 ("powerpc/8xx: Implement hw_breakpoint")
implemented breakpoints by emulating the DABR behaviour. It did
this by setting one comparator to match 4 bytes at breakpoint address
and the other comparator to match 4 bytes at breakpoint address + 4.

Rewrite 8xx hw_breakpoint to make breakpoints match all addresses
defined by the breakpoint address and length by making full use of
comparators.

Now, comparator E is set to match any address greater than breakpoint
address minus one. Comparator F is set to match any address lower than
breakpoint address plus breakpoint length. Addresses are aligned
to 32 bits.

When the breakpoint range starts at address 0, the breakpoint is set
to match comparator F only. When the breakpoint range ends at address
0xffffffff, the breakpoint is set to match comparator E only.
Otherwise the breakpoint is set to match comparator E and F.

At the same time, use registers bit names instead of hardcoded values.

Signed-off-by: Christophe Leroy 
Cc: Ravi Bangoria 

---
v2: rebased on today's powerpc/next ; added 32 bit alignment
---
 arch/powerpc/include/asm/hw_breakpoint.h |  4 +++
 arch/powerpc/include/asm/reg_8xx.h   | 14 
 arch/powerpc/kernel/hw_breakpoint.c  | 15 +
 arch/powerpc/kernel/process.c| 57 
 4 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 27ac6f5d2891..f2f8d8aa8e3b 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -34,7 +34,11 @@ struct arch_hw_breakpoint {
 #define HW_BRK_TYPE_PRIV_ALL   (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
 HW_BRK_TYPE_HYP)
 
+#ifdef CONFIG_PPC_8xx
+#define HW_BREAKPOINT_ALIGN 0x3
+#else
 #define HW_BREAKPOINT_ALIGN 0x7
+#endif
 
 #define DABR_MAX_LEN   8
 #define DAWR_MAX_LEN   512
diff --git a/arch/powerpc/include/asm/reg_8xx.h 
b/arch/powerpc/include/asm/reg_8xx.h
index 07df35ee8cbc..299ee7be0f67 100644
--- a/arch/powerpc/include/asm/reg_8xx.h
+++ b/arch/powerpc/include/asm/reg_8xx.h
@@ -35,7 +35,21 @@
 #define SPRN_CMPE  152
 #define SPRN_CMPF  153
 #define SPRN_LCTRL1156
+#define   LCTRL1_CTE_GT0xc000
+#define   LCTRL1_CTF_LT0x1400
+#define   LCTRL1_CRWE_RW   0x
+#define   LCTRL1_CRWE_RO   0x0004
+#define   LCTRL1_CRWE_WO   0x000c
+#define   LCTRL1_CRWF_RW   0x
+#define   LCTRL1_CRWF_RO   0x0001
+#define   LCTRL1_CRWF_WO   0x0003
 #define SPRN_LCTRL2157
+#define   LCTRL2_LW0EN 0x8000
+#define   LCTRL2_LW0LA_E   0x
+#define   LCTRL2_LW0LA_F   0x0400
+#define   LCTRL2_LW0LA_EandF   0x0800
+#define   LCTRL2_LW0LADC   0x0200
+#define   LCTRL2_SLW0EN0x0002
 #ifdef CONFIG_PPC_8xx
 #define SPRN_ICTRL 158
 #endif
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 58ce3d37c2a3..2462cd7c565c 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -160,6 +160,9 @@ static int hw_breakpoint_validate_len(struct 
arch_hw_breakpoint *hw)
/* DAWR region can't cross 512 bytes boundary */
if ((start_addr >> 9) != (end_addr >> 9))
return -EINVAL;
+   } else if (IS_ENABLED(CONFIG_PPC_8xx)) {
+   /* 8xx can setup a range without limitation */
+   max_len = U16_MAX;
}
 
if (hw_len > max_len)
@@ -328,13 +331,11 @@ int hw_breakpoint_handler(struct die_args *args)
}
 
info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
-   if (IS_ENABLED(CONFIG_PPC_8xx)) {
-   if (!dar_within_range(regs->dar, info))
-   info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-   } else {
-   if (!stepping_handler(regs, bp, info))
-   goto out;
-   }
+   if (!dar_within_range(regs->dar, info))
+   info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+
+   if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info))
+   goto out;
 
/*
 * As a policy, the callback is invoked in a 'trigger-after-execute'
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4df94b6e2f32..7fcf72e58826 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -740,28 +740,6 @@ static inline int __set_dabr(unsigned long dabr, unsigned 
long dabrx)
mtspr(SPRN_DABRX, dabrx);
return 0;
 }
-#elif defined(CONFIG_PPC_8xx)
-static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
-{
-   unsigned long addr = dabr & ~HW_BRK_TYPE_DABR

[PATCH v2 2/2] selftests/powerpc: enable range tests on 8xx in ptrace-hwbreak.c selftest

2019-11-26 Thread Christophe Leroy
8xx is now able to support any range length so range tests can be enabled.

Signed-off-by: Christophe Leroy 

---
v2: new
---
 tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c 
b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index 7deedbc16b0b..fc477dfe86a2 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -455,9 +455,8 @@ run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, 
bool dawr)
if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) {
test_sethwdebug_exact(child_pid);
 
-   if (!is_8xx)
-   test_sethwdebug_range_aligned(child_pid);
-   if (dawr && !is_8xx) {
+   test_sethwdebug_range_aligned(child_pid);
+   if (dawr || is_8xx) {
test_sethwdebug_range_unaligned(child_pid);
test_sethwdebug_range_unaligned_dar(child_pid);
test_sethwdebug_dawr_max_range(child_pid);
-- 
2.13.3



Re: Bug 205201 - Booting halts if Dawicontrol DC-2976 UW SCSI board installed, unless RAM size limited to 3500M

2019-11-26 Thread Christoph Hellwig
On Tue, Nov 26, 2019 at 12:26:38PM +0100, Christian Zigotzky wrote:
> Hello Christoph,
>
> The PCI TV card works with your patch! I was able to patch your Git kernel 
> with the patch above.
>
> I haven't found any error messages in the dmesg yet.

Thanks.  Unfortunately this is a bit of a hack as we need to set
the mask based on runtime information like the magic FSL PCIe window.
Let me try to draft something better up, and thanks already for testing
this one!


[PATCH] powernv/opal-sensor-groups: Add documentation for the sysfs interfaces

2019-11-26 Thread Gautham R. Shenoy
From: Shilpasri G Bhat 

Commit bf9571550f52 ("powerpc/powernv: Add support to clear sensor
groups data") added a mechanism to clear sensor-group data via a sysfs
interface. However, the ABI for that interface has not been
documented.

This patch documents the ABI for the sysfs interface for sensor-groups
and clearing the sensor-groups.

This patch was originally sent by Shilpasri G Bhat on the mailing list:
https://lkml.org/lkml/2018/8/1/85

Signed-off-by: Shilpasri G Bhat 
Signed-off-by: Gautham R. Shenoy 
---
 .../ABI/testing/sysfs-firmware-opal-sensor-groups   | 21 +
 1 file changed, 21 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups

diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups 
b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups
new file mode 100644
index 000..3a2dfe5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups
@@ -0,0 +1,21 @@
+What:  /sys/firmware/opal/sensor_groups
+Date:  August 2017
+Contact:   Linux for PowerPC mailing list 
+Description:   Sensor groups directory for POWER9 powernv servers
+
+   Each folder in this directory contains a sensor group
+   which are classified based on type of the sensor
+   like power, temperature, frequency, current, etc. They
+   can also indicate the group of sensors belonging to
+   different owners like CSM, Profiler, Job-Scheduler
+
+What:  /sys/firmware/opal/sensor_groups//clear
+Date:  August 2017
+Contact:   Linux for PowerPC mailing list 
+Description:   Sysfs file to clear the min-max of all the sensors
+   belonging to the group.
+
+   Writing 1 to this file will clear the minimum and
+   maximum values of all the sensors in the group.
+   In POWER9, the min-max of a sensor is the historical minimum
+   and maximum value of the sensor cached by OCC.
-- 
1.9.4



[PATCH v2] powerpc/8xx: Fix permanently mapped IMMR region.

2019-11-26 Thread Christophe Leroy
When not using large TLBs, the IMMR region is still
mapped as a whole block in the FIXMAP area.

Properly report that the IMMR region is block-mapped even
when not using large TLBs.

Signed-off-by: Christophe Leroy 

---
v2: rebased on today's powerpc/next (this drops the change to mem.c which is 
already merged)
---
 arch/powerpc/mm/nohash/8xx.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 090af2d2d3e4..2c98078d2ede 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -21,33 +21,34 @@ extern int __map_without_ltlbs;
 static unsigned long block_mapped_ram;
 
 /*
- * Return PA for this VA if it is in an area mapped with LTLBs.
+ * Return PA for this VA if it is in an area mapped with LTLBs or fixmap.
  * Otherwise, returns 0
  */
 phys_addr_t v_block_mapped(unsigned long va)
 {
unsigned long p = PHYS_IMMR_BASE;
 
-   if (__map_without_ltlbs)
-   return 0;
if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
return p + va - VIRT_IMMR_BASE;
+   if (__map_without_ltlbs)
+   return 0;
if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram)
return __pa(va);
return 0;
 }
 
 /*
- * Return VA for a given PA mapped with LTLBs or 0 if not mapped
+ * Return VA for a given PA mapped with LTLBs or fixmap
+ * Return 0 if not mapped
  */
 unsigned long p_block_mapped(phys_addr_t pa)
 {
unsigned long p = PHYS_IMMR_BASE;
 
-   if (__map_without_ltlbs)
-   return 0;
if (pa >= p && pa < p + IMMR_SIZE)
return VIRT_IMMR_BASE + pa - p;
+   if (__map_without_ltlbs)
+   return 0;
if (pa < block_mapped_ram)
return (unsigned long)__va(pa);
return 0;
-- 
2.13.3



[PATCH v4 16/16] powerpc/32s: Activate CONFIG_VMAP_STACK

2019-11-26 Thread Christophe Leroy
A few changes to retrieve DAR and DSISR from struct regs
instead of retrieving them directly, as they may have
changed due to a TLB miss.

Also modifies hash_page() and friends to work with virtual
data addresses instead of physical ones. Same on load_up_fpu()
and load_up_altivec().

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S |  4 +++
 arch/powerpc/kernel/fpu.S  |  3 +++
 arch/powerpc/kernel/head_32.S  | 19 +++---
 arch/powerpc/kernel/head_32.h  |  4 ++-
 arch/powerpc/kernel/vector.S   |  3 +++
 arch/powerpc/mm/book3s32/hash_low.S| 46 +-
 arch/powerpc/mm/book3s32/mmu.c |  9 +--
 arch/powerpc/platforms/Kconfig.cputype |  2 ++
 8 files changed, 67 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 00fcf954e742..1d3b152ee54f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -1365,7 +1365,11 @@ _GLOBAL(enter_rtas)
lis r6,1f@ha/* physical return address for rtas */
addir6,r6,1f@l
tophys(r6,r6)
+#ifdef CONFIG_VMAP_STACK
+   mr  r7, r1
+#else
tophys(r7,r1)
+#endif
lwz r8,RTASENTRY(r4)
lwz r4,RTASBASE(r4)
mfmsr   r9
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 0bb991ddd264..3235a8da6af7 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -94,6 +94,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
/* enable use of FP after return */
 #ifdef CONFIG_PPC32
mfspr   r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
+#ifdef CONFIG_VMAP_STACK
+   tovirt(r5, r5)
+#endif
lwz r4,THREAD_FPEXC_MODE(r5)
ori r9,r9,MSR_FP/* enable FP for current */
or  r9,r9,r4
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 90ef355e958b..28391a408a22 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -272,14 +272,22 @@ __secondary_hold_acknowledge:
  */
. = 0x200
DO_KVM  0x200
+MachineCheck:
EXCEPTION_PROLOG_0
+#ifdef CONFIG_VMAP_STACK
+   li  r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+   mtmsr   r11
+#endif
 #ifdef CONFIG_PPC_CHRP
mfspr   r11, SPRN_SPRG_THREAD
+#ifdef CONFIG_VMAP_STACK
+   tovirt(r11, r11)
+#endif
lwz r11, RTAS_SP(r11)
cmpwi   cr1, r11, 0
bne cr1, 7f
 #endif /* CONFIG_PPC_CHRP */
-   EXCEPTION_PROLOG_1
+   EXCEPTION_PROLOG_1 rtas
 7: EXCEPTION_PROLOG_2
addir3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_CHRP
@@ -294,7 +302,7 @@ __secondary_hold_acknowledge:
. = 0x300
DO_KVM  0x300
 DataAccess:
-   EXCEPTION_PROLOG
+   EXCEPTION_PROLOG dar
get_and_save_dar_dsisr_on_stack r4, r5, r11
 BEGIN_MMU_FTR_SECTION
 #ifdef CONFIG_PPC_KUAP
@@ -334,7 +342,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
. = 0x600
DO_KVM  0x600
 Alignment:
-   EXCEPTION_PROLOG
+   EXCEPTION_PROLOG dar
save_dar_dsisr_on_stack r4, r5, r11
addir3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x600, alignment_exception)
@@ -645,6 +653,11 @@ handle_page_fault_tramp_1:
 handle_page_fault_tramp_2:
EXC_XFER_LITE(0x300, handle_page_fault)
 
+#ifdef CONFIG_VMAP_STACK
+stack_ovf_trampoline:
+   b   stack_ovf
+#endif
+
 AltiVecUnavailable:
EXCEPTION_PROLOG
 #ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 283d4298d555..ae2c8e07e1d5 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -38,10 +38,12 @@
andi.   r11, r11, MSR_PR
 .endm
 
-.macro EXCEPTION_PROLOG_1
+.macro EXCEPTION_PROLOG_1 rtas
 #ifdef CONFIG_VMAP_STACK
+   .ifb\rtas
li  r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
mtmsr   r11
+   .endif
subir11, r1, INT_FRAME_SIZE /* use r1 if kernel */
 #else
tophys(r11,r1)  /* use tophys(r1) if kernel */
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 8eb867dbad5f..25c14a0981bf 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -67,6 +67,9 @@ _GLOBAL(load_up_altivec)
 #ifdef CONFIG_PPC32
mfspr   r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) 
*/
orisr9,r9,MSR_VEC@h
+#ifdef CONFIG_VMAP_STACK
+   tovirt(r5, r5)
+#endif
 #else
ld  r4,PACACURRENT(r13)
addir5,r4,THREAD/* Get THREAD */
diff --git a/arch/powerpc/mm/book3s32/hash_low.S 
b/arch/powerpc/mm/book3s32/hash_low.S
index 8bbbd9775c8a..c11b0a005196 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -25,6 +25,12 @@
 #include 
 #include 
 
+#ifdef CONFIG_VMAP_STA

[PATCH v4 14/16] powerpc/32s: reorganise DSI handler.

2019-11-26 Thread Christophe Leroy
The part dedicated to handling hash_page() is fully unneeded for
processors not having real hash pages like the 603.

Let's enlarge the content of the feature fixup, and provide
an alternative which jumps directly instead of getting NIPs.

Also, in preparation of VMAP stacks, the end of DSI handler has moved
to later in the code as it won't fit anymore once VMAP stacks
are there.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.S | 31 +--
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 449625b4ff03..7ec780858299 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -295,24 +295,20 @@ __secondary_hold_acknowledge:
DO_KVM  0x300
 DataAccess:
EXCEPTION_PROLOG
-   mfspr   r10,SPRN_DSISR
-   stw r10,_DSISR(r11)
+   get_and_save_dar_dsisr_on_stack r4, r5, r11
+BEGIN_MMU_FTR_SECTION
 #ifdef CONFIG_PPC_KUAP
-   andis.  r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | 
DSISR_PROTFAULT)@h
+   andis.  r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | 
DSISR_PROTFAULT)@h
 #else
-   andis.  r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
+   andis.  r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
 #endif
-   bne 1f  /* if not, try to put a PTE */
-   mfspr   r4,SPRN_DAR /* into the hash table */
-   rlwinm  r3,r10,32-15,21,21  /* DSISR_STORE -> _PAGE_RW */
-BEGIN_MMU_FTR_SECTION
+   bne handle_page_fault_tramp_2   /* if not, try to put a PTE */
+   rlwinm  r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */
bl  hash_page
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-1: lwz r5,_DSISR(r11)  /* get DSISR value */
-   mfspr   r4,SPRN_DAR
-   stw r4, _DAR(r11)
-   EXC_XFER_LITE(0x300, handle_page_fault)
-
+   b   handle_page_fault_tramp_1
+FTR_SECTION_ELSE
+   b   handle_page_fault_tramp_2
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
 
 /* Instruction access exception. */
. = 0x400
@@ -642,6 +638,13 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 
. = 0x3000
 
+handle_page_fault_tramp_1:
+   lwz r4, _DAR(r11)
+   lwz r5, _DSISR(r11)
+   /* fall through */
+handle_page_fault_tramp_2:
+   EXC_XFER_LITE(0x300, handle_page_fault)
+
 AltiVecUnavailable:
EXCEPTION_PROLOG
 #ifdef CONFIG_ALTIVEC
-- 
2.13.3



[PATCH v4 15/16] powerpc/32s: avoid crossing page boundary while changing SRR0/1.

2019-11-26 Thread Christophe Leroy
Trying VMAP_STACK with KVM, vmlinux was not starting.
This was due to SRR0 and SRR1 clobbered by an ISI due to
the rfi being in a different page than the mtsrr0/1:

c0003fe0 :
c0003fe0:   38 83 00 54 addir4,r3,84
c0003fe4:   7c 60 00 a6 mfmsr   r3
c0003fe8:   70 60 00 30 andi.   r0,r3,48
c0003fec:   4d 82 00 20 beqlr
c0003ff0:   7c 63 00 78 andcr3,r3,r0
c0003ff4:   7c 9a 03 a6 mtsrr0  r4
c0003ff8:   7c 7b 03 a6 mtsrr1  r3
c0003ffc:   7c 00 04 ac hwsync
c0004000:   4c 00 00 64 rfi

Align the 4 instruction block used to deactivate MMU to order 4,
so that the block never crosses a page boundary.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.S | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 7ec780858299..90ef355e958b 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -917,6 +917,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
ori r4,r4,2f@l
tophys(r4,r4)
li  r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+
+   .align  4
mtspr   SPRN_SRR0,r4
mtspr   SPRN_SRR1,r3
SYNC
@@ -1058,6 +1060,8 @@ _ENTRY(update_bats)
rlwinm  r0, r6, 0, ~MSR_RI
rlwinm  r0, r0, 0, ~MSR_EE
mtmsr   r0
+
+   .align  4
mtspr   SPRN_SRR0, r4
mtspr   SPRN_SRR1, r3
SYNC
@@ -1097,6 +1101,8 @@ mmu_off:
andi.   r0,r3,MSR_DR|MSR_IR /* MMU enabled? */
beqlr
andcr3,r3,r0
+
+   .align  4
mtspr   SPRN_SRR0,r4
mtspr   SPRN_SRR1,r3
sync
-- 
2.13.3



[PATCH v4 13/16] powerpc/8xx: Enable CONFIG_VMAP_STACK

2019-11-26 Thread Christophe Leroy
This patch enables CONFIG_VMAP_STACK. For that, a few changes are
done in head_8xx.S.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_8xx.S | 34 --
 arch/powerpc/platforms/Kconfig.cputype |  1 +
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 225e242ce1c5..fc6d4d10e298 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -127,7 +127,7 @@ instruction_counter:
 /* Machine check */
. = 0x200
 MachineCheck:
-   EXCEPTION_PROLOG
+   EXCEPTION_PROLOG dar
save_dar_dsisr_on_stack r4, r5, r11
li  r6, RPN_PATTERN
mtspr   SPRN_DAR, r6/* Tag DAR, to be used in DTLB Error */
@@ -140,7 +140,7 @@ MachineCheck:
 /* Alignment exception */
. = 0x600
 Alignment:
-   EXCEPTION_PROLOG
+   EXCEPTION_PROLOG dar
save_dar_dsisr_on_stack r4, r5, r11
li  r6, RPN_PATTERN
mtspr   SPRN_DAR, r6/* Tag DAR, to be used in DTLB Error */
@@ -457,20 +457,26 @@ InstructionTLBError:
  */
. = 0x1400
 DataTLBError:
-   EXCEPTION_PROLOG_0
+   EXCEPTION_PROLOG_0 dar
mfspr   r11, SPRN_DAR
cmpwi   cr1, r11, RPN_PATTERN
beq-cr1, FixupDAR   /* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
+#ifdef CONFIG_VMAP_STACK
+   li  r11, RPN_PATTERN
+   mtspr   SPRN_DAR, r11   /* Tag DAR, to be used in DTLB Error */
+#endif
EXCEPTION_PROLOG_1
-   EXCEPTION_PROLOG_2
+   EXCEPTION_PROLOG_2 dar
get_and_save_dar_dsisr_on_stack r4, r5, r11
andis.  r10,r5,DSISR_NOHPTE@h
beq+.Ldtlbie
tlbie   r4
 .Ldtlbie:
+#ifndef CONFIG_VMAP_STACK
li  r10,RPN_PATTERN
mtspr   SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */
+#endif
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
 
@@ -492,16 +498,20 @@ DARFixed:/* Return from dcbx instruction bug workaround */
  */
 do_databreakpoint:
EXCEPTION_PROLOG_1
-   EXCEPTION_PROLOG_2
+   EXCEPTION_PROLOG_2 dar
addir3,r1,STACK_FRAME_OVERHEAD
mfspr   r4,SPRN_BAR
stw r4,_DAR(r11)
+#ifdef CONFIG_VMAP_STACK
+   lwz r5,_DSISR(r11)
+#else
mfspr   r5,SPRN_DSISR
+#endif
EXC_XFER_STD(0x1c00, do_break)
 
. = 0x1c00
 DataBreakpoint:
-   EXCEPTION_PROLOG_0
+   EXCEPTION_PROLOG_0 dar
mfspr   r11, SPRN_SRR0
cmplwi  cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
cmplwi  cr7, r11, (.Litlbie - PAGE_OFFSET)@l
@@ -530,6 +540,11 @@ InstructionBreakpoint:
EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
 
+#ifdef CONFIG_VMAP_STACK
+stack_ovf_trampoline:
+   b   stack_ovf
+#endif
+
. = 0x2000
 
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi 
instructions
@@ -650,7 +665,14 @@ FixupDAR:/* Entry point for dcbx workaround. */
 152:
mfdar   r11
mtctr   r11 /* restore ctr reg from DAR */
+#ifdef CONFIG_VMAP_STACK
+   mfspr   r11, SPRN_SPRG_THREAD
+   stw r10, DAR(r11)
+   mfspr   r10, SPRN_DSISR
+   stw r10, DSISR(r11)
+#else
mtdar   r10 /* save fault EA to DAR */
+#endif
mfspr   r10,SPRN_M_TW
b   DARFixed/* Go back to normal TLB handling */
 
diff --git a/arch/powerpc/platforms/Kconfig.cputype 
b/arch/powerpc/platforms/Kconfig.cputype
index 1e352c2eea7a..f0583251e9a3 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -49,6 +49,7 @@ config PPC_8xx
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
select PPC_MM_SLICES if HUGETLB_PAGE
+   select HAVE_ARCH_VMAP_STACK
 
 config 40x
bool "AMCC 40x"
-- 
2.13.3



[PATCH v4 12/16] powerpc/8xx: split breakpoint exception

2019-11-26 Thread Christophe Leroy
Breakpoint exception is big.

Split it to support future growth on exception prolog.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_8xx.S | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 1e718e47fe3c..225e242ce1c5 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -490,14 +490,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */
  * support of breakpoints and such.  Someday I will get around to
  * using them.
  */
-   . = 0x1c00
-DataBreakpoint:
-   EXCEPTION_PROLOG_0
-   mfspr   r11, SPRN_SRR0
-   cmplwi  cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
-   cmplwi  cr7, r11, (.Litlbie - PAGE_OFFSET)@l
-   beq-cr1, 11f
-   beq-cr7, 11f
+do_databreakpoint:
EXCEPTION_PROLOG_1
EXCEPTION_PROLOG_2
addir3,r1,STACK_FRAME_OVERHEAD
@@ -505,7 +498,15 @@ DataBreakpoint:
stw r4,_DAR(r11)
mfspr   r5,SPRN_DSISR
EXC_XFER_STD(0x1c00, do_break)
-11:
+
+   . = 0x1c00
+DataBreakpoint:
+   EXCEPTION_PROLOG_0
+   mfspr   r11, SPRN_SRR0
+   cmplwi  cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
+   cmplwi  cr7, r11, (.Litlbie - PAGE_OFFSET)@l
+   cror4*cr1+eq, 4*cr1+eq, 4*cr7+eq
+   bne cr1, do_databreakpoint
mtcrr10
mfspr   r10, SPRN_SPRG_SCRATCH0
mfspr   r11, SPRN_SPRG_SCRATCH1
-- 
2.13.3



[PATCH v4 06/16] powerpc/32: prepare for CONFIG_VMAP_STACK

2019-11-26 Thread Christophe Leroy
To support CONFIG_VMAP_STACK, the kernel has to activate Data MMU
Translation for accessing the stack. Before doing that it must save
SRR0, SRR1 and also DAR and DSISR when relevant, in order to not
loose them in case there is a Data TLB Miss once the translation is
reactivated.

This patch adds fields in thread struct for saving those registers.
It prepares entry_32.S to handle exception entry with
Data MMU Translation enabled and alters EXCEPTION_PROLOG macros to
save SRR0, SRR1, DAR and DSISR then reenables Data MMU.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/processor.h   |   6 ++
 arch/powerpc/include/asm/thread_info.h |   5 ++
 arch/powerpc/kernel/asm-offsets.c  |   6 ++
 arch/powerpc/kernel/entry_32.S |   7 +++
 arch/powerpc/kernel/head_32.h  | 101 +
 5 files changed, 115 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index a9993e7a443b..92c02d15f117 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -163,6 +163,12 @@ struct thread_struct {
 #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
unsigned long   kuap;   /* opened segments for user access */
 #endif
+#ifdef CONFIG_VMAP_STACK
+   unsigned long   srr0;
+   unsigned long   srr1;
+   unsigned long   dar;
+   unsigned long   dsisr;
+#endif
/* Debug Registers */
struct debug_reg debug;
struct thread_fp_state  fp_state;
diff --git a/arch/powerpc/include/asm/thread_info.h 
b/arch/powerpc/include/asm/thread_info.h
index 8e1d0195ac36..488d5c4670ff 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -10,10 +10,15 @@
 #define _ASM_POWERPC_THREAD_INFO_H
 
 #include 
+#include 
 
 #ifdef __KERNEL__
 
+#if defined(CONFIG_VMAP_STACK) && CONFIG_THREAD_SHIFT < PAGE_SHIFT
+#define THREAD_SHIFT   PAGE_SHIFT
+#else
 #define THREAD_SHIFT   CONFIG_THREAD_SHIFT
+#endif
 
 #define THREAD_SIZE(1 << THREAD_SHIFT)
 
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 484f54dab247..782cbf489ab0 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -127,6 +127,12 @@ int main(void)
OFFSET(KSP_VSID, thread_struct, ksp_vsid);
 #else /* CONFIG_PPC64 */
OFFSET(PGDIR, thread_struct, pgdir);
+#ifdef CONFIG_VMAP_STACK
+   OFFSET(SRR0, thread_struct, srr0);
+   OFFSET(SRR1, thread_struct, srr1);
+   OFFSET(DAR, thread_struct, dar);
+   OFFSET(DSISR, thread_struct, dsisr);
+#endif
 #ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
OFFSET(THREAD_ACC, thread_struct, acc);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 317ad9df8ba8..2a26fe19f0b1 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -140,6 +140,9 @@ transfer_to_handler:
stw r12,_CTR(r11)
stw r2,_XER(r11)
mfspr   r12,SPRN_SPRG_THREAD
+#ifdef CONFIG_VMAP_STACK
+   tovirt(r12, r12)
+#endif
beq 2f  /* if from user, fix up THREAD.regs */
addir2, r12, -THREAD
addir11,r1,STACK_FRAME_OVERHEAD
@@ -195,7 +198,11 @@ transfer_to_handler:
 transfer_to_handler_cont:
 3:
mflrr9
+#ifdef CONFIG_VMAP_STACK
+   tovirt(r9, r9)
+#else
tovirt(r2, r2)  /* set r2 to current */
+#endif
lwz r11,0(r9)   /* virtual address of handler */
lwz r9,4(r9)/* where to go when done */
 #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index f19a1ab91fb5..59e775930be8 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -10,31 +10,57 @@
  * We assume sprg3 has the physical address of the current
  * task's thread_struct.
  */
-.macro EXCEPTION_PROLOG
-   EXCEPTION_PROLOG_0
+.macro EXCEPTION_PROLOG ext
+   EXCEPTION_PROLOG_0  \ext
EXCEPTION_PROLOG_1
-   EXCEPTION_PROLOG_2
+   EXCEPTION_PROLOG_2  \ext
 .endm
 
-.macro EXCEPTION_PROLOG_0
+.macro EXCEPTION_PROLOG_0 ext
mtspr   SPRN_SPRG_SCRATCH0,r10
mtspr   SPRN_SPRG_SCRATCH1,r11
+#ifdef CONFIG_VMAP_STACK
+   mfspr   r10, SPRN_SPRG_THREAD
+   .ifnb   \ext
+   mfspr   r11, SPRN_DAR
+   stw r11, DAR(r10)
+   mfspr   r11, SPRN_DSISR
+   stw r11, DSISR(r10)
+   .endif
+   mfspr   r11, SPRN_SRR0
+   stw r11, SRR0(r10)
+#endif
mfspr   r11, SPRN_SRR1  /* check whether user or kernel */
+#ifdef CONFIG_VMAP_STACK
+   stw r11, SRR1(r10)
+#endif
mfcrr10
andi.   r11, r11, MSR_PR
 .endm
 
 .macro EXCEPTION_PROLOG_1
+#ifdef CONFIG_VMAP_STACK
+   li  r11, MSR

  1   2   >