RE: [PATCH v2 03/10] PCI: designware-ep: Move the function of getting MSI capability forward

2019-08-23 Thread Xiaowei Bao


> -Original Message-
> From: Andrew Murray 
> Sent: 2019年8月23日 21:39
> To: Xiaowei Bao 
> Cc: bhelg...@google.com; robh...@kernel.org; mark.rutl...@arm.com;
> shawn...@kernel.org; Leo Li ; kis...@ti.com;
> lorenzo.pieral...@arm.co; a...@arndb.de; gre...@linuxfoundation.org; M.h.
> Lian ; Mingkai Hu ; Roy
> Zang ; jingooh...@gmail.com;
> gustavo.pimen...@synopsys.com; linux-...@vger.kernel.org;
> devicet...@vger.kernel.org; linux-ker...@vger.kernel.org;
> linux-arm-ker...@lists.infradead.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH v2 03/10] PCI: designware-ep: Move the function of
> getting MSI capability forward
> 
> On Thu, Aug 22, 2019 at 07:22:35PM +0800, Xiaowei Bao wrote:
> > Move the function of getting MSI capability to the front of init
> > function, because the init function of the EP platform driver will use
> > the return value by the function of getting MSI capability.
> >
> > Signed-off-by: Xiaowei Bao 
> 
> Reviewed-by: Andrew Murray 

Thanks a lot, I think moving this to ep_init is better.

> 
> > ---
> > v2:
> >  - No change.
> >
> >  drivers/pci/controller/dwc/pcie-designware-ep.c | 7 ---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > index b8388f8..0a6c199 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > @@ -656,6 +656,10 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
> > if (ret < 0)
> > epc->max_functions = 1;
> >
> > +   ep->msi_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSI);
> > +
> > +   ep->msix_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSIX);
> > +
> > if (ep->ops->ep_init)
> > ep->ops->ep_init(ep);
> >
> > @@ -672,9 +676,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
> > dev_err(dev, "Failed to reserve memory for MSI/MSI-X\n");
> > return -ENOMEM;
> > }
> > -   ep->msi_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSI);
> > -
> > -   ep->msix_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSIX);
> >
> > offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR);
> > if (offset) {
> > --
> > 2.9.5
> >


RE: [PATCH v2 08/10] PCI: layerscape: Add EP mode support for ls1088a and ls2088a

2019-08-23 Thread Xiaowei Bao


> -Original Message-
> From: Andrew Murray 
> Sent: 2019年8月23日 22:28
> To: Xiaowei Bao 
> Cc: bhelg...@google.com; robh...@kernel.org; mark.rutl...@arm.com;
> shawn...@kernel.org; Leo Li ; kis...@ti.com;
> lorenzo.pieral...@arm.co; a...@arndb.de; gre...@linuxfoundation.org; M.h.
> Lian ; Mingkai Hu ; Roy
> Zang ; jingooh...@gmail.com;
> gustavo.pimen...@synopsys.com; linux-...@vger.kernel.org;
> devicet...@vger.kernel.org; linux-ker...@vger.kernel.org;
> linux-arm-ker...@lists.infradead.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH v2 08/10] PCI: layerscape: Add EP mode support for
> ls1088a and ls2088a
> 
> On Thu, Aug 22, 2019 at 07:22:40PM +0800, Xiaowei Bao wrote:
> > Add PCIe EP mode support for ls1088a and ls2088a. There are some
> > differences between the LS1 and LS2 platforms, so refactor the code of
> > the EP driver.
> >
> > Signed-off-by: Xiaowei Bao 
> > ---
> > v2:
> >  - New mechanism for layerscape EP driver.
> 
> Was there a v1 of this patch?

Yes, but I don't know how to comments, ^_^

> 
> >
> >  drivers/pci/controller/dwc/pci-layerscape-ep.c | 76
> > --
> >  1 file changed, 58 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > index 7ca5fe8..2a66f07 100644
> > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > @@ -20,27 +20,29 @@
> >
> >  #define PCIE_DBI2_OFFSET   0x1000  /* DBI2 base address*/
> >
> > -struct ls_pcie_ep {
> > -   struct dw_pcie  *pci;
> > -   struct pci_epc_features *ls_epc;
> > +#define to_ls_pcie_ep(x)   dev_get_drvdata((x)->dev)
> > +
> > +struct ls_pcie_ep_drvdata {
> > +   u32 func_offset;
> > +   const struct dw_pcie_ep_ops *ops;
> > +   const struct dw_pcie_ops*dw_pcie_ops;
> >  };
> >
> > -#define to_ls_pcie_ep(x)   dev_get_drvdata((x)->dev)
> > +struct ls_pcie_ep {
> > +   struct dw_pcie  *pci;
> > +   struct pci_epc_features *ls_epc;
> > +   const struct ls_pcie_ep_drvdata *drvdata; };
> >
> >  static int ls_pcie_establish_link(struct dw_pcie *pci)  {
> > return 0;
> >  }
> >
> > -static const struct dw_pcie_ops ls_pcie_ep_ops = {
> > +static const struct dw_pcie_ops dw_ls_pcie_ep_ops = {
> > .start_link = ls_pcie_establish_link,  };
> >
> > -static const struct of_device_id ls_pcie_ep_of_match[] = {
> > -   { .compatible = "fsl,ls-pcie-ep",},
> > -   { },
> > -};
> > -
> >  static const struct pci_epc_features*  ls_pcie_ep_get_features(struct
> > dw_pcie_ep *ep)  { @@ -82,10 +84,44 @@ static int
> > ls_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
> > }
> >  }
> >
> > -static const struct dw_pcie_ep_ops pcie_ep_ops = {
> > +static unsigned int ls_pcie_ep_func_conf_select(struct dw_pcie_ep *ep,
> > +   u8 func_no)
> > +{
> > +   struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > +   struct ls_pcie_ep *pcie = to_ls_pcie_ep(pci);
> > +   u8 header_type;
> > +
> > +   header_type = ioread8(pci->dbi_base + PCI_HEADER_TYPE);
> > +
> > +   if (header_type & (1 << 7))
> > +   return pcie->drvdata->func_offset * func_no;
> > +   else
> > +   return 0;
> 
> It looks like there isn't a PCI define for multi function, the nearest I
> could find was PCI_HEADER_TYPE_MULTIDEVICE in hotplug/ibmphp.h. A comment
> above the test might be helpful to explain the test.

Yes, I have not found the PCI_HEADER_TYPE_MULTIDEVICE define. OK, I will add
the comment in the next version of the patch.

> 
> As the ls_pcie_ep_drvdata structures are static, the unset .func_offset will
> be initialised to 0, so you could just drop the test above.

OK, thanks

> 
> However something to the effect of the following may help spot
> misconfiguration:
> 
> WARN_ON(func_no && !pcie->drvdata->func_offset); return
> pcie->drvdata->func_offset * func_no;

Thanks a lot, this looks better.

> 
> The WARN is probably quite useful as if you are attempting to use non-zero
> functions and func_offset isn't set - then things may appear to work normally
> but actually will break horribly.

got it, thanks.

> 
> Thanks,
> 
> Andrew Murray
> 
> > +}
> > +
> > +static const struct dw_pcie_ep_ops ls_pcie_ep_ops = {
> > .ep_init = ls_pcie_ep_init,
> > .raise_irq = ls_pcie_ep_raise_irq,
> > .get_features = ls_pcie_ep_get_features,
> > +   .func_conf_select = ls_pcie_ep_func_conf_select, };
> > +
> > +static const struct ls_pcie_ep_drvdata ls1_ep_drvdata = {
> > +   .ops = &ls_pcie_ep_ops,
> > +   .dw_pcie_ops = &dw_ls_pcie_ep_ops,
> > +};
> > +
> > +static const struct ls_pcie_ep_drvdata ls2_ep_drvdata = {
> > +   .func_offset = 0x2,
> > +   .ops = &ls_pcie_ep_ops,
> > +   .dw_pcie_ops = &dw_ls_pcie_ep_ops,
> > +};
> > +
> > +static const struct of_device_id ls_pcie_ep_of_match[] = {
> > +   { .compatible = "fsl,ls1046a-pcie-ep", .data = &ls1_ep_drvdata },
> > +   { 

RE: [PATCH v2 07/10] PCI: layerscape: Modify the MSIX to the doorbell way

2019-08-23 Thread Xiaowei Bao


> -Original Message-
> From: Andrew Murray 
> Sent: 2019年8月23日 21:58
> To: Xiaowei Bao 
> Cc: bhelg...@google.com; robh...@kernel.org; mark.rutl...@arm.com;
> shawn...@kernel.org; Leo Li ; kis...@ti.com;
> lorenzo.pieral...@arm.co; a...@arndb.de; gre...@linuxfoundation.org; M.h.
> Lian ; Mingkai Hu ; Roy
> Zang ; jingooh...@gmail.com;
> gustavo.pimen...@synopsys.com; linux-...@vger.kernel.org;
> devicet...@vger.kernel.org; linux-ker...@vger.kernel.org;
> linux-arm-ker...@lists.infradead.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH v2 07/10] PCI: layerscape: Modify the MSIX to the
> doorbell way
> 
> On Thu, Aug 22, 2019 at 07:22:39PM +0800, Xiaowei Bao wrote:
> > The layerscape platform uses the doorbell way to trigger MSI-X interrupts
> > in EP mode.
> >
> 
> I have no problems with this patch, however...
> 
> Are you able to add to this message a reason for why you are making this
> change? Did dw_pcie_ep_raise_msix_irq not work when func_no != 0? Or did
> it work yet dw_pcie_ep_raise_msix_irq_doorbell is more efficient?

The fact is that this driver was previously verified only on NXP's ls1046a
platform, and ls1046a doesn't support the MSI-X feature, so I set msix_capable
in the pci_epc_features struct to false. Other platforms, e.g. ls1088a, do
support the MSI-X feature. I verified the MSI-X feature on ls1088a and it did
not work, so I changed to another way. Thanks.

> 
> Thanks,
> 
> Andrew Murray
> 
> > Signed-off-by: Xiaowei Bao 
> > ---
> > v2:
> >  - No change.
> >
> >  drivers/pci/controller/dwc/pci-layerscape-ep.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > index 8461f62..7ca5fe8 100644
> > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > @@ -74,7 +74,8 @@ static int ls_pcie_ep_raise_irq(struct dw_pcie_ep *ep,
> u8 func_no,
> > case PCI_EPC_IRQ_MSI:
> > return dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
> > case PCI_EPC_IRQ_MSIX:
> > -   return dw_pcie_ep_raise_msix_irq(ep, func_no, interrupt_num);
> > +   return dw_pcie_ep_raise_msix_irq_doorbell(ep, func_no,
> > + interrupt_num);
> > default:
> > dev_err(pci->dev, "UNKNOWN IRQ type\n");
> > return -EINVAL;
> > --
> > 2.9.5
> >


RE: [PATCH v2 05/10] PCI: layerscape: Fix some format issue of the code

2019-08-23 Thread Xiaowei Bao


> -Original Message-
> From: Andrew Murray 
> Sent: 2019年8月23日 21:45
> To: Xiaowei Bao 
> Cc: bhelg...@google.com; robh...@kernel.org; mark.rutl...@arm.com;
> shawn...@kernel.org; Leo Li ; kis...@ti.com;
> lorenzo.pieral...@arm.co; a...@arndb.de; gre...@linuxfoundation.org; M.h.
> Lian ; Mingkai Hu ; Roy
> Zang ; jingooh...@gmail.com;
> gustavo.pimen...@synopsys.com; linux-...@vger.kernel.org;
> devicet...@vger.kernel.org; linux-ker...@vger.kernel.org;
> linux-arm-ker...@lists.infradead.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH v2 05/10] PCI: layerscape: Fix some format issue of the
> code
> 
> On Thu, Aug 22, 2019 at 07:22:37PM +0800, Xiaowei Bao wrote:
> > Fix some format issue of the code in EP driver.
> >
> > Signed-off-by: Xiaowei Bao 
> 
> Reviewed-by: Andrew Murray 

Thanks.

> 
> 
> > ---
> > v2:
> >  - No change.
> >
> >  drivers/pci/controller/dwc/pci-layerscape-ep.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > index be61d96..4e92a95 100644
> > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > @@ -62,7 +62,7 @@ static void ls_pcie_ep_init(struct dw_pcie_ep *ep)
> > }
> >
> >  static int ls_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
> > - enum pci_epc_irq_type type, u16 interrupt_num)
> > +   enum pci_epc_irq_type type, u16 interrupt_num)
> >  {
> > struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> >
> > @@ -86,7 +86,7 @@ static const struct dw_pcie_ep_ops pcie_ep_ops = {
> > };
> >
> >  static int __init ls_add_pcie_ep(struct ls_pcie_ep *pcie,
> > -   struct platform_device *pdev)
> > +struct platform_device *pdev)
> >  {
> > struct dw_pcie *pci = pcie->pci;
> > struct device *dev = pci->dev;
> > --
> > 2.9.5
> >


RE: [PATCH v2 02/10] PCI: designware-ep: Add the doorbell mode of MSI-X in EP mode

2019-08-23 Thread Xiaowei Bao


> -Original Message-
> From: Andrew Murray 
> Sent: 2019年8月23日 21:36
> To: Xiaowei Bao 
> Cc: bhelg...@google.com; robh...@kernel.org; mark.rutl...@arm.com;
> shawn...@kernel.org; Leo Li ; kis...@ti.com;
> lorenzo.pieral...@arm.co; a...@arndb.de; gre...@linuxfoundation.org; M.h.
> Lian ; Mingkai Hu ; Roy
> Zang ; jingooh...@gmail.com;
> gustavo.pimen...@synopsys.com; linux-...@vger.kernel.org;
> devicet...@vger.kernel.org; linux-ker...@vger.kernel.org;
> linux-arm-ker...@lists.infradead.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH v2 02/10] PCI: designware-ep: Add the doorbell mode of
> MSI-X in EP mode
> 
> On Thu, Aug 22, 2019 at 07:22:34PM +0800, Xiaowei Bao wrote:
> > Add the doorbell mode of MSI-X in EP mode.
> >
> > Signed-off-by: Xiaowei Bao 
> > ---
> > v2:
> >  - Remove the unused macro.
> >
> >  drivers/pci/controller/dwc/pcie-designware-ep.c | 14 ++
> >  drivers/pci/controller/dwc/pcie-designware.h| 12 
> >  2 files changed, 26 insertions(+)
> >
> > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > index 3e2b740..b8388f8 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > @@ -480,6 +480,20 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep
> *ep, u8 func_no,
> > return 0;
> >  }
> >
> > +int dw_pcie_ep_raise_msix_irq_doorbell(struct dw_pcie_ep *ep, u8
> func_no,
> > +  u16 interrupt_num)
> > +{
> > +   struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > +   u32 msg_data;
> > +
> > +   msg_data = (func_no << PCIE_MSIX_DOORBELL_PF_SHIFT) |
> > +  (interrupt_num - 1);
> > +
> > +   dw_pcie_writel_dbi(pci, PCIE_MSIX_DOORBELL, msg_data);
> > +
> > +   return 0;
> > +}
> > +
> >  int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
> >   u16 interrupt_num)
> >  {
> > diff --git a/drivers/pci/controller/dwc/pcie-designware.h
> > b/drivers/pci/controller/dwc/pcie-designware.h
> > index a0fdbf7..895a9ef 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware.h
> > +++ b/drivers/pci/controller/dwc/pcie-designware.h
> > @@ -88,6 +88,9 @@
> >  #define PCIE_MISC_CONTROL_1_OFF 0x8BC
> >  #define PCIE_DBI_RO_WR_EN  BIT(0)
> >
> > +#define PCIE_MSIX_DOORBELL 0x948
> > +#define PCIE_MSIX_DOORBELL_PF_SHIFT 24
> > +
> >  /*
> >   * iATU Unroll-specific register definitions
> >   * From 4.80 core version the address translation will be made by
> > unroll @@ -400,6 +403,8 @@ int dw_pcie_ep_raise_msi_irq(struct
> dw_pcie_ep *ep, u8 func_no,
> >  u8 interrupt_num);
> >  int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
> >  u16 interrupt_num);
> > +int dw_pcie_ep_raise_msix_irq_doorbell(struct dw_pcie_ep *ep, u8
> func_no,
> > +  u16 interrupt_num);
> >  void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar);
> > #else  static inline void dw_pcie_ep_linkup(struct dw_pcie_ep *ep) @@
> > -432,6 +437,13 @@ static inline int dw_pcie_ep_raise_msix_irq(struct
> dw_pcie_ep *ep, u8 func_no,
> > return 0;
> >  }
> >
> > +static inline int dw_pcie_ep_raise_msix_irq_doorbell(struct dw_pcie_ep
> *ep,
> > +u8 func_no,
> > +u16 interrupt_num)
> > +{
> > +   return 0;
> > +}
> > +
> 
> Looks OK to me.
> 
> Reviewed-by: Andrew Murray 

Thanks a lot.

> 
> >  static inline void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum
> > pci_barno bar)  {  }
> > --
> > 2.9.5
> >


RE: [PATCH v2 01/10] PCI: designware-ep: Add multiple PFs support for DWC

2019-08-23 Thread Xiaowei Bao


> -Original Message-
> From: Andrew Murray 
> Sent: 2019年8月23日 21:25
> To: Xiaowei Bao 
> Cc: bhelg...@google.com; robh...@kernel.org; mark.rutl...@arm.com;
> shawn...@kernel.org; Leo Li ; kis...@ti.com;
> lorenzo.pieral...@arm.co; a...@arndb.de; gre...@linuxfoundation.org; M.h.
> Lian ; Mingkai Hu ; Roy
> Zang ; jingooh...@gmail.com;
> gustavo.pimen...@synopsys.com; linux-...@vger.kernel.org;
> devicet...@vger.kernel.org; linux-ker...@vger.kernel.org;
> linux-arm-ker...@lists.infradead.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH v2 01/10] PCI: designware-ep: Add multiple PFs support
> for DWC
> 
> On Thu, Aug 22, 2019 at 07:22:33PM +0800, Xiaowei Bao wrote:
> > Add multiple PFs support for DWC. Different PFs have different config
> > spaces; we use the pf-offset property obtained from the DTS to access the
> > config space of each PF.
> 
> It looks like you're missing a --cover-letter again.
> 
> >
> > Signed-off-by: Xiaowei Bao 
> > ---
> > v2:
> >  - Remove duplicate redundant code.
> >  - Reimplement the PF config space access way.
> >
> >  drivers/pci/controller/dwc/pcie-designware-ep.c | 122
> 
> >  drivers/pci/controller/dwc/pcie-designware.c|  59 
> >  drivers/pci/controller/dwc/pcie-designware.h|  11 ++-
> >  3 files changed, 134 insertions(+), 58 deletions(-)
> >
> > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > index 2bf5a35..3e2b740 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > @@ -19,12 +19,17 @@ void dw_pcie_ep_linkup(struct dw_pcie_ep *ep)
> > pci_epc_linkup(epc);
> >  }
> >
> > -static void __dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno
> bar,
> > -  int flags)
> > +static void __dw_pcie_ep_reset_bar(struct dw_pcie *pci, u8 func_no,
> > +  enum pci_barno bar, int flags)
> >  {
> > u32 reg;
> > +   unsigned int func_offset = 0;
> > +   struct dw_pcie_ep *ep = &pci->ep;
> >
> > -   reg = PCI_BASE_ADDRESS_0 + (4 * bar);
> > +   if (ep->ops->func_conf_select)
> > +   func_offset = ep->ops->func_conf_select(ep, func_no);
> > +
> > +   reg = func_offset + PCI_BASE_ADDRESS_0 + (4 * bar);
> 
> This pattern of checking if func_conf_select exists and using it to get an
> offset is repeated a lot throughout this file. You could move this
> functionality into a new function (similar to dw_pcie_read_dbi etc). Or
> perhaps a new variant of dw_pcie_writel_ should be created that takes a
> func_no argument.

Thanks for your comments. I thought about this method before, but there is an
issue with how the config space of different functions is accessed. Our platform
gives each function a different offset from dbi_base to access its config space,
but other platforms may instead write a register to select which function's
config space is accessed. So I think it is better to reserve a callback function
that lets each platform implement its own method. My point is that, if a platform
uses the register method, it can implement that in this callback and return an
offset of 0; if it uses the offset method, it can return the offset value, which
is then used by the dw_pcie_ep driver.
 
> 
> 
> > dw_pcie_dbi_ro_wr_en(pci);
> > dw_pcie_writel_dbi2(pci, reg, 0x0);
> > dw_pcie_writel_dbi(pci, reg, 0x0);
> 
> 
> > @@ -235,7 +257,7 @@ static int dw_pcie_ep_map_addr(struct pci_epc
> *epc, u8 func_no,
> > struct dw_pcie_ep *ep = epc_get_drvdata(epc);
> > struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> >
> > -   ret = dw_pcie_ep_outbound_atu(ep, addr, pci_addr, size);
> > +   ret = dw_pcie_ep_outbound_atu(ep, func_no, addr, pci_addr, size);
> > if (ret) {
> > dev_err(pci->dev, "Failed to enable address\n");
> > return ret;
> > @@ -249,11 +271,15 @@ static int dw_pcie_ep_get_msi(struct pci_epc
> *epc, u8 func_no)
> > struct dw_pcie_ep *ep = epc_get_drvdata(epc);
> > struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > u32 val, reg;
> > +   unsigned int func_offset = 0;
> > +
> > +   if (ep->ops->func_conf_select)
> > +   func_offset = ep->ops->func_conf_select(ep, func_no);
> >
> > if (!ep->msi_cap)
> > return -EINVAL;
> >
> > -   reg = ep->msi_cap + PCI_MSI_FLAGS;
> > +   reg = ep->msi_cap + func_offset + PCI_MSI_FLAGS;
> 
> This makes me nervous.
> 
> From a PCI viewpoint, each function has its own capability structure and
> within each function there may exist a MSI capability. Yet what we're doing
> here is using dw_pcie_ep_find_capability to get the list of capabilities for
> function 0, and then applying offsets from that for subsequent functions. I.e.
> we're applying DW specific knowledge to find the correct capability, rather
> than following the general PCI approach.
> 
> I think the above hunk shouldn't be required - but 

Re: cleanup the dma_pgprot handling

2019-08-23 Thread Paul Burton
Hi Christoph,

On Fri, Aug 16, 2019 at 09:07:48AM +0200, Christoph Hellwig wrote:
> I'd still like to hear a confirmation from the mips folks how
> the write combine attribute can or can't work with the KSEG1
> uncached segment.

Quoting section 4.8 "Cacheability and Coherency Attributes and Access
Types" of "MIPS Architecture Volume 1: Introduction to the MIPS32
Architecture" (MD00080, revision 6.01):

https://www.mips.com/?do-download=introduction-to-the-mips32-architecture-v6-01

> Memory access types are specified by architecturally-defined and
> implementation-specific Cacheability and Coherency Attribute bits
> (CCAs) generated by the MMU for the access.
>
> Slightly different cacheability and coherency attributes such as
> “cached coherent, update on write” and “cached coherent, exclusive on
> write” can map to the same memory access type; in this case they both
> map to cached coherent. In order to map to the same access type, the
> fundamental mechanisms of both CCAs must be the same.
>
> When the operation of the instruction is affected, the instructions
> are described in terms of memory access types. The load and store
> operations in a processor proceed according to the specific CCA of the
> reference, however, and the pseudocode for load and store common
> functions uses the CCA value rather than the corresponding memory
> access type.

So I believe uncached & uncached accelerated are another case like that
described above - they're 2 different CCAs but the same "access type",
namely uncached.

Section 4.9 then goes on to forbid mixing access types, but not CCAs.

It would be nice if the precise mapping from CCA to access type was
provided, but I don't see that anywhere. I can check with the
architecture team to be sure, but to my knowledge we're fine to mix
access via kseg1 (ie. uncached) & mappings with CCA=7 (uncached
accelerated).

Thanks,
Paul


Re: [PATCH kernel] vfio/spapr_tce: Fix incorrect tce_iommu_group memory free

2019-08-23 Thread Alex Williamson
On Mon, 19 Aug 2019 11:51:17 +1000
Alexey Kardashevskiy  wrote:

> The @tcegrp variable is used in 1) a loop over attached groups
> 2) it stores a pointer to a newly allocated tce_iommu_group if 1) found
> nothing. However the error handler does not distinguish how we got there
> and incorrectly releases memory for a found+incompatible group.
> 
> This fixes it by adding another error handling case.
> 
> Fixes: 0bd971676e68 ("powerpc/powernv/npu: Add compound IOMMU groups")
> Signed-off-by: Alexey Kardashevskiy 
> ---

Applied to vfio next branch with Paul's R-b.  Thanks,

Alex

> 
> The bug is there since 2157e7b82f3b but it would not appear in practice
> before 0bd971676e68, hence that "Fixes". Or it still should be
> 2157e7b82f3b ("vfio: powerpc/spapr: Register memory and define IOMMU v2")
> ?
> 
> Found it when tried adding a "compound PE" (GPU + NPUs) to a container
> with a passed through xHCI host. The compatibility test (->create_table
> should be equal) treats them as incompatible which might be a bug (or
> we are just suboptimal here) on its own.
> 
> ---
>  drivers/vfio/vfio_iommu_spapr_tce.c | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
> b/drivers/vfio/vfio_iommu_spapr_tce.c
> index 8ce9ad21129f..babef8b00daf 100644
> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> @@ -1234,7 +1234,7 @@ static long tce_iommu_take_ownership_ddw(struct 
> tce_container *container,
>  static int tce_iommu_attach_group(void *iommu_data,
>   struct iommu_group *iommu_group)
>  {
> - int ret;
> + int ret = 0;
>   struct tce_container *container = iommu_data;
>   struct iommu_table_group *table_group;
>   struct tce_iommu_group *tcegrp = NULL;
> @@ -1287,13 +1287,13 @@ static int tce_iommu_attach_group(void *iommu_data,
>   !table_group->ops->release_ownership) {
>   if (container->v2) {
>   ret = -EPERM;
> - goto unlock_exit;
> + goto free_exit;
>   }
>   ret = tce_iommu_take_ownership(container, table_group);
>   } else {
>   if (!container->v2) {
>   ret = -EPERM;
> - goto unlock_exit;
> + goto free_exit;
>   }
>   ret = tce_iommu_take_ownership_ddw(container, table_group);
>   if (!tce_groups_attached(container) && !container->tables[0])
> @@ -1305,10 +1305,11 @@ static int tce_iommu_attach_group(void *iommu_data,
>   list_add(&tcegrp->next, &container->group_list);
>   }
>  
> -unlock_exit:
> +free_exit:
>   if (ret && tcegrp)
>   kfree(tcegrp);
>  
> +unlock_exit:
>   mutex_unlock(&container->lock);
>  
>   return ret;



[PATCH v5 5/7] kexec_elf: remove Elf_Rel macro

2019-08-23 Thread Sven Schnelle
It wasn't used anywhere, so let's drop it.

Reviewed-by: Christophe Leroy 
Reviewed-by: Thiago Jung Bauermann 
Signed-off-by: Sven Schnelle 
---
 kernel/kexec_elf.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 87935bd5e2ba..6c806ce96ac1 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -23,10 +23,6 @@
 
 #define elf_addr_to_cpu elf64_to_cpu
 
-#ifndef Elf_Rel
-#define Elf_Rel Elf64_Rel
-#endif /* Elf_Rel */
-
 static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
 {
return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
-- 
2.23.0.rc1



[PATCH v5 7/7] kexec_elf: support 32 bit ELF files

2019-08-23 Thread Sven Schnelle
The powerpc version only supported 64 bit. Add some
code to switch decoding of fields during runtime so
we can kexec a 32 bit kernel from a 64 bit kernel and
vice versa.

Signed-off-by: Sven Schnelle 
Reviewed-by: Thiago Jung Bauermann 
---
 kernel/kexec_elf.c | 57 ++
 1 file changed, 42 insertions(+), 15 deletions(-)

diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 85f2bd177d6e..d3689632e8b9 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -21,8 +21,6 @@
 #include 
 #include 
 
-#define elf_addr_to_cpu elf64_to_cpu
-
 static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
 {
return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
@@ -152,9 +150,6 @@ static int elf_read_ehdr(const char *buf, size_t len, 
struct elfhdr *ehdr)
ehdr->e_type  = elf16_to_cpu(ehdr, buf_ehdr->e_type);
ehdr->e_machine   = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
ehdr->e_version   = elf32_to_cpu(ehdr, buf_ehdr->e_version);
-   ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
-   ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
-   ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
@@ -162,6 +157,24 @@ static int elf_read_ehdr(const char *buf, size_t len, 
struct elfhdr *ehdr)
ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
ehdr->e_shstrndx  = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
 
+   switch (ehdr->e_ident[EI_CLASS]) {
+   case ELFCLASS64:
+   ehdr->e_entry = elf64_to_cpu(ehdr, buf_ehdr->e_entry);
+   ehdr->e_phoff = elf64_to_cpu(ehdr, buf_ehdr->e_phoff);
+   ehdr->e_shoff = elf64_to_cpu(ehdr, buf_ehdr->e_shoff);
+   break;
+
+   case ELFCLASS32:
+   ehdr->e_entry = elf32_to_cpu(ehdr, buf_ehdr->e_entry);
+   ehdr->e_phoff = elf32_to_cpu(ehdr, buf_ehdr->e_phoff);
+   ehdr->e_shoff = elf32_to_cpu(ehdr, buf_ehdr->e_shoff);
+   break;
+
+   default:
+   pr_debug("Unknown ELF class.\n");
+   return -EINVAL;
+   }
+
return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
 }
 
@@ -192,6 +205,7 @@ static int elf_read_phdr(const char *buf, size_t len,
 {
/* Override the const in proghdrs, we are the ones doing the loading. */
struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
+   const struct elfhdr *ehdr = elf_info->ehdr;
const char *pbuf;
struct elf_phdr *buf_phdr;
 
@@ -199,18 +213,31 @@ static int elf_read_phdr(const char *buf, size_t len,
buf_phdr = (struct elf_phdr *) pbuf;
 
phdr->p_type   = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
-   phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
-   phdr->p_paddr  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
-   phdr->p_vaddr  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
phdr->p_flags  = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
 
-   /*
-* The following fields have a type equivalent to Elf_Addr
-* both in 32 bit and 64 bit ELF.
-*/
-   phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
-   phdr->p_memsz  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
-   phdr->p_align  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
+   switch (ehdr->e_ident[EI_CLASS]) {
+   case ELFCLASS64:
+   phdr->p_offset = elf64_to_cpu(ehdr, buf_phdr->p_offset);
+   phdr->p_paddr  = elf64_to_cpu(ehdr, buf_phdr->p_paddr);
+   phdr->p_vaddr  = elf64_to_cpu(ehdr, buf_phdr->p_vaddr);
+   phdr->p_filesz = elf64_to_cpu(ehdr, buf_phdr->p_filesz);
+   phdr->p_memsz  = elf64_to_cpu(ehdr, buf_phdr->p_memsz);
+   phdr->p_align  = elf64_to_cpu(ehdr, buf_phdr->p_align);
+   break;
+
+   case ELFCLASS32:
+   phdr->p_offset = elf32_to_cpu(ehdr, buf_phdr->p_offset);
+   phdr->p_paddr  = elf32_to_cpu(ehdr, buf_phdr->p_paddr);
+   phdr->p_vaddr  = elf32_to_cpu(ehdr, buf_phdr->p_vaddr);
+   phdr->p_filesz = elf32_to_cpu(ehdr, buf_phdr->p_filesz);
+   phdr->p_memsz  = elf32_to_cpu(ehdr, buf_phdr->p_memsz);
+   phdr->p_align  = elf32_to_cpu(ehdr, buf_phdr->p_align);
+   break;
+
+   default:
+   pr_debug("Unknown ELF class.\n");
+   return -EINVAL;
+   }
 
return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
 }
-- 
2.23.0.rc1



[PATCH v5 4/7] kexec_elf: remove PURGATORY_STACK_SIZE

2019-08-23 Thread Sven Schnelle
It's not used anywhere so just drop it.

Signed-off-by: Sven Schnelle 
Reviewed-by: Thiago Jung Bauermann 
---
 kernel/kexec_elf.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 137037603117..87935bd5e2ba 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -21,8 +21,6 @@
 #include 
 #include 
 
-#define PURGATORY_STACK_SIZE   (16 * 1024)
-
 #define elf_addr_to_cpu elf64_to_cpu
 
 #ifndef Elf_Rel
-- 
2.23.0.rc1



[PATCH v5 3/7] kexec_elf: remove parsing of section headers

2019-08-23 Thread Sven Schnelle
We're not using them, so we can drop the parsing.

Signed-off-by: Sven Schnelle 
Reviewed-by: Thiago Jung Bauermann 
---
 include/linux/kexec.h |   1 -
 kernel/kexec_elf.c| 137 --
 2 files changed, 138 deletions(-)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index da2a6b1d69e7..f0b809258ed3 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -226,7 +226,6 @@ struct kexec_elf_info {
 
const struct elfhdr *ehdr;
const struct elf_phdr *proghdrs;
-   struct elf_shdr *sechdrs;
 };
 
 int kexec_build_elf_info(const char *buf, size_t len, struct elfhdr *ehdr,
diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 34376fbc55be..137037603117 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -257,134 +257,6 @@ static int elf_read_phdrs(const char *buf, size_t len,
return 0;
 }
 
-/**
- * elf_is_shdr_sane - check that it is safe to use the section header
- * @buf_len:   size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
-{
-   bool size_ok;
-
-   /* SHT_NULL headers have undefined values, so we can't check them. */
-   if (shdr->sh_type == SHT_NULL)
-   return true;
-
-   /* Now verify sh_entsize */
-   switch (shdr->sh_type) {
-   case SHT_SYMTAB:
-   size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
-   break;
-   case SHT_RELA:
-   size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
-   break;
-   case SHT_DYNAMIC:
-   size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
-   break;
-   case SHT_REL:
-   size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
-   break;
-   case SHT_NOTE:
-   case SHT_PROGBITS:
-   case SHT_HASH:
-   case SHT_NOBITS:
-   default:
-   /*
-* This is a section whose entsize requirements
-* I don't care about.  If I don't know about
-* the section I can't care about it's entsize
-* requirements.
-*/
-   size_ok = true;
-   break;
-   }
-
-   if (!size_ok) {
-   pr_debug("ELF section with wrong entry size.\n");
-   return false;
-   } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
-   pr_debug("ELF section address wraps around.\n");
-   return false;
-   }
-
-   if (shdr->sh_type != SHT_NOBITS) {
-   if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
-   pr_debug("ELF section location wraps around.\n");
-   return false;
-   } else if (shdr->sh_offset + shdr->sh_size > buf_len) {
-   pr_debug("ELF section not in file.\n");
-   return false;
-   }
-   }
-
-   return true;
-}
-
-static int elf_read_shdr(const char *buf, size_t len,
-struct kexec_elf_info *elf_info,
-int idx)
-{
-   struct elf_shdr *shdr = _info->sechdrs[idx];
-   const struct elfhdr *ehdr = elf_info->ehdr;
-   const char *sbuf;
-   struct elf_shdr *buf_shdr;
-
-   sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
-   buf_shdr = (struct elf_shdr *) sbuf;
-
-   shdr->sh_name  = elf32_to_cpu(ehdr, buf_shdr->sh_name);
-   shdr->sh_type  = elf32_to_cpu(ehdr, buf_shdr->sh_type);
-   shdr->sh_addr  = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
-   shdr->sh_offset= elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
-   shdr->sh_link  = elf32_to_cpu(ehdr, buf_shdr->sh_link);
-   shdr->sh_info  = elf32_to_cpu(ehdr, buf_shdr->sh_info);
-
-   /*
-* The following fields have a type equivalent to Elf_Addr
-* both in 32 bit and 64 bit ELF.
-*/
-   shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
-   shdr->sh_size  = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
-   shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
-   shdr->sh_entsize   = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
-
-   return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_shdrs - read the section headers from the buffer
- *
- * This function assumes that the section header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_shdrs(const char *buf, size_t len,
- struct kexec_elf_info *elf_info)
-{
-   size_t shdr_size, i;
-
-   /*
-* e_shnum is at most 65536 so calculating
-* the size of the section header cannot overflow.
-*/
-   shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
-
-   elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
-   if (!elf_info->sechdrs)
-   

[PATCH v5 1/7] kexec: add KEXEC_ELF

2019-08-23 Thread Sven Schnelle
Right now powerpc provides an implementation to read elf files
with the kexec_file_load() syscall. Make that available as a public
kexec interface so it can be re-used on other architectures.

Signed-off-by: Sven Schnelle 
---
 arch/Kconfig  |   3 +
 arch/powerpc/Kconfig  |   1 +
 arch/powerpc/kernel/kexec_elf_64.c| 545 +-
 include/linux/kexec.h |  24 +
 kernel/Makefile   |   1 +
 .../kexec_elf_64.c => kernel/kexec_elf.c  | 183 ++
 6 files changed, 70 insertions(+), 687 deletions(-)
 copy arch/powerpc/kernel/kexec_elf_64.c => kernel/kexec_elf.c (75%)

diff --git a/arch/Kconfig b/arch/Kconfig
index a7b57dd42c26..01244412f393 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -18,6 +18,9 @@ config KEXEC_CORE
select CRASH_CORE
bool
 
+config KEXEC_ELF
+   bool
+
 config HAVE_IMA_KEXEC
bool
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7cb51bba4985..fbb0b305e1be 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -511,6 +511,7 @@ config KEXEC_FILE
select KEXEC_CORE
select HAVE_IMA_KEXEC
select BUILD_BIN2C
+   select KEXEC_ELF
depends on PPC64
depends on CRYPTO=y
depends on CRYPTO_SHA256=y
diff --git a/arch/powerpc/kernel/kexec_elf_64.c 
b/arch/powerpc/kernel/kexec_elf_64.c
index 83cf7b852876..3072fd6dbe94 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -23,541 +23,6 @@
 #include 
 #include 
 
-#define PURGATORY_STACK_SIZE   (16 * 1024)
-
-#define elf_addr_to_cpuelf64_to_cpu
-
-#ifndef Elf_Rel
-#define Elf_RelElf64_Rel
-#endif /* Elf_Rel */
-
-struct elf_info {
-   /*
-* Where the ELF binary contents are kept.
-* Memory managed by the user of the struct.
-*/
-   const char *buffer;
-
-   const struct elfhdr *ehdr;
-   const struct elf_phdr *proghdrs;
-   struct elf_shdr *sechdrs;
-};
-
-static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
-{
-   return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
-}
-
-static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
-{
-   if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-   value = le64_to_cpu(value);
-   else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-   value = be64_to_cpu(value);
-
-   return value;
-}
-
-static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
-{
-   if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-   value = le16_to_cpu(value);
-   else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-   value = be16_to_cpu(value);
-
-   return value;
-}
-
-static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
-{
-   if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-   value = le32_to_cpu(value);
-   else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-   value = be32_to_cpu(value);
-
-   return value;
-}
-
-/**
- * elf_is_ehdr_sane - check that it is safe to use the ELF header
- * @buf_len:   size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
-{
-   if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
-   pr_debug("Bad program header size.\n");
-   return false;
-   } else if (ehdr->e_shnum > 0 &&
-  ehdr->e_shentsize != sizeof(struct elf_shdr)) {
-   pr_debug("Bad section header size.\n");
-   return false;
-   } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
-  ehdr->e_version != EV_CURRENT) {
-   pr_debug("Unknown ELF version.\n");
-   return false;
-   }
-
-   if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
-   size_t phdr_size;
-
-   /*
-* e_phnum is at most 65535 so calculating the size of the
-* program header cannot overflow.
-*/
-   phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
-   /* Sanity check the program header table location. */
-   if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
-   pr_debug("Program headers at invalid location.\n");
-   return false;
-   } else if (ehdr->e_phoff + phdr_size > buf_len) {
-   pr_debug("Program headers truncated.\n");
-   return false;
-   }
-   }
-
-   if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
-   size_t shdr_size;
-
-   /*
-* e_shnum is at most 65536 so calculating
-* the size of the section header cannot overflow.
-*/
-   shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
-
-   

[PATCH v5 6/7] kexec_elf: remove unused variable in kexec_elf_load()

2019-08-23 Thread Sven Schnelle
base was never assigned, so we can remove it.

Reviewed-by: Christophe Leroy 
Reviewed-by: Thiago Jung Bauermann 
Signed-off-by: Sven Schnelle 
---
 kernel/kexec_elf.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 6c806ce96ac1..85f2bd177d6e 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -363,7 +363,7 @@ int kexec_elf_load(struct kimage *image, struct elfhdr 
*ehdr,
 struct kexec_buf *kbuf,
 unsigned long *lowest_load_addr)
 {
-   unsigned long base = 0, lowest_addr = UINT_MAX;
+   unsigned long lowest_addr = UINT_MAX;
int ret;
size_t i;
 
@@ -385,7 +385,7 @@ int kexec_elf_load(struct kimage *image, struct elfhdr 
*ehdr,
kbuf->bufsz = size;
kbuf->memsz = phdr->p_memsz;
kbuf->buf_align = phdr->p_align;
-   kbuf->buf_min = phdr->p_paddr + base;
+   kbuf->buf_min = phdr->p_paddr;
kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(kbuf);
if (ret)
@@ -396,9 +396,6 @@ int kexec_elf_load(struct kimage *image, struct elfhdr 
*ehdr,
lowest_addr = load_addr;
}
 
-   /* Update entry point to reflect new load address. */
-   ehdr->e_entry += base;
-
*lowest_load_addr = lowest_addr;
ret = 0;
  out:
-- 
2.23.0.rc1



[PATCH v5 2/7] kexec_elf: change order of elf_*_to_cpu() functions

2019-08-23 Thread Sven Schnelle
Change the order to have a 64/32/16 order, no functional change.

Signed-off-by: Sven Schnelle 
Reviewed-by: Thiago Jung Bauermann 
---
 kernel/kexec_elf.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 26c6310167a0..34376fbc55be 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -44,22 +44,22 @@ static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, 
uint64_t value)
return value;
 }
 
-static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
+static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
 {
if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-   value = le16_to_cpu(value);
+   value = le32_to_cpu(value);
else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-   value = be16_to_cpu(value);
+   value = be32_to_cpu(value);
 
return value;
 }
 
-static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
+static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
 {
if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-   value = le32_to_cpu(value);
+   value = le16_to_cpu(value);
else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-   value = be32_to_cpu(value);
+   value = be16_to_cpu(value);
 
return value;
 }
-- 
2.23.0.rc1



[PATCH v5 0/7] kexec: add generic support for elf kernel images

2019-08-23 Thread Sven Schnelle
Changes to v4:
 - rebase on current powerpc/merge tree
 - fix syscall name in commit message
 - remove a few unused #defines in arch/powerpc/kernel/kexec_elf_64.c

Changes to v3:
 - add support for 32-bit ELF files

Changes to v2:
 - use git format-patch -C

Changes to v1:
 - split up patch into smaller pieces
 - rebase onto powerpc/next
 - remove unused variable in kexec_elf_load()

Changes to RFC version:
 - remove unused Elf_Rel macro
 - remove section header parsing
 - remove PURGATORY_STACK_SIZE
 - change order of elf_*_to_cpu() functions
 - remove elf_addr_to_cpu macro

Sven Schnelle (7):
  kexec: add KEXEC_ELF
  kexec_elf: change order of elf_*_to_cpu() functions
  kexec_elf: remove parsing of section headers
  kexec_elf: remove PURGATORY_STACK_SIZE
  kexec_elf: remove Elf_Rel macro
  kexec_elf: remove unused variable in kexec_elf_load()
  kexec_elf: support 32 bit ELF files

 arch/Kconfig  |   3 +
 arch/powerpc/Kconfig  |   1 +
 arch/powerpc/kernel/kexec_elf_64.c| 545 +-
 include/linux/kexec.h |  23 +
 kernel/Makefile   |   1 +
 .../kexec_elf_64.c => kernel/kexec_elf.c  | 394 +++--
 6 files changed, 115 insertions(+), 852 deletions(-)
 copy arch/powerpc/kernel/kexec_elf_64.c => kernel/kexec_elf.c (50%)

-- 
2.23.0.rc1



Re: [PATCH v2] powerpc/powernv: Add ultravisor message log interface

2019-08-23 Thread kbuild test robot
Hi Claudio,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[cannot apply to v5.3-rc5 next-20190823]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Claudio-Carvalho/powerpc-powernv-Add-ultravisor-message-log-interface/20190823-214650
config: powerpc-defconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 7.4.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.4.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   In file included from arch/powerpc/include/asm/lppaca.h:48:0,
from arch/powerpc/include/asm/paca.h:17,
from arch/powerpc/include/asm/current.h:13,
from include/linux/mutex.h:14,
from include/linux/kernfs.h:12,
from include/linux/sysfs.h:16,
from include/linux/kobject.h:20,
from include/linux/device.h:16,
from arch/powerpc/include/asm/io.h:27,
from arch/powerpc/platforms/powernv/opal-msglog.c:8:
   arch/powerpc/platforms/powernv/opal-msglog.c: In function 'opal_msglog_init':
>> arch/powerpc/platforms/powernv/opal-msglog.c:159:27: error: 
>> 'FW_FEATURE_ULTRAVISOR' undeclared (first use in this function); did you 
>> mean 'FW_FEATURE_ALWAYS'?
 if (firmware_has_feature(FW_FEATURE_ULTRAVISOR))
  ^
   arch/powerpc/include/asm/firmware.h:120:25: note: in definition of macro 
'firmware_has_feature'
 ((FW_FEATURE_ALWAYS & (feature)) ||\
^~~
   arch/powerpc/platforms/powernv/opal-msglog.c:159:27: note: each undeclared 
identifier is reported only once for each function it appears in
 if (firmware_has_feature(FW_FEATURE_ULTRAVISOR))
  ^
   arch/powerpc/include/asm/firmware.h:120:25: note: in definition of macro 
'firmware_has_feature'
 ((FW_FEATURE_ALWAYS & (feature)) ||\
^~~
   arch/powerpc/platforms/powernv/opal-msglog.c: In function 
'opal_msglog_sysfs_init':
   arch/powerpc/platforms/powernv/opal-msglog.c:181:27: error: 
'FW_FEATURE_ULTRAVISOR' undeclared (first use in this function); did you mean 
'FW_FEATURE_ALWAYS'?
 if (firmware_has_feature(FW_FEATURE_ULTRAVISOR))
  ^
   arch/powerpc/include/asm/firmware.h:120:25: note: in definition of macro 
'firmware_has_feature'
 ((FW_FEATURE_ALWAYS & (feature)) ||\
^~~

vim +159 arch/powerpc/platforms/powernv/opal-msglog.c

   > 8  #include 
 9  #include 
10  #include 
11  #include 
12  #include 
13  #include 
14  #include 
15  
16  /* OPAL in-memory console. Defined in OPAL source at core/console.c */
17  struct memcons {
18  __be64 magic;
19  #define MEMCONS_MAGIC   0x6630696567726173L
20  __be64 obuf_phys;
21  __be64 ibuf_phys;
22  __be32 obuf_size;
23  __be32 ibuf_size;
24  __be32 out_pos;
25  #define MEMCONS_OUT_POS_WRAP0x8000u
26  #define MEMCONS_OUT_POS_MASK0x00ffu
27  __be32 in_prod;
28  __be32 in_cons;
29  };
30  
31  static struct memcons *opal_memcons = NULL;
32  static struct memcons *opal_uv_memcons;
33  
34  static ssize_t msglog_copy(struct memcons *memcons, const char 
*bin_attr_name,
35 char *to, loff_t pos, size_t count)
36  {
37  const char *conbuf;
38  ssize_t ret;
39  size_t first_read = 0;
40  uint32_t out_pos, avail;
41  
42  if (!memcons)
43  return -ENODEV;
44  
45  out_pos = be32_to_cpu(READ_ONCE(memcons->out_pos));
46  
47  /* Now we've read out_pos, put a barrier in before reading the 
new
48   * data it points to in conbuf. */
49  smp_rmb();
50  
51  conbuf = phys_to_virt(be64_to_cpu(memcons->obuf_phys));
52  
53  /* When the buffer has wrapped, read from the out_pos marker to 
the end
54   * of the buffer, and then read the remaining data as in the 
un-wrapped
55   * case. */
56  if (out_pos & MEMCONS_OUT_POS_WRAP) {
57  
58  out_pos &= MEMCONS_OUT_POS_MASK;
59  avail = be32_to_cpu(memcons->obuf_size) - out_pos;
60  
61  ret = memory_read_from_buffer(to, count, ,
62 

Re: [PATCH 3/3] powerpc: use __builtin_trap() in BUG/WARN macros.

2019-08-23 Thread Christophe Leroy




On 08/19/2019 03:45 PM, Segher Boessenkool wrote:

On Mon, Aug 19, 2019 at 05:05:46PM +0200, Christophe Leroy wrote:

Le 19/08/2019 à 16:37, Segher Boessenkool a écrit :

On Mon, Aug 19, 2019 at 04:08:43PM +0200, Christophe Leroy wrote:

Le 19/08/2019 à 15:23, Segher Boessenkool a écrit :

On Mon, Aug 19, 2019 at 01:06:31PM +, Christophe Leroy wrote:

Note that we keep using an assembly text using "twi 31, 0, 0" for
unconditional traps because GCC drops all code after
__builtin_trap() when the condition is always true at build time.


As I said, it can also do this for conditional traps, if it can prove
the condition is always true.


But we have another branch for 'always true' and 'always false' using
__builtin_constant_p(), which don't use __builtin_trap(). Is there
anything wrong with that ?:


The compiler might not realise it is constant when it evaluates the
__builtin_constant_p, but only realises it later.  As the documentation
for the builtin says:
   A return of 0 does not indicate that the
   value is _not_ a constant, but merely that GCC cannot prove it is a
   constant with the specified value of the '-O' option.


So you mean GCC would not be able to prove that
__builtin_constant_p(cond) is always true but it would be able to prove
that if (cond)  is always true ?


Not sure what you mean, sorry.


And isn't there a way to tell GCC that '__builtin_trap()' is
recoverable in our case ?


No, GCC knows that a trap will never fall through.


I think it may work if you do

#define BUG_ON(x) do {  \
if (__builtin_constant_p(x)) {  \
if (x)  \
BUG();  \
} else {\
BUG_ENTRY("", 0); \
if (x)  \
__builtin_trap();   \
}   \
} while (0)


It doesn't work:


You need to make a BUG_ENTRY so that it refers to the *following* trap
instruction, if you go this way.


I don't know how BUG_ENTRY works exactly.


It's basic, maybe too basic: it adds an inline asm with a label, and
adds a .long in the __bug_table section with the address of that label.

When putting it after the __builtin_trap(), I changed it to using the
address before the one of the label which is always the twxx instruction
as far as I can see.

#define BUG_ENTRY(insn, flags, ...) \
__asm__ __volatile__(   \
"1:" insn "\n"  \
".section __bug_table,\"aw\"\n" \
"2:\t" PPC_LONG "1b, %0\n"  \
"\t.short %1, %2\n"   \
".org 2b+%3\n"\
".previous\n" \
: : "i" (__FILE__), "i" (__LINE__), \
  "i" (flags),\
  "i" (sizeof(struct bug_entry)), \
  ##__VA_ARGS__)


#define MY_BUG_ENTRY(lab, flags)\
__asm__ __volatile__(   \
".section __bug_table,\"aw\"\n" \
"2:\t" PPC_LONG "%4, %0\n"  \
"\t.short %1, %2\n"   \
".org 2b+%3\n"\
".previous\n" \
: : "i" (__FILE__), "i" (__LINE__), \
  "i" (flags),\
  "i" (sizeof(struct bug_entry)), \
  "i" (lab))

called as

#define BUG_ON(x) do {  \
MY_BUG_ENTRY(&, 0); \
lab: if (x) \
__builtin_trap();   \
} while (0)

not sure how reliable that works -- *if* it works, I just typed that in
without testing or anything -- but hopefully you get the idea.



I've not been able to make it work. GCC puts the label (.L2 and .L6) 
outside of the function, so the instruction preceding the label is blr, 
not the trap.


#define _EMIT_BUG_ENTRY \
".section __bug_table,\"aw\"\n" \
"2:\t" PPC_LONG "%4, %0\n"  \
"\t.short %1, %2\n"   \
".org 2b+%3\n"\
".previous\n"

#define BUG_ENTRY(flags, label) \
__asm__ __volatile__(   \
_EMIT_BUG_ENTRY \
: : "i" (__FILE__), "i" (__LINE__), \
  "i" 

Re: [PATCH kernel] vfio/spapr_tce: Fix incorrect tce_iommu_group memory free

2019-08-23 Thread Alex Williamson
On Fri, 23 Aug 2019 15:32:41 +1000
Paul Mackerras  wrote:

> On Mon, Aug 19, 2019 at 11:51:17AM +1000, Alexey Kardashevskiy wrote:
> > The @tcegrp variable is used in 1) a loop over attached groups
> > 2) it stores a pointer to a newly allocated tce_iommu_group if 1) found
> > nothing. However the error handler does not distinguish how we got there
> > and incorrectly releases memory for a found+incompatible group.
> > 
> > This fixes it by adding another error handling case.
> > 
> > Fixes: 0bd971676e68 ("powerpc/powernv/npu: Add compound IOMMU groups")
> > Signed-off-by: Alexey Kardashevskiy   
> 
> Good catch.  This is potentially nasty since it is a double free.
> Alex, are you going to take this, or would you prefer it goes via
> Michael Ellerman's tree?
> 
> Reviewed-by: Paul Mackerras 

I can take it, I've got it queued, but was hoping for an ack/review by
you or David.  I'll add the R-b and push it out to my next branch.
Thanks,

Alex


Re: [PATCH v2 08/10] PCI: layerscape: Add EP mode support for ls1088a and ls2088a

2019-08-23 Thread Andrew Murray
On Thu, Aug 22, 2019 at 07:22:40PM +0800, Xiaowei Bao wrote:
> Add PCIe EP mode support for ls1088a and ls2088a. There are some
> differences between the LS1 and LS2 platforms, so refactor the code of
> the EP driver.
> 
> Signed-off-by: Xiaowei Bao 
> ---
> v2:
>  - New mechanism for layerscape EP driver.

Was there a v1 of this patch?

> 
>  drivers/pci/controller/dwc/pci-layerscape-ep.c | 76 
> --
>  1 file changed, 58 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
> b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> index 7ca5fe8..2a66f07 100644
> --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> @@ -20,27 +20,29 @@
>  
>  #define PCIE_DBI2_OFFSET 0x1000  /* DBI2 base address*/
>  
> -struct ls_pcie_ep {
> - struct dw_pcie  *pci;
> - struct pci_epc_features *ls_epc;
> +#define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev)
> +
> +struct ls_pcie_ep_drvdata {
> + u32 func_offset;
> + const struct dw_pcie_ep_ops *ops;
> + const struct dw_pcie_ops*dw_pcie_ops;
>  };
>  
> -#define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev)
> +struct ls_pcie_ep {
> + struct dw_pcie  *pci;
> + struct pci_epc_features *ls_epc;
> + const struct ls_pcie_ep_drvdata *drvdata;
> +};
>  
>  static int ls_pcie_establish_link(struct dw_pcie *pci)
>  {
>   return 0;
>  }
>  
> -static const struct dw_pcie_ops ls_pcie_ep_ops = {
> +static const struct dw_pcie_ops dw_ls_pcie_ep_ops = {
>   .start_link = ls_pcie_establish_link,
>  };
>  
> -static const struct of_device_id ls_pcie_ep_of_match[] = {
> - { .compatible = "fsl,ls-pcie-ep",},
> - { },
> -};
> -
>  static const struct pci_epc_features*
>  ls_pcie_ep_get_features(struct dw_pcie_ep *ep)
>  {
> @@ -82,10 +84,44 @@ static int ls_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 
> func_no,
>   }
>  }
>  
> -static const struct dw_pcie_ep_ops pcie_ep_ops = {
> +static unsigned int ls_pcie_ep_func_conf_select(struct dw_pcie_ep *ep,
> + u8 func_no)
> +{
> + struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> + struct ls_pcie_ep *pcie = to_ls_pcie_ep(pci);
> + u8 header_type;
> +
> + header_type = ioread8(pci->dbi_base + PCI_HEADER_TYPE);
> +
> + if (header_type & (1 << 7))
> + return pcie->drvdata->func_offset * func_no;
> + else
> + return 0;

It looks like there isn't a PCI define for multi function, the nearest I
could find was PCI_HEADER_TYPE_MULTIDEVICE in hotplug/ibmphp.h. A comment
above the test might be helpful to explain the test.

As the ls_pcie_ep_drvdata structures are static, the unset .func_offset
will be initialised to 0, so you could just drop the test above.

However something to the effect of the following may help spot
misconfiguration:

WARN_ON(func_no && !pcie->drvdata->func_offset);
return pcie->drvdata->func_offset * func_no;

The WARN is probably quite useful as if you are attempting to use
non-zero functions and func_offset isn't set - then things may appear to work
normally but actually will break horribly.

Thanks,

Andrew Murray

> +}
> +
> +static const struct dw_pcie_ep_ops ls_pcie_ep_ops = {
>   .ep_init = ls_pcie_ep_init,
>   .raise_irq = ls_pcie_ep_raise_irq,
>   .get_features = ls_pcie_ep_get_features,
> + .func_conf_select = ls_pcie_ep_func_conf_select,
> +};
> +
> +static const struct ls_pcie_ep_drvdata ls1_ep_drvdata = {
> + .ops = _pcie_ep_ops,
> + .dw_pcie_ops = _ls_pcie_ep_ops,
> +};
> +
> +static const struct ls_pcie_ep_drvdata ls2_ep_drvdata = {
> + .func_offset = 0x2,
> + .ops = _pcie_ep_ops,
> + .dw_pcie_ops = _ls_pcie_ep_ops,
> +};
> +
> +static const struct of_device_id ls_pcie_ep_of_match[] = {
> + { .compatible = "fsl,ls1046a-pcie-ep", .data = _ep_drvdata },
> + { .compatible = "fsl,ls1088a-pcie-ep", .data = _ep_drvdata },
> + { .compatible = "fsl,ls2088a-pcie-ep", .data = _ep_drvdata },
> + { },
>  };
>  
>  static int __init ls_add_pcie_ep(struct ls_pcie_ep *pcie,
> @@ -98,7 +134,7 @@ static int __init ls_add_pcie_ep(struct ls_pcie_ep *pcie,
>   int ret;
>  
>   ep = >ep;
> - ep->ops = _ep_ops;
> + ep->ops = pcie->drvdata->ops;
>  
>   res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "addr_space");
>   if (!res)
> @@ -137,14 +173,11 @@ static int __init ls_pcie_ep_probe(struct 
> platform_device *pdev)
>   if (!ls_epc)
>   return -ENOMEM;
>  
> - dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
> - pci->dbi_base = devm_pci_remap_cfg_resource(dev, dbi_base);
> - if (IS_ERR(pci->dbi_base))
> - return PTR_ERR(pci->dbi_base);
> + pcie->drvdata = of_device_get_match_data(dev);
>  
> - pci->dbi_base2 = pci->dbi_base + PCIE_DBI2_OFFSET;
>

[PATCH] powerpc/mm/radix: remove useless kernel messages

2019-08-23 Thread Qian Cai
Booting a POWER9 PowerNV system generates the messages below containing
"ptrval", because pointers printed without a specifier extension
(i.e. unadorned %p) are hashed to prevent leaking information
about the kernel memory layout.

radix-mmu: Initializing Radix MMU
radix-mmu: Partition table (ptrval)
radix-mmu: Mapped 0x-0x4000 with 1.00 GiB
pages (exec)
radix-mmu: Mapped 0x4000-0x0020 with 1.00 GiB
pages
radix-mmu: Mapped 0x2000-0x2020 with 1.00 GiB
pages
radix-mmu: Process table (ptrval) and radix root for kernel:
(ptrval)

Signed-off-by: Qian Cai 
---
 arch/powerpc/mm/book3s64/radix_pgtable.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
b/arch/powerpc/mm/book3s64/radix_pgtable.c
index b4ca9e95e678..b6692ee9411d 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -386,7 +386,6 @@ static void __init radix_init_pgtable(void)
 * physical address here.
 */
register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
-   pr_info("Process table %p and radix root for kernel: %p\n", process_tb, 
init_mm.pgd);
asm volatile("ptesync" : : : "memory");
asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
 "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
@@ -420,7 +419,6 @@ static void __init radix_init_partition_table(void)
mmu_partition_table_set_entry(0, dw0, 0);
 
pr_info("Initializing Radix MMU\n");
-   pr_info("Partition table %p\n", partition_tb);
 }
 
 void __init radix_init_native(void)
-- 
1.8.3.1



Re: [PATCH v4 1/3] dt-bindings: pci: layerscape-pci: add compatible strings "fsl,ls1028a-pcie"

2019-08-23 Thread Lorenzo Pieralisi
On Fri, Aug 23, 2019 at 04:26:41PM +0800, Xiaowei Bao wrote:
> Add the PCIe compatible string for LS1028A
> 
> Signed-off-by: Xiaowei Bao 
> Signed-off-by: Hou Zhiqiang 
> Reviewed-by: Rob Herring 
> ---
> v2:
>  - No change.
> v3:
>  - No change.
> v4:
>  - No change.
> 
>  Documentation/devicetree/bindings/pci/layerscape-pci.txt | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/Documentation/devicetree/bindings/pci/layerscape-pci.txt 
> b/Documentation/devicetree/bindings/pci/layerscape-pci.txt
> index e20ceaa..99a386e 100644
> --- a/Documentation/devicetree/bindings/pci/layerscape-pci.txt
> +++ b/Documentation/devicetree/bindings/pci/layerscape-pci.txt
> @@ -21,6 +21,7 @@ Required properties:
>  "fsl,ls1046a-pcie"
>  "fsl,ls1043a-pcie"
>  "fsl,ls1012a-pcie"
> +"fsl,ls1028a-pcie"
>EP mode:
>   "fsl,ls1046a-pcie-ep", "fsl,ls-pcie-ep"
>  - reg: base addresses and lengths of the PCIe controller register blocks.

This series does not apply to v5.3-rc1, what is it based on ?

Lorenzo


Re: [PATCH v2 07/10] PCI: layerscape: Modify the MSIX to the doorbell way

2019-08-23 Thread Andrew Murray
On Thu, Aug 22, 2019 at 07:22:39PM +0800, Xiaowei Bao wrote:
> The layerscape platform uses the doorbell method to trigger MSI-X
> interrupts in EP mode.
> 

I have no problems with this patch, however...

Are you able to add to this message a reason for why you are making this
change? Did dw_pcie_ep_raise_msix_irq not work when func_no != 0? Or did
it work yet dw_pcie_ep_raise_msix_irq_doorbell is more efficient?

Thanks,

Andrew Murray

> Signed-off-by: Xiaowei Bao 
> ---
> v2:
>  - No change.
> 
>  drivers/pci/controller/dwc/pci-layerscape-ep.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
> b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> index 8461f62..7ca5fe8 100644
> --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> @@ -74,7 +74,8 @@ static int ls_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 
> func_no,
>   case PCI_EPC_IRQ_MSI:
>   return dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
>   case PCI_EPC_IRQ_MSIX:
> - return dw_pcie_ep_raise_msix_irq(ep, func_no, interrupt_num);
> + return dw_pcie_ep_raise_msix_irq_doorbell(ep, func_no,
> +   interrupt_num);
>   default:
>   dev_err(pci->dev, "UNKNOWN IRQ type\n");
>   return -EINVAL;
> -- 
> 2.9.5
> 


Re: [PATCH v2 05/10] PCI: layerscape: Fix some format issue of the code

2019-08-23 Thread Andrew Murray
On Thu, Aug 22, 2019 at 07:22:37PM +0800, Xiaowei Bao wrote:
> Fix some format issue of the code in EP driver.
> 
> Signed-off-by: Xiaowei Bao 

Reviewed-by: Andrew Murray 


> ---
> v2:
>  - No change.
> 
>  drivers/pci/controller/dwc/pci-layerscape-ep.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
> b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> index be61d96..4e92a95 100644
> --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> @@ -62,7 +62,7 @@ static void ls_pcie_ep_init(struct dw_pcie_ep *ep)
>  }
>  
>  static int ls_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
> -   enum pci_epc_irq_type type, u16 interrupt_num)
> + enum pci_epc_irq_type type, u16 interrupt_num)
>  {
>   struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
>  
> @@ -86,7 +86,7 @@ static const struct dw_pcie_ep_ops pcie_ep_ops = {
>  };
>  
>  static int __init ls_add_pcie_ep(struct ls_pcie_ep *pcie,
> - struct platform_device *pdev)
> +  struct platform_device *pdev)
>  {
>   struct dw_pcie *pci = pcie->pci;
>   struct device *dev = pci->dev;
> -- 
> 2.9.5
> 


Re: [PATCH v2 03/10] PCI: designware-ep: Move the function of getting MSI capability forward

2019-08-23 Thread Andrew Murray
On Thu, Aug 22, 2019 at 07:22:35PM +0800, Xiaowei Bao wrote:
> Move the function of getting MSI capability to the front of init
> function, because the init function of the EP platform driver will use
> the return value by the function of getting MSI capability.
> 
> Signed-off-by: Xiaowei Bao 

Reviewed-by: Andrew Murray 

> ---
> v2:
>  - No change.
> 
>  drivers/pci/controller/dwc/pcie-designware-ep.c | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c 
> b/drivers/pci/controller/dwc/pcie-designware-ep.c
> index b8388f8..0a6c199 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> @@ -656,6 +656,10 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
>   if (ret < 0)
>   epc->max_functions = 1;
>  
> + ep->msi_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSI);
> +
> + ep->msix_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSIX);
> +
>   if (ep->ops->ep_init)
>   ep->ops->ep_init(ep);
>  
> @@ -672,9 +676,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
>   dev_err(dev, "Failed to reserve memory for MSI/MSI-X\n");
>   return -ENOMEM;
>   }
> - ep->msi_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSI);
> -
> - ep->msix_cap = dw_pcie_ep_find_capability(pci, PCI_CAP_ID_MSIX);
>  
>   offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR);
>   if (offset) {
> -- 
> 2.9.5
> 


Re: [PATCH v2 02/10] PCI: designware-ep: Add the doorbell mode of MSI-X in EP mode

2019-08-23 Thread Andrew Murray
On Thu, Aug 22, 2019 at 07:22:34PM +0800, Xiaowei Bao wrote:
> Add the doorbell mode of MSI-X in EP mode.
> 
> Signed-off-by: Xiaowei Bao 
> ---
> v2:
>  - Remove the unused macro.
> 
>  drivers/pci/controller/dwc/pcie-designware-ep.c | 14 ++
>  drivers/pci/controller/dwc/pcie-designware.h| 12 
>  2 files changed, 26 insertions(+)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c 
> b/drivers/pci/controller/dwc/pcie-designware-ep.c
> index 3e2b740..b8388f8 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> @@ -480,6 +480,20 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 
> func_no,
>   return 0;
>  }
>  
> +int dw_pcie_ep_raise_msix_irq_doorbell(struct dw_pcie_ep *ep, u8 func_no,
> +u16 interrupt_num)
> +{
> + struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> + u32 msg_data;
> +
> + msg_data = (func_no << PCIE_MSIX_DOORBELL_PF_SHIFT) |
> +(interrupt_num - 1);
> +
> + dw_pcie_writel_dbi(pci, PCIE_MSIX_DOORBELL, msg_data);
> +
> + return 0;
> +}
> +
>  int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
> u16 interrupt_num)
>  {
> diff --git a/drivers/pci/controller/dwc/pcie-designware.h 
> b/drivers/pci/controller/dwc/pcie-designware.h
> index a0fdbf7..895a9ef 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.h
> +++ b/drivers/pci/controller/dwc/pcie-designware.h
> @@ -88,6 +88,9 @@
>  #define PCIE_MISC_CONTROL_1_OFF  0x8BC
>  #define PCIE_DBI_RO_WR_ENBIT(0)
>  
> +#define PCIE_MSIX_DOORBELL   0x948
> +#define PCIE_MSIX_DOORBELL_PF_SHIFT  24
> +
>  /*
>   * iATU Unroll-specific register definitions
>   * From 4.80 core version the address translation will be made by unroll
> @@ -400,6 +403,8 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 
> func_no,
>u8 interrupt_num);
>  int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
>u16 interrupt_num);
> +int dw_pcie_ep_raise_msix_irq_doorbell(struct dw_pcie_ep *ep, u8 func_no,
> +u16 interrupt_num);
>  void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar);
>  #else
>  static inline void dw_pcie_ep_linkup(struct dw_pcie_ep *ep)
> @@ -432,6 +437,13 @@ static inline int dw_pcie_ep_raise_msix_irq(struct 
> dw_pcie_ep *ep, u8 func_no,
>   return 0;
>  }
>  
> +static inline int dw_pcie_ep_raise_msix_irq_doorbell(struct dw_pcie_ep *ep,
> +  u8 func_no,
> +  u16 interrupt_num)
> +{
> + return 0;
> +}
> +

Looks OK to me.

Reviewed-by: Andrew Murray 

>  static inline void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno 
> bar)
>  {
>  }
> -- 
> 2.9.5
> 


Re: [PATCH v2 01/10] PCI: designware-ep: Add multiple PFs support for DWC

2019-08-23 Thread Andrew Murray
On Thu, Aug 22, 2019 at 07:22:33PM +0800, Xiaowei Bao wrote:
> Add multiple PF support for DWC. Different PFs have different config spaces;
> we use the pf-offset property, obtained from the DTS, to access each PF's
> config space.

It looks like you're missing a --cover-letter again.

> 
> Signed-off-by: Xiaowei Bao 
> ---
> v2:
>  - Remove duplicate redundant code.
>  - Reimplement the PF config space access way.
> 
>  drivers/pci/controller/dwc/pcie-designware-ep.c | 122 
> 
>  drivers/pci/controller/dwc/pcie-designware.c|  59 
>  drivers/pci/controller/dwc/pcie-designware.h|  11 ++-
>  3 files changed, 134 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c 
> b/drivers/pci/controller/dwc/pcie-designware-ep.c
> index 2bf5a35..3e2b740 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> @@ -19,12 +19,17 @@ void dw_pcie_ep_linkup(struct dw_pcie_ep *ep)
>   pci_epc_linkup(epc);
>  }
>  
> -static void __dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar,
> -int flags)
> +static void __dw_pcie_ep_reset_bar(struct dw_pcie *pci, u8 func_no,
> +enum pci_barno bar, int flags)
>  {
>   u32 reg;
> + unsigned int func_offset = 0;
> + struct dw_pcie_ep *ep = >ep;
>  
> - reg = PCI_BASE_ADDRESS_0 + (4 * bar);
> + if (ep->ops->func_conf_select)
> + func_offset = ep->ops->func_conf_select(ep, func_no);
> +
> + reg = func_offset + PCI_BASE_ADDRESS_0 + (4 * bar);

This pattern of checking if func_conf_select exists and using it to get an
offset is repeated a lot throughout this file. You could move this
functionality into a new function (similar to dw_pcie_read_dbi etc). Or
perhaps a new variant of dw_pcie_writel_ should be created that takes
a func_no argument.
 

>   dw_pcie_dbi_ro_wr_en(pci);
>   dw_pcie_writel_dbi2(pci, reg, 0x0);
>   dw_pcie_writel_dbi(pci, reg, 0x0);


> @@ -235,7 +257,7 @@ static int dw_pcie_ep_map_addr(struct pci_epc *epc, u8 
> func_no,
>   struct dw_pcie_ep *ep = epc_get_drvdata(epc);
>   struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
>  
> - ret = dw_pcie_ep_outbound_atu(ep, addr, pci_addr, size);
> + ret = dw_pcie_ep_outbound_atu(ep, func_no, addr, pci_addr, size);
>   if (ret) {
>   dev_err(pci->dev, "Failed to enable address\n");
>   return ret;
> @@ -249,11 +271,15 @@ static int dw_pcie_ep_get_msi(struct pci_epc *epc, u8 
> func_no)
>   struct dw_pcie_ep *ep = epc_get_drvdata(epc);
>   struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
>   u32 val, reg;
> + unsigned int func_offset = 0;
> +
> + if (ep->ops->func_conf_select)
> + func_offset = ep->ops->func_conf_select(ep, func_no);
>  
>   if (!ep->msi_cap)
>   return -EINVAL;
>  
> - reg = ep->msi_cap + PCI_MSI_FLAGS;
> + reg = ep->msi_cap + func_offset + PCI_MSI_FLAGS;

This makes me nervous.

From a PCI viewpoint, each function has its own capability structure and
within each function there may exist a MSI capability. Yet what we're doing
here is using dw_pcie_ep_find_capability to get the list of capabilities for
function 0, and then applying offsets from that for subsequent functions. I.e.
we're applying DW specific knowledge to find the correct capability, rather
than following the general PCI approach.

I think the above hunk shouldn't be required - but instead
dw_pcie_ep_find_capability is updated to take a func_no parameter.

Have I understood this correctly?

>   val = dw_pcie_readw_dbi(pci, reg);
>   if (!(val & PCI_MSI_FLAGS_ENABLE))
>   return -EINVAL;
> @@ -268,11 +294,15 @@ static int dw_pcie_ep_set_msi(struct pci_epc *epc, u8 
> func_no, u8 interrupts)
>   struct dw_pcie_ep *ep = epc_get_drvdata(epc);
>   struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
>   u32 val, reg;
> + unsigned int func_offset = 0;
> +
> + if (ep->ops->func_conf_select)
> + func_offset = ep->ops->func_conf_select(ep, func_no);
>  
>   if (!ep->msi_cap)
>   return -EINVAL;
>  
> - reg = ep->msi_cap + PCI_MSI_FLAGS;
> + reg = ep->msi_cap + func_offset + PCI_MSI_FLAGS;
>   val = dw_pcie_readw_dbi(pci, reg);
>   val &= ~PCI_MSI_FLAGS_QMASK;
>   val |= (interrupts << 1) & PCI_MSI_FLAGS_QMASK;
> @@ -288,11 +318,15 @@ static int dw_pcie_ep_get_msix(struct pci_epc *epc, u8 
> func_no)
>   struct dw_pcie_ep *ep = epc_get_drvdata(epc);
>   struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
>   u32 val, reg;
> + unsigned int func_offset = 0;
> +
> + if (ep->ops->func_conf_select)
> + func_offset = ep->ops->func_conf_select(ep, func_no);
>  
>   if (!ep->msix_cap)
>   return -EINVAL;
>  
> - reg = ep->msix_cap + PCI_MSIX_FLAGS;
> + reg = 

[PATCH 1/2] powerpc/32s: automatically allocate BAT in setbat()

2019-08-23 Thread Christophe Leroy
If no BAT is given to setbat(), select an available BAT.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/book3s32/mmu.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 50a1991d257f..5f08b93ecffd 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -251,9 +251,18 @@ void __init setbat(int index, unsigned long virt, 
phys_addr_t phys,
 {
unsigned int bl;
int wimgxpp;
-   struct ppc_bat *bat = BATS[index];
+   struct ppc_bat *bat;
unsigned long flags = pgprot_val(prot);
 
+   if (index == -1)
+   index = find_free_bat();
+   if (index == -1) {
+   pr_err("%s: no BAT available for mapping 0x%llx\n", __func__,
+  (unsigned long long)phys);
+   return;
+   }
+   bat = BATS[index];
+
if ((flags & _PAGE_NO_CACHE) ||
(cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
flags &= ~_PAGE_COHERENT;
-- 
2.13.3



[PATCH 2/2] powerpc/83xx: map IMMR with a BAT.

2019-08-23 Thread Christophe Leroy
On mpc83xx with a QE, IMMR is 2Mbytes.
On mpc83xx without a QE, IMMR is 1Mbytes.
Each driver will map a part of it to access the registers it needs.
Some drivers will map the same part of IMMR as other drivers.

In order to reduce TLB misses, map the full IMMR with a BAT.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/platforms/83xx/misc.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/platforms/83xx/misc.c 
b/arch/powerpc/platforms/83xx/misc.c
index f46d7bf3b140..1e395b01c535 100644
--- a/arch/powerpc/platforms/83xx/misc.c
+++ b/arch/powerpc/platforms/83xx/misc.c
@@ -18,6 +18,8 @@
 #include 
 #include 
 
+#include 
+
 #include "mpc83xx.h"
 
 static __be32 __iomem *restart_reg_base;
@@ -145,6 +147,14 @@ void __init mpc83xx_setup_arch(void)
if (ppc_md.progress)
ppc_md.progress("mpc83xx_setup_arch()", 0);
 
+   if (!__map_without_bats) {
+   int immrsize = IS_ENABLED(CONFIG_QUICC_ENGINE) ? SZ_2M : SZ_1M;
+
+   ioremap_bot = ALIGN_DOWN(ioremap_bot - immrsize, immrsize);
+   setbat(-1, ioremap_bot, get_immrbase(), immrsize, 
PAGE_KERNEL_NCG);
+   update_bats();
+   }
+
mpc83xx_setup_pci();
 }
 
-- 
2.13.3



Re: [PATCH v2] powerpc/powernv: Add ultravisor message log interface

2019-08-23 Thread Michael Ellerman
Hi Claudio,

Claudio Carvalho  writes:
> Ultravisor (UV) provides an in-memory console which follows the OPAL
> in-memory console structure.
>
> This patch extends the OPAL msglog code to also initialize the UV memory
> console and provide a sysfs interface (uv_msglog) for userspace to view
> the UV message log.
>
> CC: Madhavan Srinivasan 
> CC: Oliver O'Halloran 
> Signed-off-by: Claudio Carvalho 
> ---
> This patch depends on the "kvmppc: Paravirtualize KVM to support
> ultravisor" patchset submitted by Claudio Carvalho.
> ---
>  arch/powerpc/platforms/powernv/opal-msglog.c | 99 ++--
>  1 file changed, 72 insertions(+), 27 deletions(-)

I think the code changes look mostly OK here.

But I'm not sure about the end result in sysfs.

If I'm reading it right this will create:

 /sys/firmware/opal/uv_msglog

Which I think is a little weird, because the UV is not OPAL.

So I guess I wonder if the file should be created elsewhere to avoid any
confusion and keep things nicely separated.

Possibly /sys/firmware/ultravisor/msglog ?

cheers


[PATCH] powerpc/64: Fix stacktrace on BE when function_graph is enabled

2019-08-23 Thread Michael Ellerman
Currently if we oops or warn while function_graph is active the stack
trace looks like:
  .trace_graph_return+0xac/0x100
  .ftrace_return_to_handler+0x98/0x140
  .return_to_handler+0x20/0x40
  .return_to_handler+0x0/0x40
  .return_to_handler+0x0/0x40
  .return_to_handler+0x0/0x40
  .return_to_handler+0x0/0x40
  .return_to_handler+0x0/0x40
  .return_to_handler+0x0/0x40
  .cpu_startup_entry+0x34/0x40
  .start_secondary+0x680/0x6f0
  start_secondary_prolog+0x10/0x14

Notice the multiple entries that just show .return_to_handler.

There is logic in show_stack() to detect this case and print the
traced function, but we inadvertently broke it in commit
7d56c65a6ff9 ("powerpc/ftrace: Remove mod_return_to_handler") (2014),
because that commit accidentally removed the dereference of rth which
gets the text address from the function descriptor. Hence this is only
broken on big endian (or technically ELFv1).

Fix it by using the proper accessor, which is ppc_function_entry().
Result is we get a stack trace such as:

  .trace_graph_return+0x134/0x160
  .ftrace_return_to_handler+0x94/0x140
  .return_to_handler+0x20/0x40
  .return_to_handler+0x0/0x40 (.shared_cede_loop+0x48/0x130)
  .return_to_handler+0x0/0x40 (.cpuidle_enter_state+0xa0/0x690)
  .return_to_handler+0x0/0x40 (.cpuidle_enter+0x44/0x70)
  .return_to_handler+0x0/0x40 (.call_cpuidle+0x68/0xc0)
  .return_to_handler+0x0/0x40 (.do_idle+0x37c/0x400)
  .return_to_handler+0x0/0x40 (.cpu_startup_entry+0x30/0x50)
  .rest_init+0x224/0x348

Fixes: 7d56c65a6ff9 ("powerpc/ftrace: Remove mod_return_to_handler")
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8fc4de0d22b4..1601d7cfe45e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2048,7 +2048,7 @@ void show_stack(struct task_struct *tsk, unsigned long 
*stack)
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
struct ftrace_ret_stack *ret_stack;
extern void return_to_handler(void);
-   unsigned long rth = (unsigned long)return_to_handler;
+   unsigned long rth = ppc_function_entry(return_to_handler);
int curr_frame = 0;
 #endif
 
-- 
2.21.0



Re: [PATCH v7 0/7] KVMPPC driver to manage secure guest pages

2019-08-23 Thread Michael Ellerman
Paul Mackerras  writes:
> On Thu, Aug 22, 2019 at 03:56:13PM +0530, Bharata B Rao wrote:
>> A pseries guest can be run as a secure guest on Ultravisor-enabled
>> POWER platforms. On such platforms, this driver will be used to manage
>> the movement of guest pages between the normal memory managed by
>> hypervisor(HV) and secure memory managed by Ultravisor(UV).
>> 
>> Private ZONE_DEVICE memory equal to the amount of secure memory
>> available in the platform for running secure guests is created.
>> Whenever a page belonging to the guest becomes secure, a page from
>> this private device memory is used to represent and track that secure
>> page on the HV side. The movement of pages between normal and secure
>> memory is done via migrate_vma_pages(). The reverse movement is driven
>> via pagemap_ops.migrate_to_ram().
>> 
>> The page-in or page-out requests from UV will come to HV as hcalls and
>> HV will call back into UV via uvcalls to satisfy these page requests.
>> 
>> These patches are against hmm.git
>> (https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/log/?h=hmm)
>> 
>> plus
>> 
>> Claudio Carvalho's base ultravisor enablement patchset v6
>> (https://lore.kernel.org/linuxppc-dev/20190822034838.27876-1-cclau...@linux.ibm.com/T/#t)
>
> How are you thinking these patches will go upstream?  Are you going to
> send them via the hmm tree?
>
> I assume you need Claudio's patchset as a prerequisite for your series
> to compile, which means the hmm maintainers would need to pull in a
> topic branch from Michael Ellerman's powerpc tree, or something like
> that.

I think more workable would be for me to make a topic branch based on
the hmm tree (or some commit from the hmm tree), which I then apply the
patches on top of, and merge any required powerpc changes into that. I
can then ask Linus to merge that branch late in the merge window once
the hmm changes have gone in.

The bigger problem at the moment is the lack of reviews or acks on the
bulk of the series.

cheers


[PATCH] powerpc/8xx: Fix permanently mapped IMMR region.

2019-08-23 Thread Christophe Leroy
When not using large TLBs, the IMMR region is still
mapped as a whole block in the FIXMAP area.

Do not remove pages mapped in the FIXMAP region when
initialising paging.

Properly report that the IMMR region is block-mapped even
when not using large TLBs.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/mem.c|  8 
 arch/powerpc/mm/nohash/8xx.c | 13 +++--
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 69f99128a8d6..8e221d8744ba 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -216,14 +216,6 @@ void __init paging_init(void)
unsigned long long total_ram = memblock_phys_mem_size();
phys_addr_t top_of_ram = memblock_end_of_DRAM();
 
-#ifdef CONFIG_PPC32
-   unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
-   unsigned long end = __fix_to_virt(FIX_HOLE);
-
-   for (; v < end; v += PAGE_SIZE)
-   map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */
-#endif
-
 #ifdef CONFIG_HIGHMEM
map_kernel_page(PKMAP_BASE, 0, __pgprot(0));/* XXX gross */
pkmap_page_table = virt_to_kpte(PKMAP_BASE);
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 4a06cb342da2..e6918235fe04 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -21,33 +21,34 @@ extern int __map_without_ltlbs;
 static unsigned long block_mapped_ram;
 
 /*
- * Return PA for this VA if it is in an area mapped with LTLBs.
+ * Return PA for this VA if it is in an area mapped with LTLBs or fixmap.
  * Otherwise, returns 0
  */
 phys_addr_t v_block_mapped(unsigned long va)
 {
unsigned long p = PHYS_IMMR_BASE;
 
-   if (__map_without_ltlbs)
-   return 0;
if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
return p + va - VIRT_IMMR_BASE;
+   if (__map_without_ltlbs)
+   return 0;
if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram)
return __pa(va);
return 0;
 }
 
 /*
- * Return VA for a given PA mapped with LTLBs or 0 if not mapped
+ * Return VA for a given PA mapped with LTLBs or fixmap
+ * Return 0 if not mapped
  */
 unsigned long p_block_mapped(phys_addr_t pa)
 {
unsigned long p = PHYS_IMMR_BASE;
 
-   if (__map_without_ltlbs)
-   return 0;
if (pa >= p && pa < p + IMMR_SIZE)
return VIRT_IMMR_BASE + pa - p;
+   if (__map_without_ltlbs)
+   return 0;
if (pa < block_mapped_ram)
return (unsigned long)__va(pa);
return 0;
-- 
2.13.3



[PATCH] powerpc/32: Don't populate page tables for block mapped pages except on the 8xx.

2019-08-23 Thread Christophe Leroy
Commit d2f15e0979ee ("powerpc/32: always populate page tables for
Abatron BDI.") wrongly sets page tables for any PPC32 for using BDI,
and doesn't update them after init (remove RX on init section, set
text and rodata read-only)

Only the 8xx requires page tables to be populated for using the BDI.
They also need to be populated in order to see the mappings in
/sys/kernel/debug/kernel_page_tables

On BOOK3S_32, pages that are not mapped by page tables are mapped
by BATs. The BDI knows BATs and they can be viewed in
/sys/kernel/debug/powerpc/block_address_translation

Only set pagetables for RAM and IMMR on the 8xx and properly update
them at the end of init.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/nohash/8xx.c | 52 +---
 arch/powerpc/mm/pgtable_32.c |  5 +
 2 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index e6918235fe04..2c98078d2ede 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -104,6 +104,19 @@ static void mmu_patch_addis(s32 *site, long simm)
patch_instruction_site(site, instr);
 }
 
+void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, 
pgprot_t prot)
+{
+   unsigned long s = offset;
+   unsigned long v = PAGE_OFFSET + s;
+   phys_addr_t p = memstart_addr + s;
+
+   for (; s < top; s += PAGE_SIZE) {
+   map_kernel_page(v, p, prot);
+   v += PAGE_SIZE;
+   p += PAGE_SIZE;
+   }
+}
+
 unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
unsigned long mapped;
@@ -116,10 +129,20 @@ unsigned long __init mmu_mapin_ram(unsigned long base, 
unsigned long top)
if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
mmu_patch_cmp_limit(__itlbmiss_linmem_top, 0);
} else {
+   unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+
mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
-   mmu_patch_cmp_limit(__itlbmiss_linmem_top,
-   _ALIGN(__pa(_einittext), 8 << 20));
+   mmu_patch_cmp_limit(__itlbmiss_linmem_top, 
einittext8);
+
+   /*
+* Populate page tables to:
+* - have them appear in /sys/kernel/debug/kernel_page_tables
+* - allow the BDI to find the pages when they are not PINNED
+*/
+   mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X);
+   mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL);
+   mmu_mapin_immr();
}
 
mmu_patch_cmp_limit(__dtlbmiss_linmem_top, mapped);
@@ -145,18 +168,41 @@ void mmu_mark_initmem_nx(void)
if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23)
mmu_patch_addis(__itlbmiss_linmem_top8,
-((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1)));
-   if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+   if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) {
+   unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+   unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
+   unsigned long etext = __pa(_etext);
+
mmu_patch_cmp_limit(__itlbmiss_linmem_top, __pa(_etext));
+
+   /* Update page tables for PTDUMP and BDI */
+   mmu_mapin_ram_chunk(0, einittext8, __pgprot(0));
+   if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
+   mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT);
+   mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL);
+   } else {
+   mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT);
+   mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL);
+   }
+   }
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void mmu_mark_rodata_ro(void)
 {
+   unsigned long sinittext = __pa(_sinittext);
+   unsigned long etext = __pa(_etext);
+
if (CONFIG_DATA_SHIFT < 23)
mmu_patch_addis(__dtlbmiss_romem_top8,
-__pa(((unsigned long)_sinittext) &
  ~(LARGE_PAGE_SIZE_8M - 1)));
mmu_patch_addis(__dtlbmiss_romem_top, -__pa(_sinittext));
+
+   /* Update page tables for PTDUMP and BDI */
+   mmu_mapin_ram_chunk(0, sinittext, __pgprot(0));
+   mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX);
+   mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO);
 }
 #endif
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 97f401a06fcc..c5fc33a202aa 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -270,10 +270,7 @@ void __init mapin_ram(void)
if (base >= top)
continue;

[PATCH v4 2/3] arm64: dts: ls1028a: Add PCIe controller DT nodes

2019-08-23 Thread Xiaowei Bao
LS1028a implements 2 PCIe 3.0 controllers.

Signed-off-by: Xiaowei Bao 
Signed-off-by: Hou Zhiqiang 
---
v2:
 - Fix up the legacy INTx allocate failed issue.
v3:
 - No change.
v4:
 - Remove the num-lanes property.
depends on: https://patchwork.kernel.org/project/linux-pci/list/?series=162215

 arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi 
b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 72b9a75..a25f9d9 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -625,6 +625,56 @@
};
};
 
+   pcie@340 {
+   compatible = "fsl,ls1028a-pcie";
+   reg = <0x00 0x0340 0x0 0x0010   /* controller 
registers */
+  0x80 0x 0x0 0x2000>; /* 
configuration space */
+   reg-names = "regs", "config";
+   interrupts = , /* PME 
interrupt */
+; /* aer 
interrupt */
+   interrupt-names = "pme", "aer";
+   #address-cells = <3>;
+   #size-cells = <2>;
+   device_type = "pci";
+   dma-coherent;
+   bus-range = <0x0 0xff>;
+   ranges = <0x8100 0x0 0x 0x80 0x0001 0x0 
0x0001   /* downstream I/O */
+ 0x8200 0x0 0x4000 0x80 0x4000 0x0 
0x4000>; /* non-prefetchable memory */
+   msi-parent = <>;
+   #interrupt-cells = <1>;
+   interrupt-map-mask = <0 0 0 7>;
+   interrupt-map = < 0 0 1  0 0 GIC_SPI 109 
IRQ_TYPE_LEVEL_HIGH>,
+   < 0 0 2  0 0 GIC_SPI 110 
IRQ_TYPE_LEVEL_HIGH>,
+   < 0 0 3  0 0 GIC_SPI 111 
IRQ_TYPE_LEVEL_HIGH>,
+   < 0 0 4  0 0 GIC_SPI 112 
IRQ_TYPE_LEVEL_HIGH>;
+   status = "disabled";
+   };
+
+   pcie@350 {
+   compatible = "fsl,ls1028a-pcie";
+   reg = <0x00 0x0350 0x0 0x0010   /* controller 
registers */
+  0x88 0x 0x0 0x2000>; /* 
configuration space */
+   reg-names = "regs", "config";
+   interrupts = ,
+;
+   interrupt-names = "pme", "aer";
+   #address-cells = <3>;
+   #size-cells = <2>;
+   device_type = "pci";
+   dma-coherent;
+   bus-range = <0x0 0xff>;
+   ranges = <0x8100 0x0 0x 0x88 0x0001 0x0 
0x0001   /* downstream I/O */
+ 0x8200 0x0 0x4000 0x88 0x4000 0x0 
0x4000>; /* non-prefetchable memory */
+   msi-parent = <>;
+   #interrupt-cells = <1>;
+   interrupt-map-mask = <0 0 0 7>;
+   interrupt-map = < 0 0 1  0 0 GIC_SPI 114 
IRQ_TYPE_LEVEL_HIGH>,
+   < 0 0 2  0 0 GIC_SPI 115 
IRQ_TYPE_LEVEL_HIGH>,
+   < 0 0 3  0 0 GIC_SPI 116 
IRQ_TYPE_LEVEL_HIGH>,
+   < 0 0 4  0 0 GIC_SPI 117 
IRQ_TYPE_LEVEL_HIGH>;
+   status = "disabled";
+   };
+
pcie@1f000 { /* Integrated Endpoint Root Complex */
compatible = "pci-host-ecam-generic";
reg = <0x01 0xf000 0x0 0x10>;
-- 
2.9.5



[PATCH v4 3/3] PCI: layerscape: Add LS1028a support

2019-08-23 Thread Xiaowei Bao
Add support for the LS1028a PCIe controller.

Signed-off-by: Xiaowei Bao 
Signed-off-by: Hou Zhiqiang 
---
v2:
 - No change.
v3:
 - Reuse the ls2088 driver data structure.
v4:
 - No change.

 drivers/pci/controller/dwc/pci-layerscape.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/controller/dwc/pci-layerscape.c 
b/drivers/pci/controller/dwc/pci-layerscape.c
index 3a5fa26..f24f79a 100644
--- a/drivers/pci/controller/dwc/pci-layerscape.c
+++ b/drivers/pci/controller/dwc/pci-layerscape.c
@@ -263,6 +263,7 @@ static const struct ls_pcie_drvdata ls2088_drvdata = {
 static const struct of_device_id ls_pcie_of_match[] = {
{ .compatible = "fsl,ls1012a-pcie", .data = _drvdata },
{ .compatible = "fsl,ls1021a-pcie", .data = _drvdata },
+   { .compatible = "fsl,ls1028a-pcie", .data = _drvdata },
{ .compatible = "fsl,ls1043a-pcie", .data = _drvdata },
{ .compatible = "fsl,ls1046a-pcie", .data = _drvdata },
{ .compatible = "fsl,ls2080a-pcie", .data = _drvdata },
-- 
2.9.5



[PATCH v4 1/3] dt-bindings: pci: layerscape-pci: add compatible strings "fsl,ls1028a-pcie"

2019-08-23 Thread Xiaowei Bao
Add the PCIe compatible string for LS1028A

Signed-off-by: Xiaowei Bao 
Signed-off-by: Hou Zhiqiang 
Reviewed-by: Rob Herring 
---
v2:
 - No change.
v3:
 - No change.
v4:
 - No change.

 Documentation/devicetree/bindings/pci/layerscape-pci.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/pci/layerscape-pci.txt 
b/Documentation/devicetree/bindings/pci/layerscape-pci.txt
index e20ceaa..99a386e 100644
--- a/Documentation/devicetree/bindings/pci/layerscape-pci.txt
+++ b/Documentation/devicetree/bindings/pci/layerscape-pci.txt
@@ -21,6 +21,7 @@ Required properties:
 "fsl,ls1046a-pcie"
 "fsl,ls1043a-pcie"
 "fsl,ls1012a-pcie"
+"fsl,ls1028a-pcie"
   EP mode:
"fsl,ls1046a-pcie-ep", "fsl,ls-pcie-ep"
 - reg: base addresses and lengths of the PCIe controller register blocks.
-- 
2.9.5



Re: [PATCH 2/3] powerpc/numa: Early request for home node associativity

2019-08-23 Thread Satheesh Rajendran
On Thu, Aug 22, 2019 at 08:12:34PM +0530, Srikar Dronamraju wrote:
> Currently the kernel detects if its running on a shared lpar platform
> and requests home node associativity before the scheduler sched_domains
> are setup. However between the time NUMA setup is initialized and the
> request for home node associativity, workqueue initializes its per node
> cpumask. The per node workqueue possible cpumask may turn invalid
> after home node associativity resulting in weird situations like
> workqueue possible cpumask being a subset of workqueue online cpumask.

I tested this series on a Power KVM guest, expecting it to fix
https://github.com/linuxppc/issues/issues/167, but I still see the warning
below while doing vcpu hotplug with NUMA nodes. Please advise if I am missing
anything, or if this series is not intended to fix the above issue.

Env:
HW: Power8
Host/Guest Kernel: 5.3.0-rc5-00172-g13e3f1076e29 (linux master + this series)
Qemu: 4.0.90 (v4.1.0-rc3)

Guest Config:
..
 4
...
/home/kvmci/linux/vmlinux
root=/dev/sda2 rw console=tty0 console=ttyS0,115200 
init=/sbin/init  initcall_debug numa=debug crashkernel=1024M selinux=0
...
  

  
  


Event: 
vcpu hotplug

[root@atest-guest ~]# [   41.447170] random: crng init done
[   41.448153] random: 7 urandom warning(s) missed due to ratelimiting
[   51.727256] VPHN hcall succeeded. Reset polling...
[   51.826301] adding cpu 2 to node 1
[   51.856238] WARNING: workqueue cpumask: online intersect > possible intersect
[   51.916297] VPHN hcall succeeded. Reset polling...
[   52.036272] adding cpu 3 to node 1


Regards,
-Satheesh.
> 
> This can be fixed by requesting home node associativity earlier just
> before NUMA setup. However at the NUMA setup time, kernel may not be in
> a position to detect if its running on a shared lpar platform. So
> request for home node associativity and if the request fails, fallback
> on the device tree property.
> 
> However home node associativity requires cpu's hwid which is set in
> smp_setup_pacas. Hence call smp_setup_pacas before numa_setup_cpus.
> 
> Signed-off-by: Srikar Dronamraju 
> Cc: Michael Ellerman 
> Cc: Nicholas Piggin 
> Cc: Nathan Lynch 
> Cc: linuxppc-dev@lists.ozlabs.org
> Reported-by: Satheesh Rajendran 
> Reported-by: Abdul Haleem 
> ---
>  arch/powerpc/kernel/setup-common.c |  5 +++--
>  arch/powerpc/mm/numa.c | 28 +++-
>  2 files changed, 30 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/setup-common.c 
> b/arch/powerpc/kernel/setup-common.c
> index 1f8db66..9135dba 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -888,6 +888,9 @@ void __init setup_arch(char **cmdline_p)
>   /* Check the SMT related command line arguments (ppc64). */
>   check_smt_enabled();
> 
> +#ifdef CONFIG_SMP
> + smp_setup_pacas();
> +#endif
>   /* Parse memory topology */
>   mem_topology_setup();
> 
> @@ -899,8 +902,6 @@ void __init setup_arch(char **cmdline_p)
>* so smp_release_cpus() does nothing for them.
>*/
>  #ifdef CONFIG_SMP
> - smp_setup_pacas();
> -
>   /* On BookE, setup per-core TLB data structures. */
>   setup_tlb_core_data();
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 88b5157..7965d3b 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -461,6 +461,21 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
>   return nid;
>  }
> 
> +static int vphn_get_nid(unsigned long cpu)
> +{
> + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
> + long rc;
> +
> + /* Use associativity from first thread for all siblings */
> + rc = hcall_vphn(get_hard_smp_processor_id(cpu),
> + VPHN_FLAG_VCPU, associativity);
> +
> + if (rc == H_SUCCESS)
> + return  associativity_to_nid(associativity);
> +
> + return NUMA_NO_NODE;
> +}
> +
>  /*
>   * Figure out to which domain a cpu belongs and stick it there.
>   * Return the id of the domain used.
> @@ -490,7 +505,18 @@ static int numa_setup_cpu(unsigned long lcpu)
>   goto out;
>   }
> 
> - nid = of_node_to_nid_single(cpu);
> + /*
> +  * On a shared lpar, the device tree might not have the correct node
> +  * associativity.  At this time lppaca, or its __old_status field
> +  * may not be updated. Hence request an explicit associativity
> +  * irrespective of whether the lpar is shared or dedicated.  Use the
> +  * device tree property as a fallback.
> +  */
> + if (firmware_has_feature(FW_FEATURE_VPHN))
> + nid = vphn_get_nid(lcpu);
> +
> + if (nid == NUMA_NO_NODE)
> + nid = of_node_to_nid_single(cpu);
> 
>  out_present:
>   if (nid < 0 || !node_possible(nid))
> -- 
> 1.8.3.1
> 



[RFC 3/3] cpuidle/powernv : Add flags to identify stop state type

2019-08-23 Thread Abhishek Goel
Removed the threshold latency which was being used to decide whether a state
is of cpuidle type or not. This decision can be taken using flags, as this
information is encapsulated in state->flags and is read from the idle
device-tree.

Signed-off-by: Abhishek Goel 
---
 arch/powerpc/include/asm/opal-api.h |  7 +++
 drivers/cpuidle/cpuidle-powernv.c   | 16 +---
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 383242eb0dea..b9068fee21d8 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -233,6 +233,13 @@
 #define OPAL_PM_STOP_INST_FAST 0x0010
 #define OPAL_PM_STOP_INST_DEEP 0x0020
 
+/*
+ * Flags for stop states to distinguish between cpuidle and
+ * cpuoffline type of states.
+ */
+#define OPAL_PM_STOP_CPUIDLE   0x0100
+#define OPAL_PM_STOP_CPUHOTPLUG0x0200
+
 /*
  * OPAL_CONFIG_CPU_IDLE_STATE parameters
  */
diff --git a/drivers/cpuidle/cpuidle-powernv.c 
b/drivers/cpuidle/cpuidle-powernv.c
index 1b6c84d4ac77..1a33a548b769 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -270,8 +270,13 @@ static int powernv_add_idle_states(void)
goto out;
}
 
-   /* TODO: Count only states which are eligible for cpuidle */
-   dt_idle_states = nr_pnv_idle_states;
+   /* Count only cpuidle states*/
+   for (i = 0; i < nr_pnv_idle_states; i++) {
+   if (pnv_idle_states[i].flags & OPAL_PM_STOP_CPUIDLE)
+   dt_idle_states++;
+   }
+   pr_info("idle states in dt = %d , states with idle flag = %d",
+   nr_pnv_idle_states, dt_idle_states);
 
/*
 * Since snooze is used as first idle state, max idle states allowed is
@@ -300,11 +305,8 @@ static int powernv_add_idle_states(void)
continue;
}
 
-   /*
-* If an idle state has exit latency beyond
-* POWERNV_THRESHOLD_LATENCY_NS then don't use it in cpu-idle.
-*/
-   if (state->latency_ns > POWERNV_THRESHOLD_LATENCY_NS) {
+   /* Check whether a state is of cpuidle type */
+   if ((state->flags & OPAL_PM_STOP_CPUIDLE) != state->flags) {
pr_info("State %d is not idletype\n", i);
continue;
}
-- 
2.17.1



[RFC 2/3] cpuidle/powernv: Add support for versioned stop states

2019-08-23 Thread Abhishek Goel
New device tree format adds a compatible flag which has version
corresponding to every state, so that only kernel which has the capability
to handle the version of stop state will enable it. Drawback of the array
based dt node is that versioning of idle states is not possible.

Older kernel will still see stop0 and stop0_lite in older format and we
will deprecate it after some time.

Consider a case that stop4 has a bug. We take the following steps to
mitigate the problem.

1) Change compatible string for stop4 in OPAL to "stop4,v2" from
"stop4,v1", i.e. basically bump up the previous version and ship the
new firmware.
2) The kernel will ignore stop4 as it won't be able to recognize this
new version. Kernel will also ignore all the deeper states because it's
possible that a cpu has requested a deeper state but was never able
to enter it. But we will still have the shallower states that come
before stop4. This avoids completely disabling stop states.

Linux kernel can now look at the version string and decide if it has the
ability to handle that idle state. Henceforth, if kernel does not know
about a version, it will skip that state and all the deeper states.

Once the workarounds are implemented in the kernel, we can bump up
the known version in kernel for that state, so that support can be
enabled once again in kernel.

New Device-tree :

Final output
   power-mgt {
...
 ibm,enabled-stop-levels = <0xec00>;
 ibm,cpu-idle-state-psscr-mask = <0x0 0x3003ff 0x0 0x3003ff>;
 ibm,cpu-idle-state-latencies-ns = <0x3e8 0x7d0>;
 ibm,cpu-idle-state-psscr = <0x0 0x330 0x0 0x300330>;
 ibm,cpu-idle-state-flags = <0x10 0x101000>;
 ibm,cpu-idle-state-residency-ns = <0x2710 0x4e20>;
 ibm,idle-states {
 stop4 {
 flags = <0x207000>;
 compatible = "stop4,v1",
 psscr-mask = <0x0 0x3003ff>;
 latency-ns = <0x186a0>;
 residency-ns = <0x989680>;
 psscr = <0x0 0x300374>;
 ...
  };
...
stop11 {
 ...
 compatible = "stop11,v1",
 ...
  };
 };

Since state pointer is being passed to stop loop, I have separated idle
fast path for lite, shallow and deep states. This improves the
readability of the code and also future maintainability of the code.
Stop handle corresponding to each state can be called directly since
state pointer is being passed now.

Signed-off-by: Abhishek Goel 
---
 arch/powerpc/include/asm/cpuidle.h|   8 +-
 arch/powerpc/platforms/powernv/idle.c | 331 +++---
 2 files changed, 299 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h 
b/arch/powerpc/include/asm/cpuidle.h
index 9844b3ded187..5eb9a98fcb86 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -70,15 +70,19 @@
 
 #ifndef __ASSEMBLY__
 
-#define PNV_IDLE_NAME_LEN16
+#define PNV_VERSION_LEN25
+#define PNV_IDLE_NAME_LEN  16
 struct pnv_idle_states_t {
char name[PNV_IDLE_NAME_LEN];
+   char compat_version[PNV_VERSION_LEN];
u32 latency_ns;
u32 residency_ns;
u64 psscr_val;
u64 psscr_mask;
-   u32 flags;
+   u64 flags;
bool valid;
+   unsigned long (*stop_handle)(struct pnv_idle_states_t *state,
+bool mmu_on);
 };
 
 extern struct pnv_idle_states_t *pnv_idle_states;
diff --git a/arch/powerpc/platforms/powernv/idle.c 
b/arch/powerpc/platforms/powernv/idle.c
index db810c0a16e1..7398a66e1ddb 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -49,6 +49,12 @@ static bool default_stop_found;
 static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
 static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
 
+struct stop_version_t {
+   const char compat_version[PNV_VERSION_LEN];
+   unsigned long (*stop_handle)(struct pnv_idle_states_t *state,
+bool mmu_on);
+};
+
 /*
  * psscr value and mask of the deepest stop idle state.
  * Used when a cpu is offlined.
@@ -599,40 +605,152 @@ struct p9_sprs {
u64 uamor;
 };
 
-static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
+/* Splitting the previous power9_idle_stop into three functions
+ * to separately handle lite, shallow and deep states.
+ */
+
+static unsigned long power9_stop_lite(struct pnv_idle_states_t *state,
+ bool mmu_on)
 {
-   int cpu = raw_smp_processor_id();
-   int first = cpu_first_thread_sibling(cpu);
-   unsigned long *state = _ptrs[first]->idle_state;
-   unsigned long core_thread_mask = 

[RFC 1/3] cpuidle/powernv : Pass state pointer instead of values to stop loop

2019-08-23 Thread Abhishek Goel
Instead of passing psscr_val and psscr_mask, we will pass state pointer
to stop loop. This will help to figure out the method to enter/exit idle
state. Removed psscr_mask and psscr_val from driver idle code. Same has
also been done in platform idle code.
Also, added some cleanups and debugging info.

Signed-off-by: Abhishek Goel 
---
 arch/powerpc/include/asm/processor.h  |  5 +-
 arch/powerpc/platforms/powernv/idle.c | 50 ---
 drivers/cpuidle/cpuidle-powernv.c | 69 +--
 3 files changed, 55 insertions(+), 69 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index a9993e7a443b..855666e8d271 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* We do _not_ want to define new machine types at all, those must die
  * in favor of using the device-tree
@@ -419,9 +420,7 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, 
IDLE_POWERSAVE_OFF};
 extern int powersave_nap;  /* set if nap mode can be used in idle loop */
 
 extern void power7_idle_type(unsigned long type);
-extern void power9_idle_type(unsigned long stop_psscr_val,
- unsigned long stop_psscr_mask);
-
+extern void power9_idle_type(struct pnv_idle_states_t *state);
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 extern void poweroff_now(void);
diff --git a/arch/powerpc/platforms/powernv/idle.c 
b/arch/powerpc/platforms/powernv/idle.c
index 09f49eed7fb8..db810c0a16e1 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -40,8 +40,7 @@ int nr_pnv_idle_states;
  * The default stop state that will be used by ppc_md.power_save
  * function on platforms that support stop instruction.
  */
-static u64 pnv_default_stop_val;
-static u64 pnv_default_stop_mask;
+struct pnv_idle_states_t *pnv_default_state;
 static bool default_stop_found;
 
 /*
@@ -54,9 +53,7 @@ static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
  * psscr value and mask of the deepest stop idle state.
  * Used when a cpu is offlined.
  */
-static u64 pnv_deepest_stop_psscr_val;
-static u64 pnv_deepest_stop_psscr_mask;
-static u64 pnv_deepest_stop_flag;
+static struct pnv_idle_states_t *pnv_deepest_state;
 static bool deepest_stop_found;
 
 static unsigned long power7_offline_type;
@@ -78,7 +75,7 @@ static int pnv_save_sprs_for_deep_states(void)
uint64_t hid5_val   = mfspr(SPRN_HID5);
uint64_t hmeer_val  = mfspr(SPRN_HMEER);
uint64_t msr_val = MSR_IDLE;
-   uint64_t psscr_val = pnv_deepest_stop_psscr_val;
+   uint64_t psscr_val = pnv_deepest_state->psscr_val;
 
for_each_present_cpu(cpu) {
uint64_t pir = get_hard_smp_processor_id(cpu);
@@ -804,9 +801,13 @@ static unsigned long power9_idle_stop(unsigned long psscr, 
bool mmu_on)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static unsigned long power9_offline_stop(unsigned long psscr)
+static unsigned long power9_offline_stop(struct pnv_idle_states_t *state)
 {
unsigned long srr1;
+   unsigned long psscr;
+
+   psscr = mfspr(SPRN_PSSCR);
+   psscr = (psscr & ~state->psscr_mask) | state->psscr_val;
 
 #ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
__ppc64_runlatch_off();
@@ -841,8 +842,7 @@ static unsigned long power9_offline_stop(unsigned long 
psscr)
 }
 #endif
 
-void power9_idle_type(unsigned long stop_psscr_val,
- unsigned long stop_psscr_mask)
+void power9_idle_type(struct pnv_idle_states_t *state)
 {
unsigned long psscr;
unsigned long srr1;
@@ -851,7 +851,7 @@ void power9_idle_type(unsigned long stop_psscr_val,
return;
 
psscr = mfspr(SPRN_PSSCR);
-   psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+   psscr = (psscr & ~state->psscr_mask) | state->psscr_val;
 
__ppc64_runlatch_off();
srr1 = power9_idle_stop(psscr, true);
@@ -867,7 +867,7 @@ void power9_idle_type(unsigned long stop_psscr_val,
  */
 void power9_idle(void)
 {
-   power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
+   power9_idle_type(pnv_default_state);
 }
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -970,12 +970,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
__ppc64_runlatch_off();
 
if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
-   unsigned long psscr;
-
-   psscr = mfspr(SPRN_PSSCR);
-   psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
-   pnv_deepest_stop_psscr_val;
-   srr1 = power9_offline_stop(psscr);
+   srr1 = power9_offline_stop(pnv_deepest_state);
} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
srr1 = power7_offline();
} else {
@@ -1124,16 +1119,13 @@ static void __init 

[RFC 0/3] New idle device-tree format and support for versioned stop state

2019-08-23 Thread Abhishek Goel
Background
--

Previously, if an older kernel ran on a newer firmware, it might enable
all available states irrespective of its capability to handle them.
If some stop state had a bug, we would end up disabling all the stop
states. This patch introduces selective control to solve this
problem.

Previous version of these patches can be found at:
https://lkml.org/lkml/2018/10/11/544
That series, however, also had patches for OPAL save-restore support,
which I am now decoupling and will post separately.
I have posted the corresponding skiboot patches for this kernel patchset
here : https://patchwork.ozlabs.org/cover/1144587/

What's new?


Add stop states under ibm,idle-states in addition to the current array
based device tree properties.

New device tree format adds a compatible flag which has version
corresponding to every state, so that only kernel which has the capability
to handle the version of stop state will enable it. Drawback of the array
based dt node is that versioning of idle states is not possible.

Older kernel will still see stop0 and stop0_lite in older format and we
will deprecate it after some time.

Consider a case that stop4 has a bug. We take the following steps to
mitigate the problem.

1) Change compatible string for stop4 in OPAL to "stop4,v2" from
"stop4,v1", i.e. basically bump up the previous version and ship the
new firmware.

2) The kernel will ignore stop4 as it won't be able to recognize this
new version. Kernel will also ignore all the deeper states because it's
possible that a cpu has requested a deeper state but was never able
to enter it. But we will still have the shallower states that come
before stop4. This avoids completely disabling stop states.

Linux kernel can now look at the version string and decide if it has the
ability to handle that idle state. Henceforth, if kernel does not know
about a version, it will skip that state and all the deeper states.

Once the workarounds are implemented in the kernel, we can bump up
the known version in kernel for that state, so that support can be
enabled once again in kernel.

New Device-tree :

Final output
   power-mgt {
...
 ibm,enabled-stop-levels = <0xec00>;
 ibm,cpu-idle-state-psscr-mask = <0x0 0x3003ff 0x0 0x3003ff>;
 ibm,cpu-idle-state-latencies-ns = <0x3e8 0x7d0>;
 ibm,cpu-idle-state-psscr = <0x0 0x330 0x0 0x300330>;
 ibm,cpu-idle-state-flags = <0x10 0x101000>;
 ibm,cpu-idle-state-residency-ns = <0x2710 0x4e20>;
 ibm,idle-states {
 stop4 {
 flags = <0x207000>;
 compatible = "stop4,v1",
 psscr-mask = <0x0 0x3003ff>;
 latency-ns = <0x186a0>;
 residency-ns = <0x989680>;
 psscr = <0x0 0x300374>;
 ...
  };
...
stop11 {
 ...
 compatible = "stop11,v1",
 ...
  };
 };


Abhishek Goel (3):
  cpuidle/powernv : Pass state pointer instead of values to stop loop
  cpuidle/powernv: Add support for versioned stop states
  cpuidle/powernv : Add flags to identify stop state type

 arch/powerpc/include/asm/cpuidle.h|   8 +-
 arch/powerpc/include/asm/opal-api.h   |   7 +
 arch/powerpc/include/asm/processor.h  |   5 +-
 arch/powerpc/platforms/powernv/idle.c | 371 +-
 drivers/cpuidle/cpuidle-powernv.c |  81 +++---
 5 files changed, 363 insertions(+), 109 deletions(-)

-- 
2.17.1



Re: [PATCH v6 3/7] powerpc/powernv: Introduce FW_FEATURE_ULTRAVISOR

2019-08-23 Thread Michael Ellerman
Claudio Carvalho  writes:

> In PEF enabled systems, some of the resources which were previously
> hypervisor privileged are now ultravisor privileged and controlled by
> the ultravisor firmware.
>
> This adds FW_FEATURE_ULTRAVISOR to indicate if PEF is enabled.
>
> The host kernel can use FW_FEATURE_ULTRAVISOR, for instance, to skip
> accessing resources (e.g. PTCR and LDBAR) in case PEF is enabled.
>
> Signed-off-by: Claudio Carvalho 
> [ andmike: Device node name to "ibm,ultravisor" ]

^ I dropped this comment as it refers to a previous version of the patch
and is not accurate anymore.

cheers

> Signed-off-by: Michael Anderson 
> ---
>  arch/powerpc/include/asm/firmware.h |  5 +++--
>  arch/powerpc/include/asm/ultravisor.h   | 14 
>  arch/powerpc/kernel/prom.c  |  4 
>  arch/powerpc/platforms/powernv/Makefile |  1 +
>  arch/powerpc/platforms/powernv/ultravisor.c | 24 +
>  5 files changed, 46 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/ultravisor.h
>  create mode 100644 arch/powerpc/platforms/powernv/ultravisor.c
>
> diff --git a/arch/powerpc/include/asm/firmware.h 
> b/arch/powerpc/include/asm/firmware.h
> index faeca8b76c8c..b3e214a97f3a 100644
> --- a/arch/powerpc/include/asm/firmware.h
> +++ b/arch/powerpc/include/asm/firmware.h
> @@ -50,6 +50,7 @@
>  #define FW_FEATURE_DRC_INFO  ASM_CONST(0x0008)
>  #define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0010)
>  #define FW_FEATURE_PAPR_SCM  ASM_CONST(0x0020)
> +#define FW_FEATURE_ULTRAVISORASM_CONST(0x0040)
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -68,9 +69,9 @@ enum {
>   FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
>   FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
>   FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
> - FW_FEATURE_PAPR_SCM,
> + FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR,
>   FW_FEATURE_PSERIES_ALWAYS = 0,
> - FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
> + FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
>   FW_FEATURE_POWERNV_ALWAYS = 0,
>   FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
>   FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
> diff --git a/arch/powerpc/include/asm/ultravisor.h 
> b/arch/powerpc/include/asm/ultravisor.h
> new file mode 100644
> index ..dc6e1ea198f2
> --- /dev/null
> +++ b/arch/powerpc/include/asm/ultravisor.h
> @@ -0,0 +1,14 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Ultravisor definitions
> + *
> + * Copyright 2019, IBM Corporation.
> + *
> + */
> +#ifndef _ASM_POWERPC_ULTRAVISOR_H
> +#define _ASM_POWERPC_ULTRAVISOR_H
> +
> +int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
> +   int depth, void *data);
> +
> +#endif   /* _ASM_POWERPC_ULTRAVISOR_H */
> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
> index 7159e791a70d..5828f1c81dc9 100644
> --- a/arch/powerpc/kernel/prom.c
> +++ b/arch/powerpc/kernel/prom.c
> @@ -55,6 +55,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  
> @@ -702,6 +703,9 @@ void __init early_init_devtree(void *params)
>  #ifdef CONFIG_PPC_POWERNV
>   /* Some machines might need OPAL info for debugging, grab it now. */
>   of_scan_flat_dt(early_init_dt_scan_opal, NULL);
> +
> + /* Scan tree for ultravisor feature */
> + of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
>  #endif
>  
>  #ifdef CONFIG_FA_DUMP
> diff --git a/arch/powerpc/platforms/powernv/Makefile 
> b/arch/powerpc/platforms/powernv/Makefile
> index 69a3aefa905b..49186f0985a4 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -4,6 +4,7 @@ obj-y += idle.o opal-rtc.o opal-nvram.o 
> opal-lpc.o opal-flash.o
>  obj-y+= rng.o opal-elog.o opal-dump.o 
> opal-sysparam.o opal-sensor.o
>  obj-y+= opal-msglog.o opal-hmi.o opal-power.o 
> opal-irqchip.o
>  obj-y+= opal-kmsg.o opal-powercap.o opal-psr.o 
> opal-sensor-groups.o
> +obj-y+= ultravisor.o
>  
>  obj-$(CONFIG_SMP)+= smp.o subcore.o subcore-asm.o
>  obj-$(CONFIG_PCI)+= pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
> diff --git a/arch/powerpc/platforms/powernv/ultravisor.c 
> b/arch/powerpc/platforms/powernv/ultravisor.c
> new file mode 100644
> index ..02ac57b4bded
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/ultravisor.c
> @@ -0,0 +1,24 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Ultravisor high level interfaces
> + *
> + * Copyright 2019, IBM Corporation.
> + *
> + */
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +
> +int __init early_init_dt_scan_ultravisor(unsigned long node, const char 
> *uname,
> +

Re: [PATCH v7 0/7] KVMPPC driver to manage secure guest pages

2019-08-23 Thread Bharata B Rao
On Fri, Aug 23, 2019 at 02:17:47PM +1000, Paul Mackerras wrote:
> On Thu, Aug 22, 2019 at 03:56:13PM +0530, Bharata B Rao wrote:
> > Hi,
> > 
> > A pseries guest can be run as a secure guest on Ultravisor-enabled
> > POWER platforms. On such platforms, this driver will be used to manage
> > the movement of guest pages between the normal memory managed by
> > hypervisor(HV) and secure memory managed by Ultravisor(UV).
> > 
> > Private ZONE_DEVICE memory equal to the amount of secure memory
> > available in the platform for running secure guests is created.
> > Whenever a page belonging to the guest becomes secure, a page from
> > this private device memory is used to represent and track that secure
> > page on the HV side. The movement of pages between normal and secure
> > memory is done via migrate_vma_pages(). The reverse movement is driven
> > via pagemap_ops.migrate_to_ram().
> > 
> > The page-in or page-out requests from UV will come to HV as hcalls and
> > HV will call back into UV via uvcalls to satisfy these page requests.
> > 
> > These patches are against hmm.git
> > (https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/log/?h=hmm)
> > 
> > plus
> > 
> > Claudio Carvalho's base ultravisor enablement patchset v6
> > (https://lore.kernel.org/linuxppc-dev/20190822034838.27876-1-cclau...@linux.ibm.com/T/#t)
> 
> How are you thinking these patches will go upstream?  Are you going to
> send them via the hmm tree?
> 
> I assume you need Claudio's patchset as a prerequisite for your series
> to compile, which means the hmm maintainers would need to pull in a
> topic branch from Michael Ellerman's powerpc tree, or something like
> that.

I was hoping that the changes required from hmm.git would hit upstream soon
and be reflected in mpe's powerpc tree, at which time these patches can go
via the powerpc tree along with or after Claudio's patchset.

Though this depends on migrate_vma and memremap changes that
happen to be in hmm.git, this is majorly a kvmppc change. Hence I thought
it would be appropriate for this to go via your or mpe's tree together
with required dependencies.

Regards,
Bharata.



[PATCH v2] powerpc/powernv: Add ultravisor message log interface

2019-08-23 Thread Claudio Carvalho
Ultravisor (UV) provides an in-memory console which follows the OPAL
in-memory console structure.

This patch extends the OPAL msglog code to also initialize the UV memory
console and provide a sysfs interface (uv_msglog) for userspace to view
the UV message log.

CC: Madhavan Srinivasan 
CC: Oliver O'Halloran 
Signed-off-by: Claudio Carvalho 
---
This patch depends on the "kvmppc: Paravirtualize KVM to support
ultravisor" patchset submitted by Claudio Carvalho.
---
 arch/powerpc/platforms/powernv/opal-msglog.c | 99 ++--
 1 file changed, 72 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c 
b/arch/powerpc/platforms/powernv/opal-msglog.c
index dc51d03c6370..da73908fdabe 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* OPAL in-memory console. Defined in OPAL source at core/console.c */
 struct memcons {
@@ -28,24 +29,26 @@ struct memcons {
 };
 
 static struct memcons *opal_memcons = NULL;
+static struct memcons *opal_uv_memcons;
 
-ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
+static ssize_t msglog_copy(struct memcons *memcons, const char *bin_attr_name,
+  char *to, loff_t pos, size_t count)
 {
const char *conbuf;
ssize_t ret;
size_t first_read = 0;
uint32_t out_pos, avail;
 
-   if (!opal_memcons)
+   if (!memcons)
return -ENODEV;
 
-   out_pos = be32_to_cpu(READ_ONCE(opal_memcons->out_pos));
+   out_pos = be32_to_cpu(READ_ONCE(memcons->out_pos));
 
/* Now we've read out_pos, put a barrier in before reading the new
 * data it points to in conbuf. */
smp_rmb();
 
-   conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys));
+   conbuf = phys_to_virt(be64_to_cpu(memcons->obuf_phys));
 
/* When the buffer has wrapped, read from the out_pos marker to the end
 * of the buffer, and then read the remaining data as in the un-wrapped
@@ -53,7 +56,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
if (out_pos & MEMCONS_OUT_POS_WRAP) {
 
out_pos &= MEMCONS_OUT_POS_MASK;
-   avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos;
+   avail = be32_to_cpu(memcons->obuf_size) - out_pos;
 
ret = memory_read_from_buffer(to, count, ,
conbuf + out_pos, avail);
@@ -71,8 +74,8 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
}
 
/* Sanity check. The firmware should not do this to us. */
-   if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) {
-   pr_err("OPAL: memory console corruption. Aborting read.\n");
+   if (out_pos > be32_to_cpu(memcons->obuf_size)) {
+   pr_err("OPAL: %s corruption. Aborting read.\n", bin_attr_name);
return -EINVAL;
}
 
@@ -86,53 +89,95 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
return ret;
 }
 
+#define BIN_ATTR_NAME_OPAL "msglog"
+#define BIN_ATTR_NAME_UV   "uv_msglog"
+
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
+{
+   return msglog_copy(opal_memcons, BIN_ATTR_NAME_OPAL, to, pos,
+  count);
+}
+
 static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *to,
loff_t pos, size_t count)
 {
-   return opal_msglog_copy(to, pos, count);
+   return msglog_copy(opal_memcons, BIN_ATTR_NAME_OPAL, to, pos,
+  count);
+}
+
+static ssize_t opal_uv_msglog_read(struct file *file, struct kobject *kobj,
+  struct bin_attribute *bin_attr, char *to,
+  loff_t pos, size_t count)
+{
+   return msglog_copy(opal_uv_memcons, BIN_ATTR_NAME_UV, to, pos,
+  count);
 }
 
 static struct bin_attribute opal_msglog_attr = {
-   .attr = {.name = "msglog", .mode = 0400},
+   .attr = {.name = BIN_ATTR_NAME_OPAL, .mode = 0400},
.read = opal_msglog_read
 };
 
-void __init opal_msglog_init(void)
+static struct bin_attribute opal_uv_msglog_attr = {
+   .attr = {.name = BIN_ATTR_NAME_UV, .mode = 0400},
+   .read = opal_uv_msglog_read
+};
+
+static void __init msglog_init(struct memcons **memcons,
+  struct bin_attribute *bin_attr,
+  const char *dt_prop_name)
 {
-   u64 mcaddr;
-   struct memcons *mc;
+   u64 memcons_addr;
 
-   if (of_property_read_u64(opal_node, "ibm,opal-memcons", )) {
-   pr_warn("OPAL: Property ibm,opal-memcons not found, no message 
log\n");
+   if (of_property_read_u64(opal_node, dt_prop_name, _addr)) {
+   pr_warn("OPAL: Property '%s' not found,