[PATCH v2] ocxl: control via sysfs whether the FPGA is reloaded on a link reset

2020-03-16 Thread Philippe Bergheaud
Some opencapi FPGA images allow controlling whether the FPGA is reloaded
on the next adapter reset. If this is supported, the image advertises it
through a Vendor Specific DVSEC in the config space of function 0.

Signed-off-by: Philippe Bergheaud 
---
Changelog:
v2:
  - refine ResetReload debug message
  - do not call get_function_0() if pci_dev is for function 0

 Documentation/ABI/testing/sysfs-class-ocxl | 10 
 drivers/misc/ocxl/config.c | 64 +-
 drivers/misc/ocxl/ocxl_internal.h  |  6 ++
 drivers/misc/ocxl/sysfs.c  | 35 
 include/misc/ocxl-config.h |  1 +
 5 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-class-ocxl 
b/Documentation/ABI/testing/sysfs-class-ocxl
index b5b1fa197592..b9ea671d5805 100644
--- a/Documentation/ABI/testing/sysfs-class-ocxl
+++ b/Documentation/ABI/testing/sysfs-class-ocxl
@@ -33,3 +33,13 @@ Date:January 2018
 Contact:   linuxppc-dev@lists.ozlabs.org
 Description:   read/write
Give access the global mmio area for the AFU
+
+What:  /sys/class/ocxl/<afu name>/reload_on_reset
+Date:  February 2020
+Contact:   linuxppc-dev@lists.ozlabs.org
+Description:   read/write
+   Control whether the FPGA is reloaded on a link reset
+   0   Do not reload FPGA image from flash
+   1   Reload FPGA image from flash
+   unavailable
+   The device does not support this capability
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index c8e19bfb5ef9..05500fdece7e 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -71,6 +71,20 @@ static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 
afu_idx)
return 0;
 }
 
+/**
+ * get_function_0() - Find a related PCI device (function 0)
+ * @dev: PCI device to match
+ *
+ * Returns a pointer to the related device, or null if not found
+ */
+static struct pci_dev *get_function_0(struct pci_dev *dev)
+{
+   unsigned int devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
+
+   return pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
+  dev->bus->number, devfn);
+}
+
 static void read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
 {
u16 val;
@@ -159,7 +173,7 @@ static int read_dvsec_afu_info(struct pci_dev *dev, struct 
ocxl_fn_config *fn)
 static int read_dvsec_vendor(struct pci_dev *dev)
 {
int pos;
-   u32 cfg, tlx, dlx;
+   u32 cfg, tlx, dlx, reset_reload;
 
/*
 * vendor specific DVSEC is optional
@@ -183,6 +197,54 @@ static int read_dvsec_vendor(struct pci_dev *dev)
dev_dbg(>dev, "  CFG version = 0x%x\n", cfg);
dev_dbg(>dev, "  TLX version = 0x%x\n", tlx);
dev_dbg(>dev, "  DLX version = 0x%x\n", dlx);
+   if (ocxl_config_get_reset_reload(dev, _reload) != 0)
+   dev_dbg(>dev, "  ResetReload is not available\n");
+   else
+   dev_dbg(>dev, "  ResetReload = 0x%x\n", reset_reload);
+   return 0;
+}
+
+int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val)
+{
+   int reset_reload = -1;
+   int pos = 0;
+   struct pci_dev *dev0 = dev;
+
+   if (PCI_FUNC(dev->devfn) != 0)
+   dev0 = get_function_0(dev);
+
+   if (dev0)
+   pos = find_dvsec(dev0, OCXL_DVSEC_VENDOR_ID);
+
+   if (pos)
+   pci_read_config_dword(dev0,
+ pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
+ _reload);
+   if (reset_reload == -1)
+   return reset_reload;
+
+   *val = reset_reload & BIT(0);
+   return 0;
+}
+
+int ocxl_config_set_reset_reload(struct pci_dev *dev, int val)
+{
+   int reset_reload = -1;
+   int pos = 0;
+   struct pci_dev *dev0 = get_function_0(dev);
+
+   if (dev0)
+   pos = find_dvsec(dev0, OCXL_DVSEC_VENDOR_ID);
+
+   if (pos)
+   pci_read_config_dword(dev0,
+ pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
+ _reload);
+   if (reset_reload == -1)
+   return reset_reload;
+
+   val &= BIT(0);
+   pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, val);
return 0;
 }
 
diff --git a/drivers/misc/ocxl/ocxl_internal.h 
b/drivers/misc/ocxl/ocxl_internal.h
index 345bf843a38e..af9a84aeee6f 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -112,6 +112,12 @@ void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, 
u32 size);
  */
 int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
 
+/*
+ * Control whether the FPGA is reloaded on a link reset
+ */
+int ocxl_c

[PATCH] ocxl: control via sysfs whether the FPGA is reloaded on a link reset

2020-03-11 Thread Philippe Bergheaud
Some opencapi FPGA images allow controlling whether the FPGA is reloaded
on the next adapter reset. If this is supported, the image advertises it
through a Vendor Specific DVSEC in the config space of function 0.

This patch adds an interface to sysfs to control that behavior, if possible.

Signed-off-by: Philippe Bergheaud 
---
 Documentation/ABI/testing/sysfs-class-ocxl | 10 
 drivers/misc/ocxl/config.c | 59 +-
 drivers/misc/ocxl/ocxl_internal.h  |  6 +++
 drivers/misc/ocxl/sysfs.c  | 35 +
 include/misc/ocxl-config.h |  1 +
 5 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-class-ocxl 
b/Documentation/ABI/testing/sysfs-class-ocxl
index b5b1fa197592..b9ea671d5805 100644
--- a/Documentation/ABI/testing/sysfs-class-ocxl
+++ b/Documentation/ABI/testing/sysfs-class-ocxl
@@ -33,3 +33,13 @@ Date:January 2018
 Contact:   linuxppc-dev@lists.ozlabs.org
 Description:   read/write
Give access the global mmio area for the AFU
+
+What:  /sys/class/ocxl/<afu name>/reload_on_reset
+Date:  February 2020
+Contact:   linuxppc-dev@lists.ozlabs.org
+Description:   read/write
+   Control whether the FPGA is reloaded on a link reset
+   0   Do not reload FPGA image from flash
+   1   Reload FPGA image from flash
+   unavailable
+   The device does not support this capability
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index c8e19bfb5ef9..3488463c1640 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -71,6 +71,20 @@ static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 
afu_idx)
return 0;
 }
 
+/**
+ * get_function_0() - Find a related PCI device (function 0)
+ * @dev: PCI device to match
+ *
+ * Returns a pointer to the related device, or null if not found
+ */
+static struct pci_dev *get_function_0(struct pci_dev *dev)
+{
+   unsigned int devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
+
+   return pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
+  dev->bus->number, devfn);
+}
+
 static void read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
 {
u16 val;
@@ -159,7 +173,7 @@ static int read_dvsec_afu_info(struct pci_dev *dev, struct 
ocxl_fn_config *fn)
 static int read_dvsec_vendor(struct pci_dev *dev)
 {
int pos;
-   u32 cfg, tlx, dlx;
+   u32 cfg, tlx, dlx, reset_reload;
 
/*
 * vendor specific DVSEC is optional
@@ -178,11 +192,54 @@ static int read_dvsec_vendor(struct pci_dev *dev)
pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, );
pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, );
pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, );
+   pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, 
_reload);
 
dev_dbg(>dev, "Vendor specific DVSEC:\n");
dev_dbg(>dev, "  CFG version = 0x%x\n", cfg);
dev_dbg(>dev, "  TLX version = 0x%x\n", tlx);
dev_dbg(>dev, "  DLX version = 0x%x\n", dlx);
+   dev_dbg(>dev, "  ResetReload = 0x%x\n", reset_reload);
+   return 0;
+}
+
+int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val)
+{
+   int reset_reload = -1;
+   int pos = 0;
+   struct pci_dev *dev0 = get_function_0(dev);
+
+   if (dev0)
+   pos = find_dvsec(dev0, OCXL_DVSEC_VENDOR_ID);
+
+   if (pos)
+   pci_read_config_dword(dev0,
+ pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
+ _reload);
+   if (reset_reload == -1)
+   return reset_reload;
+
+   *val = reset_reload & BIT(0);
+   return 0;
+}
+
+int ocxl_config_set_reset_reload(struct pci_dev *dev, int val)
+{
+   int reset_reload = -1;
+   int pos = 0;
+   struct pci_dev *dev0 = get_function_0(dev);
+
+   if (dev0)
+   pos = find_dvsec(dev0, OCXL_DVSEC_VENDOR_ID);
+
+   if (pos)
+   pci_read_config_dword(dev0,
+ pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
+ _reload);
+   if (reset_reload == -1)
+   return reset_reload;
+
+   val &= BIT(0);
+   pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, val);
return 0;
 }
 
diff --git a/drivers/misc/ocxl/ocxl_internal.h 
b/drivers/misc/ocxl/ocxl_internal.h
index 345bf843a38e..af9a84aeee6f 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -112,6 +112,12 @@ void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, 
u32 size);
  */
 int ocxl_config_get_pasid_info(struct pci_

Re: [PATCH v4 1/2] cxl: Set the PBCQ Tunnel BAR register when enabling capi mode

2018-05-15 Thread Philippe Bergheaud

On 15/05/2018 07:30, Michael Ellerman wrote:

Philippe Bergheaud <fe...@linux.ibm.com> writes:


On 14/05/2018 12:51, Michael Ellerman wrote:

Philippe Bergheaud <fe...@linux.ibm.com> writes:


Skiboot used to set the default Tunnel BAR register value when capi mode
was enabled. This approach was ok for the cxl driver, but prevented other
drivers from choosing different values.

Skiboot versions > 5.11 will not set the default value any longer. This
patch modifies the cxl driver to set/reset the Tunnel BAR register when
entering/exiting the cxl mode, with pnv_pci_set_tunnel_bar().

That should work with old skiboot (since we are re-writing the value
already set) and new skiboot.

But doesn't that mean new skiboot can't boot any old kernel? That seems
undesirable.

Yes, with new skiboot, all kernels will boot.

OK.


Capi mode tunnelled operations did not work until linux-4.17-rc1 (Apr
15, 2008). With new skiboot, kernels since linux-4.17-rc1 will loose

 ^
 1


tunneled operations in capi mode, until this patch set is merged.

So it would be preferable if I put this patch into 4.17, so that the
4.17 release works. Am I right?


Yes.




Capi mode tunneled operations are not used in any IBM or partner GA
products. The feature will be fully supported after this patch set and
the libcxl PR https://github.com/ibm-capi/libcxl/pull/29 are merged.

OK. In general I ignore that sort of stuff because people also ignore it
when they're filing bugs against the kernel. So if it's in the tree we
want it to work (as much as possible), regardless of what's been GA'ed,
or is supported officially etc.

cheers





Re: [PATCH v4 1/2] cxl: Set the PBCQ Tunnel BAR register when enabling capi mode

2018-05-14 Thread Philippe Bergheaud

On 14/05/2018 12:51, Michael Ellerman wrote:

Philippe Bergheaud <fe...@linux.ibm.com> writes:


Skiboot used to set the default Tunnel BAR register value when capi mode
was enabled. This approach was ok for the cxl driver, but prevented other
drivers from choosing different values.

Skiboot versions > 5.11 will not set the default value any longer. This
patch modifies the cxl driver to set/reset the Tunnel BAR register when
entering/exiting the cxl mode, with pnv_pci_set_tunnel_bar().

That should work with old skiboot (since we are re-writing the value
already set) and new skiboot.

But doesn't that mean new skiboot can't boot any old kernel? That seems
undesirable.

cheers


Yes, with new skiboot, all kernels will boot.

Capi mode tunneled operations did not work until linux-4.17-rc1 (Apr
15, 2018). With new skiboot, kernels since linux-4.17-rc1 will lose
tunneled operations in capi mode, until this patch set is merged.


Capi mode tunneled operations are not used in any IBM or partner GA 
products. The feature will be fully supported after this patch set and 
the libcxl PR https://github.com/ibm-capi/libcxl/pull/29 are merged.


Philippe



[PATCH v4 2/2] cxl: Report the tunneled operations status

2018-05-14 Thread Philippe Bergheaud
Failure to synchronize the tunneled operations does not prevent
the initialization of the cxl card. This patch reports the tunneled
operations status via /sys.

Signed-off-by: Philippe Bergheaud <fe...@linux.ibm.com>
---
v3: Added this patch to report the tunneled operations status.

v4: Updated Documentation/ABI/testing/sysfs-class-cxl.
---
 Documentation/ABI/testing/sysfs-class-cxl |  8 
 drivers/misc/cxl/cxl.h|  1 +
 drivers/misc/cxl/pci.c|  7 ++-
 drivers/misc/cxl/sysfs.c  | 10 ++
 4 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-class-cxl 
b/Documentation/ABI/testing/sysfs-class-cxl
index 640f65e79ef1..8e69345c37cc 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -244,3 +244,11 @@ Description:read only
 Returns 1 if the psl timebase register is synchronized
 with the core timebase register, 0 otherwise.
 Users:  https://github.com/ibm-capi/libcxl
+
+What:   /sys/class/cxl/<card>/tunneled_ops_supported
+Date:   May 2018
+Contact:linuxppc-dev@lists.ozlabs.org
+Description:read only
+Returns 1 if tunneled operations are supported in capi mode,
+0 otherwise.
+Users:  https://github.com/ibm-capi/libcxl
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a4c9c8297a6d..918d4fb742d1 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -717,6 +717,7 @@ struct cxl {
bool perst_select_user;
bool perst_same_image;
bool psl_timebase_synced;
+   bool tunneled_ops_supported;
 
/*
 * number of contexts mapped on to this card. Possible values are:
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 355c789406f7..008f50a0c465 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1742,9 +1742,14 @@ static int cxl_configure_adapter(struct cxl *adapter, 
struct pci_dev *dev)
/* Required for devices using CAPP DMA mode, harmless for others */
pci_set_master(dev);
 
-   if (cxl_is_power9())
+   adapter->tunneled_ops_supported = false;
+
+   if (cxl_is_power9()) {
if (pnv_pci_set_tunnel_bar(dev, 0x0002E000ull, 1))
dev_info(>dev, "Tunneled operations 
unsupported\n");
+   else
+   adapter->tunneled_ops_supported = true;
+   }
 
if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
goto err;
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index 95285b7f636f..4b5a4c5d3c01 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -78,6 +78,15 @@ static ssize_t psl_timebase_synced_show(struct device 
*device,
return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced);
 }
 
+static ssize_t tunneled_ops_supported_show(struct device *device,
+   struct device_attribute *attr,
+   char *buf)
+{
+   struct cxl *adapter = to_cxl_adapter(device);
+
+   return scnprintf(buf, PAGE_SIZE, "%i\n", 
adapter->tunneled_ops_supported);
+}
+
 static ssize_t reset_adapter_store(struct device *device,
   struct device_attribute *attr,
   const char *buf, size_t count)
@@ -183,6 +192,7 @@ static struct device_attribute adapter_attrs[] = {
__ATTR_RO(base_image),
__ATTR_RO(image_loaded),
__ATTR_RO(psl_timebase_synced),
+   __ATTR_RO(tunneled_ops_supported),
__ATTR_RW(load_image_on_perst),
__ATTR_RW(perst_reloads_same_image),
__ATTR(reset, S_IWUSR, NULL, reset_adapter_store),
-- 
2.16.3



[PATCH v4 1/2] cxl: Set the PBCQ Tunnel BAR register when enabling capi mode

2018-05-14 Thread Philippe Bergheaud
Skiboot used to set the default Tunnel BAR register value when capi mode
was enabled. This approach was ok for the cxl driver, but prevented other
drivers from choosing different values.

Skiboot versions > 5.11 will not set the default value any longer. This
patch modifies the cxl driver to set/reset the Tunnel BAR register when
entering/exiting the cxl mode, with pnv_pci_set_tunnel_bar().

That should work with old skiboot (since we are re-writing the value
already set) and new skiboot.

Signed-off-by: Philippe Bergheaud <fe...@linux.ibm.com>
Reviewed-by: Christophe Lombard <clomb...@linux.vnet.ibm.com>
Acked-by: Frederic Barrat <fbar...@linux.vnet.ibm.com>
---
v2: Restrict tunnel bar setting to power9.
Do not fail cxl_configure_adapter() on tunnel bar setting error.
Log an info message instead, and continue configuring capi mode.

v3,v4: No change.
---
 drivers/misc/cxl/pci.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 83f1d08058fc..355c789406f7 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1742,6 +1742,10 @@ static int cxl_configure_adapter(struct cxl *adapter, 
struct pci_dev *dev)
/* Required for devices using CAPP DMA mode, harmless for others */
pci_set_master(dev);
 
+   if (cxl_is_power9())
+   if (pnv_pci_set_tunnel_bar(dev, 0x0002E000ull, 1))
+   dev_info(>dev, "Tunneled operations 
unsupported\n");
+
if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
goto err;
 
@@ -1768,6 +1772,8 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
 {
struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
 
+   if (cxl_is_power9())
+   pnv_pci_set_tunnel_bar(pdev, 0x0002E000ull, 0);
cxl_native_release_psl_err_irq(adapter);
cxl_unmap_adapter_regs(adapter);
 
-- 
2.16.3



[PATCH v3 2/2] cxl: Report the tunneled operations status

2018-04-25 Thread Philippe Bergheaud
Failure to synchronize the tunneled operations does not prevent
the initialization of the cxl card. This patch reports the tunneled
operations status via /sys.

Signed-off-by: Philippe Bergheaud <fe...@linux.ibm.com>
---
v3: Added this patch to report the tunneled operations status.
---
 drivers/misc/cxl/cxl.h   |  1 +
 drivers/misc/cxl/pci.c   |  7 ++-
 drivers/misc/cxl/sysfs.c | 10 ++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a4c9c8297a6d..918d4fb742d1 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -717,6 +717,7 @@ struct cxl {
bool perst_select_user;
bool perst_same_image;
bool psl_timebase_synced;
+   bool tunneled_ops_supported;
 
/*
 * number of contexts mapped on to this card. Possible values are:
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 355c789406f7..008f50a0c465 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1742,9 +1742,14 @@ static int cxl_configure_adapter(struct cxl *adapter, 
struct pci_dev *dev)
/* Required for devices using CAPP DMA mode, harmless for others */
pci_set_master(dev);
 
-   if (cxl_is_power9())
+   adapter->tunneled_ops_supported = false;
+
+   if (cxl_is_power9()) {
if (pnv_pci_set_tunnel_bar(dev, 0x0002E000ull, 1))
dev_info(>dev, "Tunneled operations 
unsupported\n");
+   else
+   adapter->tunneled_ops_supported = true;
+   }
 
if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
goto err;
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index 95285b7f636f..4b5a4c5d3c01 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -78,6 +78,15 @@ static ssize_t psl_timebase_synced_show(struct device 
*device,
return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced);
 }
 
+static ssize_t tunneled_ops_supported_show(struct device *device,
+   struct device_attribute *attr,
+   char *buf)
+{
+   struct cxl *adapter = to_cxl_adapter(device);
+
+   return scnprintf(buf, PAGE_SIZE, "%i\n", 
adapter->tunneled_ops_supported);
+}
+
 static ssize_t reset_adapter_store(struct device *device,
   struct device_attribute *attr,
   const char *buf, size_t count)
@@ -183,6 +192,7 @@ static struct device_attribute adapter_attrs[] = {
__ATTR_RO(base_image),
__ATTR_RO(image_loaded),
__ATTR_RO(psl_timebase_synced),
+   __ATTR_RO(tunneled_ops_supported),
__ATTR_RW(load_image_on_perst),
__ATTR_RW(perst_reloads_same_image),
__ATTR(reset, S_IWUSR, NULL, reset_adapter_store),
-- 
2.16.3



[PATCH v3 1/2] cxl: Set the PBCQ Tunnel BAR register when enabling capi mode

2018-04-25 Thread Philippe Bergheaud
Skiboot used to set the default Tunnel BAR register value when capi mode
was enabled. This approach was ok for the cxl driver, but prevented other
drivers from choosing different values.

Skiboot versions > 5.11 will not set the default value any longer. This
patch modifies the cxl driver to set/reset the Tunnel BAR register when
entering/exiting the cxl mode, with pnv_pci_set_tunnel_bar().

Signed-off-by: Philippe Bergheaud <fe...@linux.ibm.com>
Reviewed-by: Christophe Lombard <clomb...@linux.vnet.ibm.com>
---
v2: Restrict tunnel bar setting to power9.
Do not fail cxl_configure_adapter() on tunnel bar setting error.
Log an info message instead, and continue configuring capi mode.

v3: No change.
---
 drivers/misc/cxl/pci.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 83f1d08058fc..355c789406f7 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1742,6 +1742,10 @@ static int cxl_configure_adapter(struct cxl *adapter, 
struct pci_dev *dev)
/* Required for devices using CAPP DMA mode, harmless for others */
pci_set_master(dev);
 
+   if (cxl_is_power9())
+   if (pnv_pci_set_tunnel_bar(dev, 0x0002E000ull, 1))
+   dev_info(>dev, "Tunneled operations 
unsupported\n");
+
if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
goto err;
 
@@ -1768,6 +1772,8 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
 {
struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
 
+   if (cxl_is_power9())
+   pnv_pci_set_tunnel_bar(pdev, 0x0002E000ull, 0);
cxl_native_release_psl_err_irq(adapter);
cxl_unmap_adapter_regs(adapter);
 
-- 
2.16.3



[PATCH v2] cxl: Set the PBCQ Tunnel BAR register when enabling capi mode

2018-04-13 Thread Philippe Bergheaud
Skiboot used to set the default Tunnel BAR register value when capi mode
was enabled. This approach was ok for the cxl driver, but prevented other
drivers from choosing different values.

Skiboot versions > 5.11 will not set the default value any longer. This
patch modifies the cxl driver to set/reset the Tunnel BAR register when
entering/exiting the cxl mode, with pnv_pci_set_tunnel_bar().

Signed-off-by: Philippe Bergheaud <fe...@linux.ibm.com>
---
Changelog:

v2: Restrict tunnel bar setting to power9.
Do not fail cxl_configure_adapter() on tunnel bar setting error.
Log an info message instead, and continue configuring capi mode.
---
 drivers/misc/cxl/pci.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 83f1d08058fc..355c789406f7 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1742,6 +1742,10 @@ static int cxl_configure_adapter(struct cxl *adapter, 
struct pci_dev *dev)
/* Required for devices using CAPP DMA mode, harmless for others */
pci_set_master(dev);
 
+   if (cxl_is_power9())
+   if (pnv_pci_set_tunnel_bar(dev, 0x0002E000ull, 1))
+   dev_info(>dev, "Tunneled operations 
unsupported\n");
+
if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
goto err;
 
@@ -1768,6 +1772,8 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
 {
struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
 
+   if (cxl_is_power9())
+   pnv_pci_set_tunnel_bar(pdev, 0x0002E000ull, 0);
cxl_native_release_psl_err_irq(adapter);
cxl_unmap_adapter_regs(adapter);
 
-- 
2.16.2



[PATCH] cxl: Set the PBCQ Tunnel BAR register when enabling capi mode

2018-04-12 Thread Philippe Bergheaud
Skiboot used to set the default Tunnel BAR register value when capi mode
was enabled. This approach was ok for the cxl driver, but prevented other
drivers from choosing different values.

Skiboot versions > 5.11 will not set the default value any longer. This
patch modifies the cxl driver to set/reset the Tunnel BAR register when
entering/exiting the cxl mode, with pnv_pci_set_tunnel_bar().

Signed-off-by: Philippe Bergheaud <fe...@linux.ibm.com>
---
 drivers/misc/cxl/pci.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 83f1d08058fc..3beff9188446 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1742,6 +1742,9 @@ static int cxl_configure_adapter(struct cxl *adapter, 
struct pci_dev *dev)
/* Required for devices using CAPP DMA mode, harmless for others */
pci_set_master(dev);
 
+   if ((rc = pnv_pci_set_tunnel_bar(dev, 0x0002E000ull, 1)))
+   goto err;
+
if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
goto err;
 
@@ -1768,6 +1771,7 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
 {
struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
 
+   pnv_pci_set_tunnel_bar(pdev, 0x0002E000ull, 0);
cxl_native_release_psl_err_irq(adapter);
cxl_unmap_adapter_regs(adapter);
 
-- 
2.16.2



[PATCH v10 2/2] cxl: read PHB indications from the device tree

2018-03-02 Thread Philippe Bergheaud
Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
Reviewed-by: Frederic Barrat <fbar...@linux.vnet.ibm.com>
---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

v3: No change.

v4: No functional change.
Drop cosmetic fix in comment.

v5: get_phb_indications():
  - make static variables local to function.
  - return static variable values by arguments.

v6: get_phb_indications():
  - acquire a mutex before setting the phb indications.

v7: get_phb_indications():
cxl_get_xsl9_dsnctl():
  - return -ENODEV instead of -1.

v8: get_phb_indications():
  - stay on the safe side: acquire the mutex unconditionally

v9,v10: No change.
---
 drivers/misc/cxl/cxl.h|  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/cxl/pci.c| 48 ++-
 3 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 4f015da78f28..a7689944b351 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1065,7 +1065,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index 30ccba436b3b..bea1eb004b49 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, >dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, >dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 758842f65a1b..8d179e64a296 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -407,21 +407,59 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 
*chipid,
return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64 *capiind, u64 *asnind,
+  u64 *nbwind)
+{
+   static u64 nbw, asn, capi = 0;
+   struct device_node *np;
+   const __be32 *prop;
+
+   mutex_lock(_mutex);
+   if (!capi) {
+   if (!(np = pnv_pci_get_phb_node(dev))) {
+   mutex_unlock(_mutex);
+   return -ENODEV;
+   }
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbw = 0x0300UL; /* legacy values */
+   asn = 0x0400UL;
+   capi = 0x0200UL;
+   } else {
+   nbw = (u64)be32_to_cpu(prop[2]);
+   asn = (u64)be32_to_cpu(prop[1]);
+   capi = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   }
+   *capiind = capi;
+   *asnind = asn;
+   *nbwind = nbw;
+   mutex_unlock(_mutex);
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
u64 xsl_dsnctl;
+   u64 capiind, asnind, nbwind;
 
/*
 * CAPI Identifier bits [0:7]
 * bit 61:60 MSI bits --> 0
 * bit 59 TVT selector --> 0
 */
+   if (get_phb_indications(dev, , , ))
+   return -ENODEV;
 
/*
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
 
/* nMMU_ID Defaults to: b’01001’*/
@@ -435,14 +473,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));
 
/*
 * Upper 16b address bits of ASB_Notify messages sent to the
 * system. Need to match the PHB’s ASN Compare/Mask Register.
 * Not supported on P9 DD1.
 */
-   

[PATCH v10 1/2] powerpc/powernv: Enable tunneled operations

2018-03-02 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_enable_tunnel()
   Enable tunnel operations, tell driver the 16-bit ASN indication
   used by kernel.

pnv_pci_disable_tunnel()
   Disable tunnel operations.

pnv_pci_set_tunnel_bar()
   Tell kernel the Tunnel BAR Response address used by driver.
   This function uses two new OPAL calls, as the PBCQ Tunnel BAR
   register is configured by skiboot.

pnv_pci_get_as_notify_info()
   Return the ASN info of the thread to be woken up.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
Reviewed-by: Frederic Barrat <fbar...@linux.vnet.ibm.com>
---
Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

v3: Make pnv_pci_get_phb_node() available when compiling without cxl.

v4: Add pnv_pci_get_as_notify_info().
Rebase opal call numbers on skiboot 5.9.6.

v5: pnv_pci_get_tunnel_ind():
  - fix node reference count
pnv_pci_get_as_notify_info():
  - fail if task == NULL
  - read pid from mm->context.id
  - explain that thread.tidr require CONFIG_PPC64

v6: pnv_pci_get_tunnel_ind():
  - check if radix is enabled, or else return an error
pnv_pci_get_as_notify_info():
  - remove a capi-specific comment, irrelevant for pci

v7: pnv_pci_set_tunnel_bar():
  - setting the tunnel bar more than once with the same value
is not an error

v8: No change

v9: Rename pnv_pci_get_tunnel_ind() into pnv_pci_enable_tunnel():
  - Increase real window size to accept as_notify messages.
New api pnv_pci_disable_tunnel():
  - Restore real window size to its default value.
Adjust opal call numbers.

v10: Adjust opal call numbers to their final values.
---
 arch/powerpc/include/asm/opal-api.h|   4 +-
 arch/powerpc/include/asm/opal.h|   2 +
 arch/powerpc/include/asm/pnv-pci.h |   6 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
 arch/powerpc/platforms/powernv/pci-cxl.c   |   8 --
 arch/powerpc/platforms/powernv/pci.c   | 135 +
 6 files changed, 148 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 94bd1bf2c873..d886a5b7ff21 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -204,7 +204,9 @@
 #define OPAL_NPU_SPA_SETUP 159
 #define OPAL_NPU_SPA_CLEAR_CACHE   160
 #define OPAL_NPU_TL_SET161
-#define OPAL_LAST  161
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   164
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   165
+#define OPAL_LAST  165
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 12e70fb58700..dde60089d0d4 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -204,6 +204,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..d2d8c28db336 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,12 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t 
state,
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
 
+extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind);
+extern int pnv_pci_disable_tunnel(struct pci_dev *dev);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+ u32 *pid, u32 *tid);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 1b2936ba6040..3da30c2f26b4 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -323,3 +323,5 @@ OPAL_CALL(opal_sensor_group_clear,  
OPAL_SEN

[PATCH v9 1/2] powerpc/powernv: Enable tunneled operations

2018-02-20 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_enable_tunnel()
   Enable tunnel operations, tell driver the 16-bit ASN indication
   used by kernel.

pnv_pci_disable_tunnel()
   Disable tunnel operations.

pnv_pci_set_tunnel_bar()
   Tell kernel the Tunnel BAR Response address used by driver.
   This function uses two new OPAL calls, as the PBCQ Tunnel BAR
   register is configured by skiboot.

pnv_pci_get_as_notify_info()
   Return the ASN info of the thread to be woken up.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

v3: Make pnv_pci_get_phb_node() available when compiling without cxl.

v4: Add pnv_pci_get_as_notify_info().
Rebase opal call numbers on skiboot 5.9.6.

v5: pnv_pci_get_tunnel_ind():
  - fix node reference count
pnv_pci_get_as_notify_info():
  - fail if task == NULL
  - read pid from mm->context.id
  - explain that thread.tidr require CONFIG_PPC64

v6: pnv_pci_get_tunnel_ind():
  - check if radix is enabled, or else return an error
pnv_pci_get_as_notify_info():
  - remove a capi-specific comment, irrelevant for pci

v7: pnv_pci_set_tunnel_bar():
  - setting the tunnel bar more than once with the same value
is not an error

v8: No change

v9: Rename pnv_pci_get_tunnel_ind() into pnv_pci_enable_tunnel():
  - Increase real window size to accept as_notify messages.
New api pnv_pci_disable_tunnel():
  - Restore real window size to its default value.
Adjust opal call numbers.

This patch depends on the following skiboot patches:
  https://patchwork.ozlabs.org/patch/874415/
  https://patchwork.ozlabs.org/patch/874416/
---
 arch/powerpc/include/asm/opal-api.h|   4 +-
 arch/powerpc/include/asm/opal.h|   2 +
 arch/powerpc/include/asm/pnv-pci.h |   6 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
 arch/powerpc/platforms/powernv/pci-cxl.c   |   8 --
 arch/powerpc/platforms/powernv/pci.c   | 135 +
 6 files changed, 148 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 94bd1bf2c873..07b5e2240ecc 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -204,7 +204,9 @@
 #define OPAL_NPU_SPA_SETUP 159
 #define OPAL_NPU_SPA_CLEAR_CACHE   160
 #define OPAL_NPU_TL_SET   161
-#define OPAL_LAST  161
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   162
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   163
+#define OPAL_LAST  163
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 12e70fb58700..dde60089d0d4 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -204,6 +204,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..d2d8c28db336 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,12 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t 
state,
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
 
+extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind);
+extern int pnv_pci_disable_tunnel(struct pci_dev *dev);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+ u32 *pid, u32 *tid);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 1b2936ba6040..3da30c2f26b4 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -323,3 +323,5 @@ OPAL_CALL(opal_sensor_gr

[PATCH v9 2/2] cxl: read PHB indications from the device tree

2018-02-20 Thread Philippe Bergheaud
Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

v3: No change.

v4: No functional change.
Drop cosmetic fix in comment.

v5: get_phb_indications():
  - make static variables local to function.
  - return static variable values by arguments.

v6: get_phb_indications():
  - acquire a mutex before setting the phb indications.

v7: get_phb_indications():
cxl_get_xsl9_dsnctl():
  - return -ENODEV instead of -1.

v8: get_phb_indications():
  - stay on the safe side: acquire the mutex unconditionally

v9: No change.

This patch depends on the following skiboot patch:
  https://patchwork.ozlabs.org/patch/874415/
---
 drivers/misc/cxl/cxl.h|  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/cxl/pci.c| 48 ++-
 3 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 4f015da78f28..a7689944b351 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1065,7 +1065,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index 30ccba436b3b..bea1eb004b49 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 758842f65a1b..8d179e64a296 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -407,21 +407,59 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 
*chipid,
return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64 *capiind, u64 *asnind,
+  u64 *nbwind)
+{
+   static u64 nbw, asn, capi = 0;
+   struct device_node *np;
+   const __be32 *prop;
+
+   mutex_lock(&indications_mutex);
+   if (!capi) {
+   if (!(np = pnv_pci_get_phb_node(dev))) {
+   mutex_unlock(&indications_mutex);
+   return -ENODEV;
+   }
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbw = 0x0300UL; /* legacy values */
+   asn = 0x0400UL;
+   capi = 0x0200UL;
+   } else {
+   nbw = (u64)be32_to_cpu(prop[2]);
+   asn = (u64)be32_to_cpu(prop[1]);
+   capi = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   }
+   *capiind = capi;
+   *asnind = asn;
+   *nbwind = nbw;
+   mutex_unlock(&indications_mutex);
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
u64 xsl_dsnctl;
+   u64 capiind, asnind, nbwind;
 
/*
 * CAPI Identifier bits [0:7]
 * bit 61:60 MSI bits --> 0
 * bit 59 TVT selector --> 0
 */
+   if (get_phb_indications(dev, &capiind, &asnind, &nbwind))
+   return -ENODEV;
 
/*
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
 
/* nMMU_ID Defaults to: b’01001’*/
@@ -435,14 +473,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));
 
/*
 * Upper 16b address bits of ASB_Notify messages sent to the
 * system. Need to match the PHB’s ASN Compare/Mask Register.
 * Not supported on P9 DD1.
  

[PATCH v8 1/2] powerpc/powernv: Enable tunneled operations

2018-01-22 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_get_tunnel_ind()
   Tell driver the 16-bit ASN indication used by kernel.

pnv_pci_set_tunnel_bar()
   Tell kernel the Tunnel BAR Response address used by driver.
   This function uses two new OPAL calls, as the PBCQ Tunnel BAR
   register is configured by skiboot.

pnv_pci_get_as_notify_info()
   Return the ASN info of the thread to be woken up.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

v3: Make pnv_pci_get_phb_node() available when compiling without cxl.

v4: Add pnv_pci_get_as_notify_info().
Rebase opal call numbers on skiboot 5.9.6.

v5: pnv_pci_get_tunnel_ind():
  - fix node reference count
pnv_pci_get_as_notify_info():
  - fail if task == NULL
  - read pid from mm->context.id
  - explain that thread.tidr require CONFIG_PPC64

v6: pnv_pci_get_tunnel_ind():
  - check if radix is enabled, or else return an error
pnv_pci_get_as_notify_info():
  - remove a capi-specific comment, irrelevant for pci

v7: pnv_pci_set_tunnel_bar():
  - setting the tunnel bar more than once with the same value
is not an error

v8: No change

This patch depends on the following skiboot patches:
  https://patchwork.ozlabs.org/patch/858324/
  https://patchwork.ozlabs.org/patch/858325/
---
 arch/powerpc/include/asm/opal-api.h|   4 +-
 arch/powerpc/include/asm/opal.h|   2 +
 arch/powerpc/include/asm/pnv-pci.h |   5 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
 arch/powerpc/platforms/powernv/pci-cxl.c   |   8 --
 arch/powerpc/platforms/powernv/pci.c   | 107 +
 6 files changed, 119 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..b901f4d9f009 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,9 @@
 #define OPAL_SET_POWER_SHIFT_RATIO 155
 #define OPAL_SENSOR_GROUP_CLEAR   156
 #define OPAL_PCI_SET_P2P   157
-#define OPAL_LAST  157
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   159
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   160
+#define OPAL_LAST  160
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..8705e422b893 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -198,6 +198,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..c69de3276b5e 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,11 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t 
state,
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
 
+extern int pnv_pci_get_tunnel_ind(struct pci_dev *dev, uint64_t *ind);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+ u32 *pid, u32 *tid);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..5da790fb7fef 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,5 @@ OPAL_CALL(opal_set_powercap,
OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,  OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,  OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_t

[PATCH v8 2/2] cxl: read PHB indications from the device tree

2018-01-22 Thread Philippe Bergheaud
Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

v3: No change.

v4: No functional change.
Drop cosmetic fix in comment.

v5: get_phb_indications():
  - make static variables local to function.
  - return static variable values by arguments.

v6: get_phb_indications():
  - acquire a mutex before setting the phb indications.

v7: get_phb_indications():
cxl_get_xsl9_dsnctl():
  - return -ENODEV instead of -1.

v8: get_phb_indications():
  - stay on the safe side: acquire the mutex unconditionally

This patch depends on the following skiboot patch:
  https://patchwork.ozlabs.org/patch/858324/
---
 drivers/misc/cxl/cxl.h|  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/cxl/pci.c| 48 ++-
 3 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a4062904a..5a6e9a921c2b 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1062,7 +1062,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..61f80d586279 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..12e5cae6d452 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -409,21 +409,59 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 
*chipid,
return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64* capiind, u64 *asnind,
+  u64 *nbwind)
+{
+   static u64 nbw, asn, capi = 0;
+   struct device_node *np;
+   const __be32 *prop;
+
+   mutex_lock(&indications_mutex);
+   if (!capi) {
+   if (!(np = pnv_pci_get_phb_node(dev))) {
+   mutex_unlock(&indications_mutex);
+   return -ENODEV;
+   }
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbw = 0x0300UL; /* legacy values */
+   asn = 0x0400UL;
+   capi = 0x0200UL;
+   } else {
+   nbw = (u64)be32_to_cpu(prop[2]);
+   asn = (u64)be32_to_cpu(prop[1]);
+   capi = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   }
+   *capiind = capi;
+   *asnind = asn;
+   *nbwind = nbw;
+   mutex_unlock(&indications_mutex);
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
u64 xsl_dsnctl;
+   u64 capiind, asnind, nbwind;
 
/*
 * CAPI Identifier bits [0:7]
 * bit 61:60 MSI bits --> 0
 * bit 59 TVT selector --> 0
 */
+   if (get_phb_indications(dev, &capiind, &asnind, &nbwind))
+   return -ENODEV;
 
/*
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
 
/* nMMU_ID Defaults to: b’01001’*/
@@ -437,14 +475,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));
 
/*
 * Upper 16b address bits of ASB_Notify messages sent to the
 * system. Need to match the PHB’s ASN Compare/Mask Register.
 * Not supported on P9 DD1.
  

[PATCH v7 2/2] cxl: read PHB indications from the device tree

2018-01-15 Thread Philippe Bergheaud
Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

v3: No change.

v4: No functional change.
Drop cosmetic fix in comment.

v5: get_phb_indications():
  - make static variables local to function.
  - return static variable values by arguments.

v6: get_phb_indications():
  - acquire a mutex before setting the phb indications.

v7: get_phb_indications():
cxl_get_xsl9_dsnctl():
  - return -ENODEV instead of -1.

This patch depends on the following skiboot patch:
  https://patchwork.ozlabs.org/patch/858324/
---
 drivers/misc/cxl/cxl.h|  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/cxl/pci.c| 50 ++-
 3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a4062904a..5a6e9a921c2b 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1062,7 +1062,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..61f80d586279 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..89840181fc03 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -409,21 +409,61 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 
*chipid,
return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64* capiind, u64 *asnind,
+  u64 *nbwind)
+{
+   static u64 nbw, asn, capi = 0;
+   struct device_node *np;
+   const __be32 *prop;
+
+   if (!capi) {
+   mutex_lock(&indications_mutex);
+   if (!capi) {
+   if (!(np = pnv_pci_get_phb_node(dev))) {
+   mutex_unlock(&indications_mutex);
+   return -ENODEV;
+   }
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbw = 0x0300UL; /* legacy values */
+   asn = 0x0400UL;
+   capi = 0x0200UL;
+   } else {
+   nbw = (u64)be32_to_cpu(prop[2]);
+   asn = (u64)be32_to_cpu(prop[1]);
+   capi = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   }
+   mutex_unlock(&indications_mutex);
+   }
+   *capiind = capi;
+   *asnind = asn;
+   *nbwind = nbw;
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
u64 xsl_dsnctl;
+   u64 capiind, asnind, nbwind;
 
/*
 * CAPI Identifier bits [0:7]
 * bit 61:60 MSI bits --> 0
 * bit 59 TVT selector --> 0
 */
+   if (get_phb_indications(dev, &capiind, &asnind, &nbwind))
+   return -ENODEV;
 
/*
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
 
/* nMMU_ID Defaults to: b’01001’*/
@@ -437,14 +477,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));
 
/*
 * Upper 16b address bits of ASB_Notify messages sent to the
 * system. Need to match the PHB’s A

[PATCH v7 1/2] powerpc/powernv: Enable tunneled operations

2018-01-15 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_get_tunnel_ind()
   Tell driver the 16-bit ASN indication used by kernel.

pnv_pci_set_tunnel_bar()
   Tell kernel the Tunnel BAR Response address used by driver.
   This function uses two new OPAL calls, as the PBCQ Tunnel BAR
   register is configured by skiboot.

pnv_pci_get_as_notify_info()
   Return the ASN info of the thread to be woken up.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

v3: Make pnv_pci_get_phb_node() available when compiling without cxl.

v4: Add pnv_pci_get_as_notify_info().
Rebase opal call numbers on skiboot 5.9.6.

v5: pnv_pci_get_tunnel_ind():
  - fix node reference count
pnv_pci_get_as_notify_info():
  - fail if task == NULL
  - read pid from mm->context.id
  - explain that thread.tidr require CONFIG_PPC64

v6: pnv_pci_get_tunnel_ind():
  - check if radix is enabled, or else return an error
pnv_pci_get_as_notify_info():
  - remove a capi-specific comment, irrelevant for pci

v7: pnv_pci_set_tunnel_bar():
  - setting the tunnel bar more than once with the same value
is not an error

This patch depends on the following skiboot patches:
  https://patchwork.ozlabs.org/patch/858324/
  https://patchwork.ozlabs.org/patch/858325/
---
 arch/powerpc/include/asm/opal-api.h|   4 +-
 arch/powerpc/include/asm/opal.h|   2 +
 arch/powerpc/include/asm/pnv-pci.h |   5 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
 arch/powerpc/platforms/powernv/pci-cxl.c   |   8 --
 arch/powerpc/platforms/powernv/pci.c   | 107 +
 6 files changed, 119 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..b901f4d9f009 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,9 @@
 #define OPAL_SET_POWER_SHIFT_RATIO 155
 #define OPAL_SENSOR_GROUP_CLEAR   156
 #define OPAL_PCI_SET_P2P   157
-#define OPAL_LAST  157
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   159
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   160
+#define OPAL_LAST  160
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..8705e422b893 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -198,6 +198,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..c69de3276b5e 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,11 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t 
state,
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
 
+extern int pnv_pci_get_tunnel_ind(struct pci_dev *dev, uint64_t *ind);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+ u32 *pid, u32 *tid);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..5da790fb7fef 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,5 @@ OPAL_CALL(opal_set_powercap,
OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,  OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,  OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_t

[PATCH v6 2/2] cxl: read PHB indications from the device tree

2018-01-11 Thread Philippe Bergheaud
Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

v3: No change.

v4: No functional change.
Drop cosmetic fix in comment.

v5: get_phb_indications():
  - make static variables local to function.
  - return static variable values by arguments.

v6: get_phb_indications():
  - acquire a mutex before setting the phb indications.

This patch depends on the following skiboot patch:
  https://patchwork.ozlabs.org/patch/858324/
---
 drivers/misc/cxl/cxl.h|  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/cxl/pci.c| 50 ++-
 3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a4062904a..5a6e9a921c2b 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1062,7 +1062,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..61f80d586279 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..150883d761f1 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -409,21 +409,61 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 
*chipid,
return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64* capiind, u64 *asnind,
+  u64 *nbwind)
+{
+   static u64 nbw, asn, capi = 0;
+   struct device_node *np;
+   const __be32 *prop;
+
+   if (!capi) {
+   mutex_lock(&indications_mutex);
+   if (!capi) {
+   if (!(np = pnv_pci_get_phb_node(dev))) {
+   mutex_unlock(&indications_mutex);
+   return -1;
+   }
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbw = 0x0300UL; /* legacy values */
+   asn = 0x0400UL;
+   capi = 0x0200UL;
+   } else {
+   nbw = (u64)be32_to_cpu(prop[2]);
+   asn = (u64)be32_to_cpu(prop[1]);
+   capi = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   }
+   mutex_unlock(&indications_mutex);
+   }
+   *capiind = capi;
+   *asnind = asn;
+   *nbwind = nbw;
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
u64 xsl_dsnctl;
+   u64 capiind, asnind, nbwind;
 
/*
 * CAPI Identifier bits [0:7]
 * bit 61:60 MSI bits --> 0
 * bit 59 TVT selector --> 0
 */
+   if (get_phb_indications(dev, &capiind, &asnind, &nbwind))
+   return -1;
 
/*
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
 
/* nMMU_ID Defaults to: b’01001’*/
@@ -437,14 +477,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));
 
/*
 * Upper 16b address bits of ASB_Notify messages sent to the
 * system. Need to match the PHB’s ASN Compare/Mask Register.
 * Not supported on P9 DD1.
 */
-   xsl_ds

[PATCH v6 1/2] powerpc/powernv: Enable tunneled operations

2018-01-11 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_get_tunnel_ind()
   Tell driver the 16-bit ASN indication used by kernel.

pnv_pci_set_tunnel_bar()
   Tell kernel the Tunnel BAR Response address used by driver.
   This function uses two new OPAL calls, as the PBCQ Tunnel BAR
   register is configured by skiboot.

pnv_pci_get_as_notify_info()
   Return the ASN info of the thread to be woken up.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

v3: Make pnv_pci_get_phb_node() available when compiling without cxl.

v4: Add pnv_pci_get_as_notify_info().
Rebase opal call numbers on skiboot 5.9.6.

v5: pnv_pci_get_tunnel_ind():
  - fix node reference count
pnv_pci_get_as_notify_info():
  - fail if task == NULL
  - read pid from mm->context.id
  - explain that thread.tidr requires CONFIG_PPC64

v6: pnv_pci_get_tunnel_ind():
  - check if radix is enabled, or else return an error
 pnv_pci_get_as_notify_info():
  - remove a capi-specific comment, irrelevant for pci

This patch depends on the following skiboot patches:
  https://patchwork.ozlabs.org/patch/858324/
  https://patchwork.ozlabs.org/patch/858325/
---
 arch/powerpc/include/asm/opal-api.h|   4 +-
 arch/powerpc/include/asm/opal.h|   2 +
 arch/powerpc/include/asm/pnv-pci.h |   5 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
 arch/powerpc/platforms/powernv/pci-cxl.c   |   8 --
 arch/powerpc/platforms/powernv/pci.c   | 106 +
 6 files changed, 118 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..b901f4d9f009 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,9 @@
 #define OPAL_SET_POWER_SHIFT_RATIO 155
 #define OPAL_SENSOR_GROUP_CLEAR156
 #define OPAL_PCI_SET_P2P   157
-#define OPAL_LAST  157
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   159
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   160
+#define OPAL_LAST  160
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..8705e422b893 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -198,6 +198,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..c69de3276b5e 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,11 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t 
state,
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
 
+extern int pnv_pci_get_tunnel_ind(struct pci_dev *dev, uint64_t *ind);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+ u32 *pid, u32 *tid);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..5da790fb7fef 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,5 @@ OPAL_CALL(opal_set_powercap,
OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,  OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,  OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,
OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c 
b/arch/powerpc/platforms/po

[PATCH v5 2/2] cxl: read PHB indications from the device tree

2017-12-22 Thread Philippe Bergheaud
Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

v3: No change.

v4: No functional change.
Drop cosmetic fix in comment.

v5: get_phb_indications():
  - make static variables local to function.
  - return static variable values by arguments.

This patch depends on the following skiboot prerequisite:

https://patchwork.ozlabs.org/patch/849162/
---
 drivers/misc/cxl/cxl.h|  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/cxl/pci.c| 42 +-
 3 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a4062904a..5a6e9a921c2b 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1062,7 +1062,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..61f80d586279 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..1d38fff2139f 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -409,21 +409,53 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 
*chipid,
return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static int get_phb_indications(struct pci_dev *dev, u64* capiind, u64 *asnind,
+  u64 *nbwind)
+{
+   static u64 nbw, asn, capi = 0;
+   struct device_node *np;
+   const __be32 *prop;
+
+   if (!capi) {
+   if (!(np = pnv_pci_get_phb_node(dev)))
+   return -1;
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbw = 0x0300UL; /* legacy values */
+   asn = 0x0400UL;
+   capi = 0x0200UL;
+   } else {
+   nbw = (u64)be32_to_cpu(prop[2]);
+   asn = (u64)be32_to_cpu(prop[1]);
+   capi = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   }
+   *capiind = capi;
+   *asnind = asn;
+   *nbwind = nbw;
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
u64 xsl_dsnctl;
+   u64 capiind, asnind, nbwind;
 
/*
 * CAPI Identifier bits [0:7]
 * bit 61:60 MSI bits --> 0
 * bit 59 TVT selector --> 0
 */
+   if (get_phb_indications(dev, &capiind, &asnind, &nbwind))
+   return -1;
 
/*
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
 
/* nMMU_ID Defaults to: b’01001’*/
@@ -437,14 +469,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));
 
/*
 * Upper 16b address bits of ASB_Notify messages sent to the
 * system. Need to match the PHB’s ASN Compare/Mask Register.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x04 << (63-55));
+   xsl_dsnctl |= asnind;
}
 
*reg = xsl_dsnctl;
@@ -464,7 +496,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl 
*adapter,
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl);
if (rc)
return rc;
 
-- 
2.15.1



[PATCH v5 1/2] powerpc/powernv: Enable tunneled operations

2017-12-22 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_get_tunnel_ind()
   Tell driver the 16-bit ASN indication used by kernel.

pnv_pci_set_tunnel_bar()
   Tell kernel the Tunnel BAR Response address used by driver.
   This function uses two new OPAL calls, as the PBCQ Tunnel BAR
   register is configured by skiboot.

pnv_pci_get_as_notify_info()
   Return the ASN info of the thread to be woken up.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

v3: Make pnv_pci_get_phb_node() available when compiling without cxl.

v4: Add pnv_pci_get_as_notify_info().
Rebase opal call numbers on skiboot 5.9.6.

v5: pnv_pci_get_tunnel_ind():
  - fix node reference count
pnv_pci_get_as_notify_info():
  - fail if task == NULL
  - read pid from mm->context.id
  - explain that thread.tidr requires CONFIG_PPC64

This patch depends on the following skiboot prerequisites:

https://patchwork.ozlabs.org/patch/849162/
https://patchwork.ozlabs.org/patch/849163/
---
 arch/powerpc/include/asm/opal-api.h|   4 +-
 arch/powerpc/include/asm/opal.h|   2 +
 arch/powerpc/include/asm/pnv-pci.h |   5 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
 arch/powerpc/platforms/powernv/pci-cxl.c   |   8 --
 arch/powerpc/platforms/powernv/pci.c   | 106 +
 6 files changed, 118 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..b901f4d9f009 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,9 @@
 #define OPAL_SET_POWER_SHIFT_RATIO 155
 #define OPAL_SENSOR_GROUP_CLEAR156
 #define OPAL_PCI_SET_P2P   157
-#define OPAL_LAST  157
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   159
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   160
+#define OPAL_LAST  160
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..8705e422b893 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -198,6 +198,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..c69de3276b5e 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,11 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t 
state,
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
 
+extern int pnv_pci_get_tunnel_ind(struct pci_dev *dev, uint64_t *ind);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+ u32 *pid, u32 *tid);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..5da790fb7fef 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,5 @@ OPAL_CALL(opal_set_powercap,
OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,  OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,  OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,
OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c 
b/arch/powerpc/platforms/powernv/pci-cxl.c
index 94498a04558b..cee003de63af 100644
--- a/arch/powerpc/platforms/powernv/pci-cxl.c
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -16,14 +16,6 @@
 
 #inclu

[PATCH v4 2/2] cxl: read PHB indications from the device tree

2017-12-15 Thread Philippe Bergheaud
Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

v3: No change.

v4: No functional change.
Drop cosmetic fix in comment.

This patch depends on the following skiboot prerequisite:

https://patchwork.ozlabs.org/patch/849162/
---
 drivers/misc/cxl/cxl.h|  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/cxl/pci.c| 40 +++-
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a4062904a..5a6e9a921c2b 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1062,7 +1062,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..61f80d586279 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..c58fb28685af 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -409,7 +409,36 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static u64 nbwind = 0;
+static u64 asnind = 0;
+static u64 capiind = 0;
+
+static int get_phb_indications(struct pci_dev *dev)
+{
+   struct device_node *np;
+   const __be32 *prop;
+
+   if (capiind)
+   return 0;
+
+   if (!(np = pnv_pci_get_phb_node(dev)))
+   return -1;
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbwind = 0x0300UL; /* legacy values */
+   asnind = 0x0400UL;
+   capiind = 0x0200UL;
+   } else {
+   nbwind = (u64)be32_to_cpu(prop[2]);
+   asnind = (u64)be32_to_cpu(prop[1]);
+   capiind = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
u64 xsl_dsnctl;
 
@@ -423,7 +452,8 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   get_phb_indications(dev);
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
 
/* nMMU_ID Defaults to: b’01001’*/
@@ -437,14 +467,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));
 
/*
 * Upper 16b address bits of ASB_Notify messages sent to the
 * system. Need to match the PHB’s ASN Compare/Mask Register.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x04 << (63-55));
+   xsl_dsnctl |= asnind;
}
 
*reg = xsl_dsnctl;
@@ -464,7 +494,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl 
*adapter,
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl);
if (rc)
return rc;
 
-- 
2.15.0



[PATCH v4 1/2] powerpc/powernv: Enable tunneled operations

2017-12-15 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_get_tunnel_ind()
   Tell driver the 16-bit ASN indication used by kernel.

pnv_pci_set_tunnel_bar()
   Tell kernel the Tunnel BAR Response address used by driver.
   This function uses two new OPAL calls, as the PBCQ Tunnel BAR
   register is configured by skiboot.

void pnv_pci_get_as_notify_info()
   Return the ASN info of the thread to be woken up.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

v3: Make pnv_pci_get_phb_node() available when compiling without cxl.

v4: Add pnv_pci_get_as_notify_info().
Rebase opal call numbers on skiboot 5.9.6.

This patch depends on the following skiboot prerequisites:

https://patchwork.ozlabs.org/patch/849162/
https://patchwork.ozlabs.org/patch/849163/
---
 arch/powerpc/include/asm/opal-api.h|  4 +-
 arch/powerpc/include/asm/opal.h|  2 +
 arch/powerpc/include/asm/pnv-pci.h |  5 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |  2 +
 arch/powerpc/platforms/powernv/pci-cxl.c   |  8 ---
 arch/powerpc/platforms/powernv/pci.c   | 93 ++
 6 files changed, 105 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..b901f4d9f009 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,9 @@
 #define OPAL_SET_POWER_SHIFT_RATIO 155
 #define OPAL_SENSOR_GROUP_CLEAR156
 #define OPAL_PCI_SET_P2P   157
-#define OPAL_LAST  157
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   159
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   160
+#define OPAL_LAST  160
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..8705e422b893 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -198,6 +198,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..4839e09663f2 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,11 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t 
state,
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
 
+extern int pnv_pci_get_tunnel_ind(struct pci_dev *dev, uint64_t *ind);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern void pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+  u32 *pid, u32 *tid);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..5da790fb7fef 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,5 @@ OPAL_CALL(opal_set_powercap,
OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,  OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,  OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,
OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c 
b/arch/powerpc/platforms/powernv/pci-cxl.c
index 94498a04558b..cee003de63af 100644
--- a/arch/powerpc/platforms/powernv/pci-cxl.c
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -16,14 +16,6 @@
 
 #include "pci.h"
 
-struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
-{
-   struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
-   return of_node_get(hose->dn);
-}
-EXPORT_SYMBO

Re: [PATCH V2] cxl: Add support for ASB_Notify on POWER9

2017-12-12 Thread Philippe Bergheaud

On 01/12/2017 16:53, Christophe Lombard wrote:

The POWER9 core supports a new feature: ASB_Notify which requires the
support of the Special Purpose Register: TIDR.

The ASB_Notify command, generated by the AFU, will attempt to
wake-up the host thread identified by the particular LPID:PID:TID.

This patch assign a unique TIDR (thread id) for the current thread which
will be used in the process element entry.

A next patch will handle a new kind of "compatible" property in the
device-tree (PHB DT node) indicating which version of CAPI and which
features are supported.

Signed-off-by: Christophe Lombard <clomb...@linux.vnet.ibm.com>



Reviewed-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>



[PATCH v3 2/2] cxl: read PHB indications from the device tree

2017-12-06 Thread Philippe Bergheaud
Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

v3: No change.

This patch depends on the following skiboot prerequisite:

https://patchwork.ozlabs.org/patch/843474/
---
 drivers/misc/cxl/cxl.h|  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/cxl/pci.c| 44 +---
 3 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a4062904a..5a6e9a921c2b 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1062,7 +1062,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..61f80d586279 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..a891abb1edbb 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -409,7 +409,36 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static u64 nbwind = 0;
+static u64 asnind = 0;
+static u64 capiind = 0;
+
+static int get_phb_indications(struct pci_dev *dev)
+{
+   struct device_node *np;
+   const __be32 *prop;
+
+   if (capiind)
+   return 0;
+
+   if (!(np = pnv_pci_get_phb_node(dev)))
+   return -1;
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbwind = 0x0300UL; /* legacy values */
+   asnind = 0x0400UL;
+   capiind = 0x0200UL;
+   } else {
+   nbwind = (u64)be32_to_cpu(prop[2]);
+   asnind = (u64)be32_to_cpu(prop[1]);
+   capiind = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
u64 xsl_dsnctl;
 
@@ -423,10 +452,11 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   get_phb_indications(dev);
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
 
-   /* nMMU_ID Defaults to: b’01001’*/
+   /* nMMU_ID Defaults to: b'01001'*/
xsl_dsnctl |= ((u64)0x09 << (63-28));
 
if (!(cxl_is_power9_dd1())) {
@@ -437,14 +467,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));
 
/*
 * Upper 16b address bits of ASB_Notify messages sent to the
-* system. Need to match the PHB’s ASN Compare/Mask Register.
+* system. Need to match the PHB's ASN Compare/Mask Register.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x04 << (63-55));
+   xsl_dsnctl |= asnind;
}
 
*reg = xsl_dsnctl;
@@ -464,7 +494,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl 
*adapter,
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl);
if (rc)
return rc;
 
-- 
2.15.0



[PATCH v3 1/2] powerpc/powernv: Enable tunneled operations

2017-12-06 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_get_tunnel_ind()
   Tell driver the 16-bit ASN indication used by kernel.

pnv_pci_set_tunnel_bar()
   Tell kernel the Tunnel BAR Response address used by driver.

The latter uses two new OPAL calls, as the PBCQ Tunnel BAR register
is configured by skiboot.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

v3: Make pnv_pci_get_phb_node() available when compiling without cxl.

This patch depends on the following skiboot prerequisites:

https://patchwork.ozlabs.org/patch/843474/
https://patchwork.ozlabs.org/patch/843473/
---

 arch/powerpc/include/asm/opal-api.h|  4 +-
 arch/powerpc/include/asm/opal.h|  2 +
 arch/powerpc/include/asm/pnv-pci.h |  4 +-
 arch/powerpc/platforms/powernv/opal-wrappers.S |  2 +
 arch/powerpc/platforms/powernv/pci-cxl.c   |  8 ---
 arch/powerpc/platforms/powernv/pci.c   | 77 ++
 6 files changed, 87 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..1aa069cc2d61 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,9 @@
 #define OPAL_SET_POWER_SHIFT_RATIO 155
 #define OPAL_SENSOR_GROUP_CLEAR156
 #define OPAL_PCI_SET_P2P   157
-#define OPAL_LAST  157
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   158
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   159
+#define OPAL_LAST  159
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..8705e422b893 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -198,6 +198,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..f064e93495ac 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -28,7 +28,9 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
   struct opal_msg *msg);
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
-
+extern int pnv_pci_get_tunnel_ind(struct pci_dev *dev, uint64_t *ind);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..5da790fb7fef 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,5 @@ OPAL_CALL(opal_set_powercap,
OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,  OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,  OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,
OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c 
b/arch/powerpc/platforms/powernv/pci-cxl.c
index 94498a04558b..cee003de63af 100644
--- a/arch/powerpc/platforms/powernv/pci-cxl.c
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -16,14 +16,6 @@
 
 #include "pci.h"
 
-struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
-{
-   struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
-   return of_node_get(hose->dn);
-}
-EXPORT_SYMBOL(pnv_pci_get_phb_node);
-
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode)
 {
struct pci_controller *hose = pci_bus_to_host(dev->bus);
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index 5

[PATCH v2 2/2] cxl: read PHB indications from the device tree

2017-12-01 Thread Philippe Bergheaud
Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

This patch depends on the following skiboot prerequisite:

https://patchwork.ozlabs.org/patch/843474/
---
 drivers/misc/cxl/cxl.h|  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/cxl/pci.c| 44 +---
 3 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a4062904a..5a6e9a921c2b 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1062,7 +1062,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..61f80d586279 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index bb7fd3f4edab..e21c45758558 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -409,7 +409,36 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static u64 nbwind = 0;
+static u64 asnind = 0;
+static u64 capiind = 0;
+
+static int get_phb_indications(struct pci_dev *dev)
+{
+   struct device_node *np;
+   const __be32 *prop;
+
+   if (capiind)
+   return 0;
+
+   if (!(np = pnv_pci_get_phb_node(dev)))
+   return -1;
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbwind = 0x0300UL; /* legacy values */
+   asnind = 0x0400UL;
+   capiind = 0x0200UL;
+   } else {
+   nbwind = (u64)be32_to_cpu(prop[2]);
+   asnind = (u64)be32_to_cpu(prop[1]);
+   capiind = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
u64 xsl_dsnctl;
 
@@ -423,10 +452,11 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   get_phb_indications(dev);
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
 
-   /* nMMU_ID Defaults to: b’01001’*/
+   /* nMMU_ID Defaults to: b'01001'*/
xsl_dsnctl |= ((u64)0x09 << (63-28));
 
if (!(cxl_is_power9_dd1())) {
@@ -437,14 +467,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));
 
/*
 * Upper 16b address bits of ASB_Notify messages sent to the
-* system. Need to match the PHB’s ASN Compare/Mask Register.
+* system. Need to match the PHB's ASN Compare/Mask Register.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x04 << (63-55));
+   xsl_dsnctl |= asnind;
}
 
*reg = xsl_dsnctl;
@@ -464,7 +494,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl 
*adapter,
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl);
if (rc)
return rc;
 
-- 
2.15.0



[PATCH v2 1/2] powerpc/powernv: Enable tunneled operations

2017-12-01 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_get_tunnel_ind()
   Tell driver the 16-bit ASN indication used by kernel.

pnv_pci_set_tunnel_bar()
   Tell kernel the Tunnel BAR Response address used by driver.

The latter uses two new OPAL calls, as the PBCQ Tunnel BAR register
is configured by skiboot.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

This patch depends on the following skiboot prerequisites:

https://patchwork.ozlabs.org/patch/843474/
https://patchwork.ozlabs.org/patch/843473/
---
 arch/powerpc/include/asm/opal-api.h|  4 +-
 arch/powerpc/include/asm/opal.h|  2 +
 arch/powerpc/include/asm/pnv-pci.h |  4 +-
 arch/powerpc/platforms/powernv/opal-wrappers.S |  2 +
 arch/powerpc/platforms/powernv/pci.c   | 69 ++
 5 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..1aa069cc2d61 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,9 @@
 #define OPAL_SET_POWER_SHIFT_RATIO 155
 #define OPAL_SENSOR_GROUP_CLEAR156
 #define OPAL_PCI_SET_P2P   157
-#define OPAL_LAST  157
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   158
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   159
+#define OPAL_LAST  159
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..8705e422b893 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -198,6 +198,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..f064e93495ac 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -28,7 +28,9 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
   struct opal_msg *msg);
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
-
+extern int pnv_pci_get_tunnel_ind(struct pci_dev *dev, uint64_t *ind);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..5da790fb7fef 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,5 @@ OPAL_CALL(opal_set_powercap,
OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,  OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,  OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,
OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index 5422f4a6317c..63a903c88b72 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -38,6 +38,7 @@
 #include "pci.h"
 
 static DEFINE_MUTEX(p2p_mutex);
+static DEFINE_MUTEX(tunnel_mutex);
 
 int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
 {
@@ -1092,6 +1093,74 @@ int pnv_pci_set_p2p(struct pci_dev *initiator, struct 
pci_dev *target, u64 desc)
 }
 EXPORT_SYMBOL_GPL(pnv_pci_set_p2p);
 
+int pnv_pci_get_tunnel_ind(struct pci_dev *dev, u64 *asnind)
+{
+   struct device_node *np;
+   const __be32 *prop;
+
+   if (!(np = pnv_pci_get_phb_node(dev)))
+   return -ENXIO;
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop || !prop[1])
+  

[PATCH] powerpc/powernv: Enable tunneled operations

2017-10-25 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify).

This patch adds support for tunneled operations on powernv, by adding a
new API to be called by drivers:

pnv_pci_get_tunnel_ind() -- tell driver the 16-bit ASN indication set by
kernel.

pnv_pci_set_tunnel_bar() -- tell kernel the Tunnel BAR address mask used
by driver.

These functions use four new OPAL calls, as PBCQ and PHB configurations
are done by skiboot.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
This patch depends on the following skiboot prerequisites:

  https://patchwork.ozlabs.org/patch/829294/
  [1/2] phb4: set PHB CMPM registers for tunneled operations

  https://patchwork.ozlabs.org/patch/829293/
  [2/2] phb4: set PBCQ Tunnel BAR for tunneled operations

 arch/powerpc/include/asm/opal-api.h| 13 +++-
 arch/powerpc/include/asm/opal.h|  4 ++
 arch/powerpc/include/asm/pnv-pci.h |  3 +
 arch/powerpc/platforms/powernv/opal-wrappers.S |  4 ++
 arch/powerpc/platforms/powernv/pci.c   | 92 ++
 5 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 450a60b81d2a..607d94d8d863 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -200,7 +200,11 @@
 #define OPAL_SET_POWER_SHIFT_RATIO 155
 #define OPAL_SENSOR_GROUP_CLEAR156
 #define OPAL_PCI_SET_P2P   157
-#define OPAL_LAST  157
+#define OPAL_PCI_GET_PHB_CMPM  158
+#define OPAL_PCI_SET_PHB_CMPM  159
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   160
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   161
+#define OPAL_LAST  161
 
 /* Device tree flags */
 
@@ -1106,6 +1110,13 @@ enum {
 #define OPAL_PCI_P2P_LOAD  0x2
 #define OPAL_PCI_P2P_STORE 0x4
 
+/* PHB Compare/Mask registers */
+enum {
+   OPAL_PHB_ASN_CMPM   = 0,
+   OPAL_PHB_CAPI_CMPM  = 1,
+   OPAL_PHB_PBL_NBW_CMPM   = 2,
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 726c23304a57..556c16d60930 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -198,6 +198,10 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_phb_cmpm(uint64_t phb_id, uint64_t phb_reg, uint64_t 
*ind);
+int64_t opal_pci_set_phb_cmpm(uint64_t phb_id, uint64_t phb_reg, uint64_t ind);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..73b320732fff 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -28,6 +28,9 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
   struct opal_msg *msg);
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
+extern int pnv_pci_get_tunnel_ind(struct pci_dev *dev, uint64_t *ind);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
 
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 8c1ede2d3f7e..9de8cd43380f 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -319,3 +319,7 @@ OPAL_CALL(opal_set_powercap,
OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,  OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,  OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_pci_get_phb_cmpm,   OPAL_PCI_GET_PHB_CMPM);
+OPAL_CALL(opal_pci_set_phb_cmpm,   OPAL_PCI_SET_PHB_CMPM);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,
OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index 5422f4a6317c..26230a45e24e 

Re: [PATCH RFC] Interface to set SPRN_TIDR

2017-09-01 Thread Philippe Bergheaud

On 31/08/2017 20:06, Sukadev Bhattiprolu wrote:

felix [fe...@linux.vnet.ibm.com] wrote:

On 31/08/2017 01:32, Sukadev Bhattiprolu wrote:

Michael Neuling [mi...@neuling.org] wrote:

Suka,

Please CC Christophe, who has an alternative way of doing this. We need to get
agreement across all users of TIDR/AS_notify...

Mikey,

Thanks. There is overlap between the two patches. I will send a patch on
top of Christophe's for the interfaces to assign/clear the TIDR value and
clear the thread->tidr during arch_dup_task_struct(). I will also drop the
CONFIG_VAS check since it's not only for VAS.

Christophe, can you let me know of any other comments on this patch?

Suka

Suka,

I am seconding Christophe on this matter. I think that your patch now
fulfills the CAPI use case requirements, with one exception: CAPI does not
restrict assigning a thread id to the current task. Please find a few minor
questions below.

Philippe


His patch is here:

https://lists.ozlabs.org/pipermail/linuxppc-dev/2017-August/161582.html

Mikey

On Tue, 2017-08-29 at 19:38 -0700, Sukadev Bhattiprolu wrote:

We need the SPRN_TIDR to be set for use with fast thread-wakeup
(core-to-core wakeup) in VAS. Each user thread that has a receive
window setup and expects to be notified when a sender issues a paste
needs to have a unique SPRN_TIDR value.

The SPRN_TIDR value only needs to unique within the process but for
now we use a globally unique thread id as described below.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v2]
- Michael Ellerman: Use an interface to assign TIDR so it is
  assigned to only threads that need it; move assignment to
  restore_sprs(). Drop lint from rebase;


   arch/powerpc/include/asm/processor.h |  4 ++
   arch/powerpc/include/asm/switch_to.h |  3 ++
   arch/powerpc/kernel/process.c| 97

   3 files changed, 104 insertions(+)

diff --git a/arch/powerpc/include/asm/processor.h
b/arch/powerpc/include/asm/processor.h
index fab7ff8..bf6ba63 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -232,6 +232,10 @@ struct debug_reg {
   struct thread_struct {
unsigned long   ksp;/* Kernel stack pointer */

+#ifdef CONFIG_PPC_VAS
+   unsigned long   tidr;
+#endif
+
   #ifdef CONFIG_PPC64
unsigned long   ksp_vsid;
   #endif
diff --git a/arch/powerpc/include/asm/switch_to.h
b/arch/powerpc/include/asm/switch_to.h
index 17c8380..4962455 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -91,4 +91,7 @@ static inline void clear_task_ebb(struct task_struct *t)
   #endif
   }

+extern void set_thread_tidr(struct task_struct *t);
+extern void clear_thread_tidr(struct task_struct *t);
+
   #endif /* _ASM_POWERPC_SWITCH_TO_H */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1f0fd36..13abb22 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1132,6 +1132,10 @@ static inline void restore_sprs(struct thread_struct
*old_thread,
mtspr(SPRN_TAR, new_thread->tar);
}
   #endif
+#ifdef CONFIG_PPC_VAS
+   if (old_thread->tidr != new_thread->tidr)
+   mtspr(SPRN_TIDR, new_thread->tidr);
+#endif
   }

   #ifdef CONFIG_PPC_BOOK3S_64
@@ -1446,9 +1450,97 @@ void flush_thread(void)
   #endif /* CONFIG_HAVE_HW_BREAKPOINT */
   }

+#ifdef CONFIG_PPC_VAS
+static DEFINE_SPINLOCK(vas_thread_id_lock);
+static DEFINE_IDA(vas_thread_ida);
+
+/*
+ * We need to assign an unique thread id to each thread in a process. This
+ * thread id is intended to be used with the Fast Thread-wakeup (aka Core-
+ * to-core wakeup) mechanism being implemented on top of Virtual Accelerator
+ * Switchboard (VAS).
+ *
+ * To get a unique thread-id per process we could simply use task_pid_nr()
+ * but the problem is that task_pid_nr() is not yet available for the thread
+ * when copy_thread() is called. Fixing that would require changing more
+ * intrusive arch-neutral code in code path in copy_process()?.
+ *
+ * Further, to assign unique thread ids within each process, we need an
+ * atomic field (or an IDR) in task_struct, which again intrudes into the
+ * arch-neutral code.
+ *
+ * So try to assign globally unique thraed ids for now.
+ *
+ * NOTE: TIDR 0 indicates that the thread does not need a TIDR value.
+ *  For now, only threads that expect to be notified by the VAS
+ *  hardware need a TIDR value and we assign values > 0 for those.
+ */
+#define MAX_THREAD_CONTEXT ((1 << 15) - 2)

Why are you excluding ((1 << 15) - 1)?

You are right. I don't need to exclude that. Also, TIDR is a 16-bit (0:15 in
VAS's Local Notify TID) value 

[PATCH v2] cxl: Use fixed width predefined types in data structure.

2016-08-05 Thread Philippe Bergheaud
This patch fixes a regression introduced by commit b810253.

It replaces the type u8 with __u8 in the uapi header cxl.h,
because u8 is not always defined in userland build
environments, in particular when cross-compiling libcxl on
x86_64 linux machines (RHEL6.7 and Ubuntu 16.04).

This patch also changes the size of the field data_size, and
makes it constant, to support 32-bit userland applications
running on big-endian ppc64 kernels transparently.

This breaks the (young) API that has been merged in v4.8.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changes since v1:
  Added an explanation for the proposed API change in the log.

Note:
As far as I know, cxlflash is the only known user of the API.

 include/uapi/misc/cxl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h
index cbae529..180d526 100644
--- a/include/uapi/misc/cxl.h
+++ b/include/uapi/misc/cxl.h
@@ -136,8 +136,8 @@ struct cxl_event_afu_driver_reserved {
 *
 * Of course the contents will be ABI, but that's up the AFU driver.
 */
-   size_t data_size;
-   u8 data[];
+   __u32 data_size;
+   __u8 data[];
 };
 
 struct cxl_event {
-- 
2.8.0



[PATCH] cxl: Use fixed width predefined types in data structure.

2016-08-04 Thread Philippe Bergheaud
This patch fixes a regression introduced by commit b810253.
It replaces the type u8 with __u8 in the uapi header cxl.h,
because u8 is not always defined in userland build
environments, in particular when cross-compiling libcxl on
x86_64 linux machines (RHEL6.7 and Ubuntu 16.04).

It also makes the definition of cxl_event_afu_driver_reserved
more consistent with the other definitions in the header file.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
 include/uapi/misc/cxl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h
index cbae529..180d526 100644
--- a/include/uapi/misc/cxl.h
+++ b/include/uapi/misc/cxl.h
@@ -136,8 +136,8 @@ struct cxl_event_afu_driver_reserved {
 *
 * Of course the contents will be ABI, but that's up the AFU driver.
 */
-   size_t data_size;
-   u8 data[];
+   __u32 data_size;
+   __u8 data[];
 };
 
 struct cxl_event {
-- 
2.8.0



[PATCH v3] cxl: Refine slice error debug messages

2016-07-05 Thread Philippe Bergheaud
The PSL Slice Error Register (PSL_SERR_An) reports implementation
dependent AFU errors, in the form of a bitmap. The PSL_SERR_An
register content is printed in the form of hex dump debug message.

This patch decodes the PSL_ERR_An register contents, and prints a
specific error message for each possible error bit. It also dumps
the secondary registers AFU_ERR_An and PSL_DSISR_An, that may
contain extra debug information.

This patch also removes the large WARN message that used to report
the cxl slice error interrupt, and replaces it by a short informative
message, that draws attention to AFU implementation errors.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changes since v1:
  - Rebased on Ian's patch
"cxl: Abstract the differences between the PSL and XSL"

Changes since v2:
  - Added description

 drivers/misc/cxl/cxl.h| 15 +++
 drivers/misc/cxl/guest.c  |  9 ++---
 drivers/misc/cxl/irq.c| 29 +
 drivers/misc/cxl/native.c | 12 +++-
 4 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 7745252..d928a8c 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -188,6 +188,18 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
 #define CXL_PSL_ID_An_F(1ull << (63-31))
 #define CXL_PSL_ID_An_L(1ull << (63-30))
 
+/** CXL_PSL_SERR_An /
+#define CXL_PSL_SERR_An_afuto  (1ull << (63-0))
+#define CXL_PSL_SERR_An_afudis (1ull << (63-1))
+#define CXL_PSL_SERR_An_afuov  (1ull << (63-2))
+#define CXL_PSL_SERR_An_badsrc (1ull << (63-3))
+#define CXL_PSL_SERR_An_badctx (1ull << (63-4))
+#define CXL_PSL_SERR_An_llcmdis(1ull << (63-5))
+#define CXL_PSL_SERR_An_llcmdto(1ull << (63-6))
+#define CXL_PSL_SERR_An_afupar (1ull << (63-7))
+#define CXL_PSL_SERR_An_afudup (1ull << (63-8))
+#define CXL_PSL_SERR_An_AE (1ull << (63-30))
+
 /** CXL_PSL_SCNTL_An /
 #define CXL_PSL_SCNTL_An_CR  (0x1ull << (63-15))
 /* Programming Modes: */
@@ -905,4 +917,7 @@ extern const struct cxl_backend_ops *cxl_ops;
 
 /* check if the given pci_dev is on the the cxl vphb bus */
 bool cxl_pci_is_vphb_device(struct pci_dev *dev);
+
+/* decode AFU error bits in the PSL register PSL_SERR_An */
+void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr);
 #endif
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index bc8d0b9..d516d0a 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -196,15 +196,18 @@ static irqreturn_t guest_slice_irq_err(int irq, void 
*data)
 {
struct cxl_afu *afu = data;
int rc;
-   u64 serr;
+   u64 serr, afu_error, dsisr;
 
-   WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq);
rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr);
if (rc) {
dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc);
return IRQ_HANDLED;
}
-   dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr);
+   afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An);
+   dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+   cxl_afu_decode_psl_serr(afu, serr);
+   dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error);
+   dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr);
 
rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr);
if (rc)
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 8def455..40fffe4 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -374,3 +374,32 @@ void afu_release_irqs(struct cxl_context *ctx, void 
*cookie)
 
ctx->irq_count = 0;
 }
+
+void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr)
+{
+   dev_crit(&afu->dev,
+"PSL Slice error received. Check AFU for root cause.\n");
+   dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
+   if (serr & CXL_PSL_SERR_An_afuto)
+   dev_crit(&afu->dev, "AFU MMIO Timeout\n");
+   if (serr & CXL_PSL_SERR_An_afudis)
+   dev_crit(&afu->dev,
+"MMIO targeted Accelerator that was not enabled\n");
+   if (serr & CXL_PSL_SERR_An_afuov)
+   dev_crit(&afu->dev, "AFU CTAG Overflow\n");
+   if (serr & CXL_PSL_SERR_An_badsrc)
+   dev_crit(&afu->dev, "Bad Interrupt Source\n");
+   if (serr & CXL_PSL_SERR_An_badctx)
+   dev_crit(&afu->dev, "Bad Context Handle\n");
+   if (serr & CXL_PSL_SERR_An_llcmdis)
+   dev_crit(&afu->dev, "LLCMD to Disabled AFU\n");
+   if (serr & CXL_PSL_SERR_An_llcmdto)
+   

[PATCH v2] cxl: Refine slice error debug messages

2016-07-04 Thread Philippe Bergheaud
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changes since v1:
  - Rebased on Ian's patch
"cxl: Abstract the differences between the PSL and XSL"

 drivers/misc/cxl/cxl.h| 15 +++
 drivers/misc/cxl/guest.c  |  9 ++---
 drivers/misc/cxl/irq.c| 29 +
 drivers/misc/cxl/native.c | 12 +++-
 4 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 7745252..d928a8c 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -188,6 +188,18 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
 #define CXL_PSL_ID_An_F(1ull << (63-31))
 #define CXL_PSL_ID_An_L(1ull << (63-30))
 
+/** CXL_PSL_SERR_An /
+#define CXL_PSL_SERR_An_afuto  (1ull << (63-0))
+#define CXL_PSL_SERR_An_afudis (1ull << (63-1))
+#define CXL_PSL_SERR_An_afuov  (1ull << (63-2))
+#define CXL_PSL_SERR_An_badsrc (1ull << (63-3))
+#define CXL_PSL_SERR_An_badctx (1ull << (63-4))
+#define CXL_PSL_SERR_An_llcmdis(1ull << (63-5))
+#define CXL_PSL_SERR_An_llcmdto(1ull << (63-6))
+#define CXL_PSL_SERR_An_afupar (1ull << (63-7))
+#define CXL_PSL_SERR_An_afudup (1ull << (63-8))
+#define CXL_PSL_SERR_An_AE (1ull << (63-30))
+
 /** CXL_PSL_SCNTL_An /
 #define CXL_PSL_SCNTL_An_CR  (0x1ull << (63-15))
 /* Programming Modes: */
@@ -905,4 +917,7 @@ extern const struct cxl_backend_ops *cxl_ops;
 
 /* check if the given pci_dev is on the the cxl vphb bus */
 bool cxl_pci_is_vphb_device(struct pci_dev *dev);
+
+/* decode AFU error bits in the PSL register PSL_SERR_An */
+void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr);
 #endif
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index bc8d0b9..d516d0a 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -196,15 +196,18 @@ static irqreturn_t guest_slice_irq_err(int irq, void 
*data)
 {
struct cxl_afu *afu = data;
int rc;
-   u64 serr;
+   u64 serr, afu_error, dsisr;
 
-   WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq);
rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr);
if (rc) {
dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc);
return IRQ_HANDLED;
}
-   dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr);
+   afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An);
+   dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+   cxl_afu_decode_psl_serr(afu, serr);
+   dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error);
+   dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr);
 
rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr);
if (rc)
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 8def455..40fffe4 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -374,3 +374,32 @@ void afu_release_irqs(struct cxl_context *ctx, void 
*cookie)
 
ctx->irq_count = 0;
 }
+
+void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr)
+{
+   dev_crit(&afu->dev,
+"PSL Slice error received. Check AFU for root cause.\n");
+   dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
+   if (serr & CXL_PSL_SERR_An_afuto)
+   dev_crit(&afu->dev, "AFU MMIO Timeout\n");
+   if (serr & CXL_PSL_SERR_An_afudis)
+   dev_crit(&afu->dev,
+"MMIO targeted Accelerator that was not enabled\n");
+   if (serr & CXL_PSL_SERR_An_afuov)
+   dev_crit(&afu->dev, "AFU CTAG Overflow\n");
+   if (serr & CXL_PSL_SERR_An_badsrc)
+   dev_crit(&afu->dev, "Bad Interrupt Source\n");
+   if (serr & CXL_PSL_SERR_An_badctx)
+   dev_crit(&afu->dev, "Bad Context Handle\n");
+   if (serr & CXL_PSL_SERR_An_llcmdis)
+   dev_crit(&afu->dev, "LLCMD to Disabled AFU\n");
+   if (serr & CXL_PSL_SERR_An_llcmdto)
+   dev_crit(&afu->dev, "LLCMD Timeout to AFU\n");
+   if (serr & CXL_PSL_SERR_An_afupar)
+   dev_crit(&afu->dev, "AFU MMIO Parity Error\n");
+   if (serr & CXL_PSL_SERR_An_afudup)
+   dev_crit(&afu->dev, "AFU MMIO Duplicate CTAG Error\n");
+   if (serr & CXL_PSL_SERR_An_AE)
+   dev_crit(&afu->dev,
+"AFU asserted JDONE with JERROR in AFU Directed 
Mode\n");
+}
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index d867326..c4d8815 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -767,7 +767,7 @

[PATCH v2] cxl: Ignore CAPI adapters misplaced in switched slots

2016-07-01 Thread Philippe Bergheaud
One should not attempt to switch a PHB into CAPI mode if there is
a switch between the PHB and the adapter. This patch modifies the
cxl driver to ignore CAPI adapters misplaced in switched slots.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
This patch fixes Bz 142217.

Changes since v1:
  - As suggested by Fred, tell user that CAPI adapter is misplaced
s/dev_dbg/dev_info/

 drivers/misc/cxl/pci.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index a08fcc8..0d69eac 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1280,6 +1280,30 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
device_unregister(&adapter->dev);
 }
 
+#define CXL_MAX_PCIEX_PARENT 2
+
+static int cxl_slot_is_switched(struct pci_dev *dev)
+{
+   struct device_node *np;
+   int depth = 0;
+   const __be32 *prop;
+
+   if (!(np = pci_device_to_OF_node(dev))) {
+   pr_err("cxl: np = NULL\n");
+   return -ENODEV;
+   }
+   of_node_get(np);
+   while (np) {
+   np = of_get_next_parent(np);
+   prop = of_get_property(np, "device_type", NULL);
+   if (!prop || strcmp((char *)prop, "pciex"))
+   break;
+   depth++;
+   }
+   of_node_put(np);
+   return (depth > CXL_MAX_PCIEX_PARENT);
+}
+
 static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
struct cxl *adapter;
@@ -1291,6 +1315,11 @@ static int cxl_probe(struct pci_dev *dev, const struct 
pci_device_id *id)
return -ENODEV;
}
 
+   if (cxl_slot_is_switched(dev)) {
+   dev_info(&dev->dev, "Ignoring card on incompatible PCI slot\n");
+   return -ENODEV;
+   }
+
if (cxl_verbose)
dump_cxl_config_space(dev);
 
-- 
2.8.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] cxl: Ignore CAPI adapters misplaced in switched slots

2016-06-30 Thread Philippe Bergheaud
One should not attempt to switch a PHB into CAPI mode if there is
a switch between the PHB and the adapter. This patch modifies the
cxl driver to ignore CAPI adapters misplaced in switched slots.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
This patch fixes Bz 142217.

 drivers/misc/cxl/pci.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index a08fcc8..2f978ed 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1280,6 +1280,30 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
device_unregister(&adapter->dev);
 }
 
+#define CXL_MAX_PCIEX_PARENT 2
+
+static int cxl_slot_is_switched(struct pci_dev *dev)
+{
+   struct device_node *np;
+   int depth = 0;
+   const __be32 *prop;
+
+   if (!(np = pci_device_to_OF_node(dev))) {
+   pr_err("cxl: np = NULL\n");
+   return -ENODEV;
+   }
+   of_node_get(np);
+   while (np) {
+   np = of_get_next_parent(np);
+   prop = of_get_property(np, "device_type", NULL);
+   if (!prop || strcmp((char *)prop, "pciex"))
+   break;
+   depth++;
+   }
+   of_node_put(np);
+   return (depth > CXL_MAX_PCIEX_PARENT);
+}
+
 static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
struct cxl *adapter;
@@ -1291,6 +1315,11 @@ static int cxl_probe(struct pci_dev *dev, const struct 
pci_device_id *id)
return -ENODEV;
}
 
+   if (cxl_slot_is_switched(dev)) {
+   dev_dbg(>dev, "cxl_init_adapter: Ignoring switched slot 
device\n");
+   return -ENODEV;
+   }
+
if (cxl_verbose)
dump_cxl_config_space(dev);
 
-- 
2.8.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RESEND, v7, 2/2] cxl: Add set and get private data to context struct

2016-06-28 Thread Philippe Bergheaud

Michael Ellerman wrote:

On Fri, 2016-24-06 at 06:47:07 UTC, Philippe Bergheaud wrote:


From: Michael Neuling <mi...@neuling.org>

This provides AFU drivers a means to associate private data with a cxl
context. This is particularly intended to make the new callbacks for
driver specific events easier for AFU drivers to use, as they can easily
get back to any private data structures they may use.

Signed-off-by: Michael Neuling <mi...@neuling.org>
Signed-off-by: Ian Munsie <imun...@au1.ibm.com>
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
Reviewed-by: Matthew R. Ochs <mro...@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnel...@au1.ibm.com>
---
No changes since v1. Added Matt Ochs reviewed-by tag.



I'm not sure why this is RESEND ?


Sorry. That was because I made a typo in the title (I typed v6 instead of v7).

Does this pair with [v7 1/2] ?


Yes it does.

  https://patchwork.ozlabs.org/patch/639644/

It's a lot simpler if you just resend the whole series each time there is an
update to any patch.


Got it, thank you.

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RESEND v7,2/2] cxl: Add set and get private data to context struct

2016-06-24 Thread Philippe Bergheaud
From: Michael Neuling <mi...@neuling.org>

This provides AFU drivers a means to associate private data with a cxl
context. This is particularly intended to make the new callbacks for
driver specific events easier for AFU drivers to use, as they can easily
get back to any private data structures they may use.

Signed-off-by: Michael Neuling <mi...@neuling.org>
Signed-off-by: Ian Munsie <imun...@au1.ibm.com>
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
Reviewed-by: Matthew R. Ochs <mro...@linux.vnet.ibm.com>
---
No changes since v1. Added Matt Ochs reviewed-by tag.

 drivers/misc/cxl/api.c | 21 +
 drivers/misc/cxl/cxl.h |  3 +++
 include/misc/cxl.h |  7 +++
 3 files changed, 31 insertions(+)

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index dd1988e..271bf77 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -94,6 +94,27 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context 
*ctx, int num)
return 0;
 }
 
+
+int cxl_set_priv(struct cxl_context *ctx, void *priv)
+{
+   if (!ctx)
+   return -EINVAL;
+
+   ctx->priv = priv;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_priv);
+
+void *cxl_get_priv(struct cxl_context *ctx)
+{
+   if (!ctx)
+   return ERR_PTR(-EINVAL);
+
+   return ctx->priv;
+}
+EXPORT_SYMBOL_GPL(cxl_get_priv);
+
 int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
 {
int res;
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b0027e6..1e56304 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -478,6 +478,9 @@ struct cxl_context {
/* Only used in PR mode */
u64 process_token;
 
+   /* driver private data */
+   void *priv;
+
unsigned long *irq_bitmap; /* Accessed from IRQ context */
struct cxl_irq_ranges irqs;
struct list_head irq_names;
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index 17419f6..b6d040f 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -86,6 +86,13 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev 
*dev);
 int cxl_release_context(struct cxl_context *ctx);
 
 /*
+ * Set and get private data associated with a context. Allows drivers to have a
+ * back pointer to some useful structure.
+ */
+int cxl_set_priv(struct cxl_context *ctx, void *priv);
+void *cxl_get_priv(struct cxl_context *ctx);
+
+/*
  * Allocate AFU interrupts for this context. num=0 will allocate the default
  * for this AFU as given in the AFU descriptor. This number doesn't include the
  * interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be
-- 
2.8.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[v6,2/2] cxl: Add set and get private data to context struct

2016-06-23 Thread Philippe Bergheaud
From: Michael Neuling <mi...@neuling.org>

This provides AFU drivers a means to associate private data with a cxl
context. This is particularly intended to make the new callbacks for
driver specific events easier for AFU drivers to use, as they can easily
get back to any private data structures they may use.

Signed-off-by: Michael Neuling <mi...@neuling.org>
Signed-off-by: Ian Munsie <imun...@au1.ibm.com>
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
Reviewed-by: Matthew R. Ochs <mro...@linux.vnet.ibm.com>
---
No changes since v1. Added Matt Ochs reviewed-by tag.

 drivers/misc/cxl/api.c | 21 +
 drivers/misc/cxl/cxl.h |  3 +++
 include/misc/cxl.h |  7 +++
 3 files changed, 31 insertions(+)

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index dd1988e..271bf77 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -94,6 +94,27 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context 
*ctx, int num)
return 0;
 }
 
+
+int cxl_set_priv(struct cxl_context *ctx, void *priv)
+{
+   if (!ctx)
+   return -EINVAL;
+
+   ctx->priv = priv;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_priv);
+
+void *cxl_get_priv(struct cxl_context *ctx)
+{
+   if (!ctx)
+   return ERR_PTR(-EINVAL);
+
+   return ctx->priv;
+}
+EXPORT_SYMBOL_GPL(cxl_get_priv);
+
 int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
 {
int res;
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b0027e6..1e56304 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -478,6 +478,9 @@ struct cxl_context {
/* Only used in PR mode */
u64 process_token;
 
+   /* driver private data */
+   void *priv;
+
unsigned long *irq_bitmap; /* Accessed from IRQ context */
struct cxl_irq_ranges irqs;
struct list_head irq_names;
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index 17419f6..b6d040f 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -86,6 +86,13 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev 
*dev);
 int cxl_release_context(struct cxl_context *ctx);
 
 /*
+ * Set and get private data associated with a context. Allows drivers to have a
+ * back pointer to some useful structure.
+ */
+int cxl_set_priv(struct cxl_context *ctx, void *priv);
+void *cxl_get_priv(struct cxl_context *ctx);
+
+/*
  * Allocate AFU interrupts for this context. num=0 will allocate the default
  * for this AFU as given in the AFU descriptor. This number doesn't include the
  * interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be
-- 
2.8.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[v7, 1/2] cxl: Add mechanism for delivering AFU driver specific events

2016-06-23 Thread Philippe Bergheaud
This adds an afu_driver_ops structure with fetch_event() and
event_delivered() callbacks. An AFU driver such as cxlflash can fill
this out and associate it with a context to enable passing custom AFU
specific events to userspace.

This also adds a new kernel API function cxl_context_events_pending(),
that the AFU driver can use to notify the cxl driver that new specific
events are ready to be delivered, and wake up anyone waiting on the
context wait queue.

The current count of AFU driver specific events is stored in the field
afu_driver_events of the context structure.

The cxl driver checks the afu_driver_events count during poll, select,
read, etc. calls to check if an AFU driver specific event is pending,
and calls fetch_event() to obtain and deliver that event. This way, the
cxl driver takes care of all the usual locking semantics around these
calls and handles all the generic cxl events, so that the AFU driver
only needs to worry about its own events.

fetch_event() returns a struct cxl_event_afu_driver_reserved, allocated
by the AFU driver, and filled in with the specific event information and
size. Total event size (header + data) should not be greater than
CXL_READ_MIN_SIZE (4K).

The cxl driver prepends an appropriate cxl event header, copies the event
to userspace, and finally calls event_delivered() to return the status of
the operation to the AFU driver. The event is identified by the context
and cxl_event_afu_driver_reserved pointers.

Since AFU drivers provide their own means for userspace to obtain the
AFU file descriptor (i.e. cxlflash uses an ioctl on their scsi file
descriptor to obtain the AFU file descriptor) and the generic cxl driver
will never use this event, the ABI of the event is up to each individual
AFU driver.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changes since v6:
- Dropped cxl_unset_driver_ops
  (passing NULL to cxl_set_driver_ops will do)
- Dropped CXL_MAX_EVENT_DATA_SIZE
  (event size now limited to CXL_READ_MIN_SIZE)

Changes since v5:
- s/deliver_event/fetch_event/
- Fixed the handling of fetch_event errors
- Documented the return codes of event_delivered

Changes since v4:
- Addressed comments from Vaibhav:
  - Changed struct cxl_event_afu_driver_reserved from
{ __u64 reserved[4]; } to { size_t data_size; u8 data[]; }
  - Modified deliver_event to return a struct cxl_event_afu_driver_reserved
  - Added new callback event_delivered
  - Added static function afu_driver_event_copy

Changes since v3:
- Removed driver ops callback ctx_event_pending
- Created cxl function cxl_context_pending_events
- Created cxl function cxl_unset_driver_ops
- Added atomic event counter afu_driver_events

Changes since v2:
- Fixed some typos spotted by Matt Ochs

Changes since v1:
- Rebased on upstream
- Bumped cxl api version to 3
- Addressed comments from mpe:
  - Clarified commit message & some comments
  - Mentioned 'cxlflash' as a possible user of this event
  - Check driver ops on registration and warn if missing calls
  - Remove redundant checks where driver ops is used
  - Simplified ctx_event_pending and removed underscore version
  - Changed deliver_event to take the context as the first argument

 drivers/misc/cxl/Kconfig |  5 
 drivers/misc/cxl/api.c   | 17 +
 drivers/misc/cxl/cxl.h   |  7 +-
 drivers/misc/cxl/file.c  | 64 ++--
 include/misc/cxl.h   | 48 
 include/uapi/misc/cxl.h  | 17 +
 6 files changed, 149 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 8756d06..560412c 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -15,12 +15,17 @@ config CXL_EEH
bool
default n
 
+config CXL_AFU_DRIVER_OPS
+   bool
+   default n
+
 config CXL
tristate "Support for IBM Coherent Accelerators (CXL)"
depends on PPC_POWERNV && PCI_MSI && EEH
select CXL_BASE
select CXL_KERNEL_API
select CXL_EEH
+   select CXL_AFU_DRIVER_OPS
default m
help
  Select this option to enable driver support for IBM Coherent
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 6d228cc..dd1988e 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -323,6 +323,23 @@ struct cxl_context *cxl_fops_get_context(struct file *file)
 }
 EXPORT_SYMBOL_GPL(cxl_fops_get_context);
 
+void cxl_set_driver_ops(struct cxl_context *ctx,
+   struct cxl_afu_driver_ops *ops)
+{
+   WARN_ON(!ops->fetch_event || !ops->event_delivered);
+   atomic_set(&ctx->afu_driver_events, 0);
+   ctx->afu_driver_ops = ops;
+}
+EXPORT_SYMBOL_GPL(cxl_set_driver_ops);
+
+void cxl_context_events_pending(struct cxl_context *ctx,
+   unsigned int new_events)
+{
+   atomic_add(new_events, >afu_driver_event

Re: [v6, 1/2] cxl: Add mechanism for delivering AFU driver specific events

2016-06-22 Thread Philippe Bergheaud

Should also check against the length of user-buffer (count) provided in the read
call. Ideally this condition check should be moved to the read call where
you have access to the count variable.

Right now libcxl is using a hardcoded value of CXL_READ_MIN_SIZE to
issue the read call and in kernel code we have a check to ensure that
the read buffer is at least CXL_READ_MIN_SIZE in size.

But it might be a good idea to decouple driver from
CXL_MAX_EVENT_DATA_SIZE. Ideally the maximum event size that we can
support should be dependent on the amount of user buffer we receive in the
read call. That way future libcxl can support larger event_data without
needing a change to the cxl.h


[...]

+#define CXL_MAX_EVENT_DATA_SIZE 128
+



Agree with Matt's earlier comments. 128 is very small and I would prefer
for at least a page size (4k/64K) limit.



afu_read() enforces a minimum buffer size of CXL_READ_MIN_SIZE = 4K, as 
documented in Documentation/powerpc/cxl.txt. This information is missing from 
the man pages of the libcxl functions cxl_read_event/cxl_read_expected_event. I 
will fix these.

Regarding the maximum event size, as afu_read returns one event per call, and 
as there is no API to tell userland the maximum size of a cxl event, I think 
that we should simply use (and document) the same value (4K) as the maximum cxl 
event size.

Philippe



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [v6, 1/2] cxl: Add mechanism for delivering AFU driver specific events

2016-06-22 Thread Philippe Bergheaud

Matthew R. Ochs wrote:

On Jun 21, 2016, at 5:34 AM, Vaibhav Jain  wrote:

Hi Ian,

Ian Munsie  writes:



Excerpts from Vaibhav Jain's message of 2016-06-20 14:20:16 +0530:

What exactly is the use case for this API? I'd vote to drop it if we can
do without it.


Agree with this. Functionality of this API can be merged with
cxl_set_driver_ops when called with NULL arg for cxl_afu_driver_ops.



Passing a NULL arg instead of calling an 'unset' API is fine with us.

I'll add that for cxlflash, I can't envision a scenario where we'll unset the
driver ops for a context.


Agreed, thanks. I will drop API cxl_unset_driver_ops() in v7.

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[v6,2/2] cxl: Add set and get private data to context struct

2016-06-16 Thread Philippe Bergheaud
From: Michael Neuling <mi...@neuling.org>

This provides AFU drivers a means to associate private data with a cxl
context. This is particularly intended to make the new callbacks for
driver specific events easier for AFU drivers to use, as they can easily
get back to any private data structures they may use.

Signed-off-by: Michael Neuling <mi...@neuling.org>
Signed-off-by: Ian Munsie <imun...@au1.ibm.com>
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
Reviewed-by: Matthew R. Ochs <mro...@linux.vnet.ibm.com>
---
No changes since v1. Added Matt Ochs reviewed-by tag.

 drivers/misc/cxl/api.c | 21 +
 drivers/misc/cxl/cxl.h |  3 +++
 include/misc/cxl.h |  7 +++
 3 files changed, 31 insertions(+)

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 23f98f4..28d5b41 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -94,6 +94,27 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context 
*ctx, int num)
return 0;
 }
 
+
+int cxl_set_priv(struct cxl_context *ctx, void *priv)
+{
+   if (!ctx)
+   return -EINVAL;
+
+   ctx->priv = priv;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_priv);
+
+void *cxl_get_priv(struct cxl_context *ctx)
+{
+   if (!ctx)
+   return ERR_PTR(-EINVAL);
+
+   return ctx->priv;
+}
+EXPORT_SYMBOL_GPL(cxl_get_priv);
+
 int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
 {
int res;
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b0027e6..1e56304 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -478,6 +478,9 @@ struct cxl_context {
/* Only used in PR mode */
u64 process_token;
 
+   /* driver private data */
+   void *priv;
+
unsigned long *irq_bitmap; /* Accessed from IRQ context */
struct cxl_irq_ranges irqs;
struct list_head irq_names;
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index 1d8dde8..8f89110 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -86,6 +86,13 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev 
*dev);
 int cxl_release_context(struct cxl_context *ctx);
 
 /*
+ * Set and get private data associated with a context. Allows drivers to have a
+ * back pointer to some useful structure.
+ */
+int cxl_set_priv(struct cxl_context *ctx, void *priv);
+void *cxl_get_priv(struct cxl_context *ctx);
+
+/*
  * Allocate AFU interrupts for this context. num=0 will allocate the default
  * for this AFU as given in the AFU descriptor. This number doesn't include the
  * interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[v6, 1/2] cxl: Add mechanism for delivering AFU driver specific events

2016-06-16 Thread Philippe Bergheaud
This adds an afu_driver_ops structure with fetch_event() and
event_delivered() callbacks. An AFU driver such as cxlflash can fill
this out and associate it with a context to enable passing custom
AFU specific events to userspace.

This also adds a new kernel API function cxl_context_events_pending(),
that the AFU driver can use to notify the cxl driver that new specific
events are ready to be delivered, and wake up anyone waiting on the
context wait queue.

The current count of AFU driver specific events is stored in the field
afu_driver_events of the context structure.

The cxl driver checks the afu_driver_events count during poll, select,
read, etc. calls to check if an AFU driver specific event is pending,
and calls fetch_event() to obtain and deliver that event. This way,
the cxl driver takes care of all the usual locking semantics around these
calls and handles all the generic cxl events, so that the AFU driver only
needs to worry about its own events.

fetch_event() returns a struct cxl_event_afu_driver_reserved, allocated
by the AFU driver, and filled in with the specific event information and
size. Data size should not be greater than CXL_MAX_EVENT_DATA_SIZE.

The cxl driver prepends an appropriate cxl event header, copies the event
to userspace, and finally calls event_delivered() to return the status of
the operation to the AFU driver. The event is identified by the context
and cxl_event_afu_driver_reserved pointers.

Since AFU drivers provide their own means for userspace to obtain the
AFU file descriptor (i.e. cxlflash uses an ioctl on their scsi file
descriptor to obtain the AFU file descriptor) and the generic cxl driver
will never use this event, the ABI of the event is up to each individual
AFU driver.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changes since v5:
- s/deliver_event/fetch_event/
- Fixed the handling of fetch_event errors
- Documented the return codes of event_delivered

Changes since v4:
- Addressed comments from Vaibhav:
  - Changed struct cxl_event_afu_driver_reserved from
{ __u64 reserved[4]; } to { size_t data_size; u8 data[]; }
  - Modified deliver_event to return a struct cxl_event_afu_driver_reserved
  - Added new callback event_delivered
  - Added static function afu_driver_event_copy

Changes since v3:
- Removed driver ops callback ctx_event_pending
- Created cxl function cxl_context_pending_events
- Created cxl function cxl_unset_driver_ops
- Added atomic event counter afu_driver_events

Changes since v2:
- Fixed some typos spotted by Matt Ochs

Changes since v1:
- Rebased on upstream
- Bumped cxl api version to 3
- Addressed comments from mpe:
  - Clarified commit message & some comments
  - Mentioned 'cxlflash' as a possible user of this event
  - Check driver ops on registration and warn if missing calls
  - Remove redundant checks where driver ops is used
  - Simplified ctx_event_pending and removed underscore version
  - Changed deliver_event to take the context as the first argument

 drivers/misc/cxl/Kconfig |  5 +
 drivers/misc/cxl/api.c   | 27 ++
 drivers/misc/cxl/cxl.h   |  7 +-
 drivers/misc/cxl/file.c  | 58 +---
 include/misc/cxl.h   | 53 +++
 include/uapi/misc/cxl.h  | 21 ++
 6 files changed, 162 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 8756d06..560412c 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -15,12 +15,17 @@ config CXL_EEH
bool
default n
 
+config CXL_AFU_DRIVER_OPS
+   bool
+   default n
+
 config CXL
tristate "Support for IBM Coherent Accelerators (CXL)"
depends on PPC_POWERNV && PCI_MSI && EEH
select CXL_BASE
select CXL_KERNEL_API
select CXL_EEH
+   select CXL_AFU_DRIVER_OPS
default m
help
  Select this option to enable driver support for IBM Coherent
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 6d228cc..23f98f4 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -323,6 +323,33 @@ struct cxl_context *cxl_fops_get_context(struct file *file)
 }
 EXPORT_SYMBOL_GPL(cxl_fops_get_context);
 
+void cxl_set_driver_ops(struct cxl_context *ctx,
+   struct cxl_afu_driver_ops *ops)
+{
+   WARN_ON(!ops->fetch_event || !ops->event_delivered);
+   atomic_set(&ctx->afu_driver_events, 0);
+   ctx->afu_driver_ops = ops;
+}
+EXPORT_SYMBOL_GPL(cxl_set_driver_ops);
+
+int cxl_unset_driver_ops(struct cxl_context *ctx)
+{
+   if (atomic_read(&ctx->afu_driver_events))
+   return -EBUSY;
+
+   ctx->afu_driver_ops = NULL;
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_unset_driver_ops);
+
+void cxl_context_events_pending(struct cxl_context *ctx,
+   unsigned int new_events

Re: [v5, 1/2] cxl: Add mechanism for delivering AFU driver specific events

2016-06-16 Thread Philippe Bergheaud

Michael Ellerman wrote:

On Mon, 2016-23-05 at 12:49:01 UTC, Philippe Bergheaud wrote:


This adds an afu_driver_ops structure with deliver_event() and
event_delivered() callbacks. An AFU driver such as cxlflash can fill
this out and associate it with a context to enable passing custom
AFU specific events to userspace.

This also adds a new kernel API function cxl_context_pending_events(),
that the AFU driver can use to notify the cxl driver that new specific
events are ready to be delivered, and wake up anyone waiting on the
context wait queue.


...


Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>



It looks like the discussion has settled down on this one, and everyone was OK
with it, except maybe the naming?

So can we either get some ACKs, or a v6 with new naming?


I am about to send a v6.

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RESEND v5,2/2] cxl: Add set and get private data to context struct

2016-05-23 Thread Philippe Bergheaud
From: Michael Neuling <mi...@neuling.org>

This provides AFU drivers a means to associate private data with a cxl
context. This is particularly intended to make the new callbacks for
driver specific events easier for AFU drivers to use, as they can easily
get back to any private data structures they may use.

Signed-off-by: Michael Neuling <mi...@neuling.org>
Signed-off-by: Ian Munsie <imun...@au1.ibm.com>
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
Reviewed-by: Matthew R. Ochs <mro...@linux.vnet.ibm.com>
---
No changes since v1, added Matt Ochs reviewed-by tag.

 drivers/misc/cxl/api.c | 21 +
 drivers/misc/cxl/cxl.h |  3 +++
 include/misc/cxl.h |  7 +++
 3 files changed, 31 insertions(+)

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 71673cb..efeaa6e 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -94,6 +94,27 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context 
*ctx, int num)
return 0;
 }
 
+
+int cxl_set_priv(struct cxl_context *ctx, void *priv)
+{
+   if (!ctx)
+   return -EINVAL;
+
+   ctx->priv = priv;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_priv);
+
+void *cxl_get_priv(struct cxl_context *ctx)
+{
+   if (!ctx)
+   return ERR_PTR(-EINVAL);
+
+   return ctx->priv;
+}
+EXPORT_SYMBOL_GPL(cxl_get_priv);
+
 int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
 {
int res;
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b0027e6..1e56304 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -478,6 +478,9 @@ struct cxl_context {
/* Only used in PR mode */
u64 process_token;
 
+   /* driver private data */
+   void *priv;
+
unsigned long *irq_bitmap; /* Accessed from IRQ context */
struct cxl_irq_ranges irqs;
struct list_head irq_names;
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index ef3115e..667f99d 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -86,6 +86,13 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev 
*dev);
 int cxl_release_context(struct cxl_context *ctx);
 
 /*
+ * Set and get private data associated with a context. Allows drivers to have a
+ * back pointer to some useful structure.
+ */
+int cxl_set_priv(struct cxl_context *ctx, void *priv);
+void *cxl_get_priv(struct cxl_context *ctx);
+
+/*
  * Allocate AFU interrupts for this context. num=0 will allocate the default
  * for this AFU as given in the AFU descriptor. This number doesn't include the
  * interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[v4,2/2] cxl: Add set and get private data to context struct

2016-05-23 Thread Philippe Bergheaud
From: Michael Neuling <mi...@neuling.org>

This provides AFU drivers a means to associate private data with a cxl
context. This is particularly intended to make the new callbacks for
driver specific events easier for AFU drivers to use, as they can easily
get back to any private data structures they may use.

Signed-off-by: Michael Neuling <mi...@neuling.org>
Signed-off-by: Ian Munsie <imun...@au1.ibm.com>
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
Reviewed-by: Matthew R. Ochs <mro...@linux.vnet.ibm.com>
---
No changes since v1, added Matt Ochs reviewed-by tag.

 drivers/misc/cxl/api.c | 21 +
 drivers/misc/cxl/cxl.h |  3 +++
 include/misc/cxl.h |  7 +++
 3 files changed, 31 insertions(+)

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 71673cb..efeaa6e 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -94,6 +94,27 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context 
*ctx, int num)
return 0;
 }
 
+
+int cxl_set_priv(struct cxl_context *ctx, void *priv)
+{
+   if (!ctx)
+   return -EINVAL;
+
+   ctx->priv = priv;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_priv);
+
+void *cxl_get_priv(struct cxl_context *ctx)
+{
+   if (!ctx)
+   return ERR_PTR(-EINVAL);
+
+   return ctx->priv;
+}
+EXPORT_SYMBOL_GPL(cxl_get_priv);
+
 int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
 {
int res;
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b0027e6..1e56304 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -478,6 +478,9 @@ struct cxl_context {
/* Only used in PR mode */
u64 process_token;
 
+   /* driver private data */
+   void *priv;
+
unsigned long *irq_bitmap; /* Accessed from IRQ context */
struct cxl_irq_ranges irqs;
struct list_head irq_names;
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index ef3115e..667f99d 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -86,6 +86,13 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev 
*dev);
 int cxl_release_context(struct cxl_context *ctx);
 
 /*
+ * Set and get private data associated with a context. Allows drivers to have a
+ * back pointer to some useful structure.
+ */
+int cxl_set_priv(struct cxl_context *ctx, void *priv);
+void *cxl_get_priv(struct cxl_context *ctx);
+
+/*
  * Allocate AFU interrupts for this context. num=0 will allocate the default
  * for this AFU as given in the AFU descriptor. This number doesn't include the
  * interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[v5, 1/2] cxl: Add mechanism for delivering AFU driver specific events

2016-05-23 Thread Philippe Bergheaud
This adds an afu_driver_ops structure with deliver_event() and
event_delivered() callbacks. An AFU driver such as cxlflash can fill
this out and associate it with a context to enable passing custom
AFU specific events to userspace.

This also adds a new kernel API function cxl_context_events_pending(),
that the AFU driver can use to notify the cxl driver that new specific
events are ready to be delivered, and wake up anyone waiting on the
context wait queue.

The current count of AFU driver specific events is stored in the field
afu_driver_events of the context structure.

The cxl driver checks the afu_driver_events count during poll, select,
read, etc. calls to check if an AFU driver specific event is pending,
and calls deliver_event() to obtain and deliver that event. This way,
the cxl driver takes care of all the usual locking semantics around these
calls and handles all the generic cxl events, so that the AFU driver only
needs to worry about its own events.

deliver_event() returns a struct cxl_event_afu_driver_reserved, allocated
by the AFU driver, and filled in with the specific event information and
size. Data size should not be greater than CXL_MAX_EVENT_DATA_SIZE.

The cxl driver prepends an appropriate cxl event header, copies the event
to userspace, and finally calls event_delivered() to return the status of
the operation to the AFU driver. The event is identified by the context
and cxl_event_afu_driver_reserved pointers.

Since AFU drivers provide their own means for userspace to obtain the
AFU file descriptor (i.e. cxlflash uses an ioctl on their scsi file
descriptor to obtain the AFU file descriptor) and the generic cxl driver
will never use this event, the ABI of the event is up to each individual
AFU driver.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changes since v4:
- Addressed comments from Vaibhav:
  - Changed struct cxl_event_afu_driver_reserved from
{ __u64 reserved[4]; } to { size_t data_size; u8 data[]; }
  - Modified deliver_event to return a struct cxl_event_afu_driver_reserved
  - Added new callback event_delivered
  - Added static function afu_driver_event_copy

Changes since v3:
- Removed driver ops callback ctx_event_pending
- Created cxl function cxl_context_pending_events
- Created cxl function cxl_unset_driver_ops
- Added atomic event counter afu_driver_events

Changes since v2:
- Fixed some typos spotted by Matt Ochs

Changes since v1:
- Rebased on upstream
- Bumped cxl api version to 3
- Addressed comments from mpe:
  - Clarified commit message & some comments
  - Mentioned 'cxlflash' as a possible user of this event
  - Check driver ops on registration and warn if missing calls
  - Remove redundant checks where driver ops is used
  - Simplified ctx_event_pending and removed underscore version
  - Changed deliver_event to take the context as the first argument

 drivers/misc/cxl/Kconfig |  5 +
 drivers/misc/cxl/api.c   | 27 +
 drivers/misc/cxl/cxl.h   |  7 ++-
 drivers/misc/cxl/file.c  | 52 
 include/misc/cxl.h   | 50 ++
 include/uapi/misc/cxl.h  | 21 +++
 6 files changed, 153 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 8756d06..560412c 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -15,12 +15,17 @@ config CXL_EEH
bool
default n
 
+config CXL_AFU_DRIVER_OPS
+   bool
+   default n
+
 config CXL
tristate "Support for IBM Coherent Accelerators (CXL)"
depends on PPC_POWERNV && PCI_MSI && EEH
select CXL_BASE
select CXL_KERNEL_API
select CXL_EEH
+   select CXL_AFU_DRIVER_OPS
default m
help
  Select this option to enable driver support for IBM Coherent
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 6d228cc..71673cb 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -323,6 +323,33 @@ struct cxl_context *cxl_fops_get_context(struct file *file)
 }
 EXPORT_SYMBOL_GPL(cxl_fops_get_context);
 
+void cxl_set_driver_ops(struct cxl_context *ctx,
+   struct cxl_afu_driver_ops *ops)
+{
+   WARN_ON(!ops->deliver_event || !ops->event_delivered);
+   atomic_set(>afu_driver_events, 0);
+   ctx->afu_driver_ops = ops;
+}
+EXPORT_SYMBOL_GPL(cxl_set_driver_ops);
+
+int cxl_unset_driver_ops(struct cxl_context *ctx)
+{
+   if (atomic_read(>afu_driver_events))
+   return -EBUSY;
+
+   ctx->afu_driver_ops = NULL;
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_unset_driver_ops);
+
+void cxl_context_events_pending(struct cxl_context *ctx,
+   unsigned int new_events)
+{
+   atomic_add(new_events, >afu_driver_events);
+   wake_up_all(>wq);
+}
+EXPORT_SYMBOL_GPL(cxl_context_ev

[RESEND v4,1/2] cxl: Add mechanism for delivering AFU driver

2016-05-18 Thread Philippe Bergheaud
From: Ian Munsie <imun...@au1.ibm.com>

This adds an afu_driver_ops structure with deliver_event callback. An
AFU driver such as cxlflash can fill this out and associate it with a
context to enable passing custom AFU specific events to userspace.

This also adds a new kernel API function cxl_context_events_pending(),
that the AFU driver can use to notify the cxl driver that new specific
events are ready to be delivered, and wake up anyone waiting on the
context wait queue.

The current count of AFU driver specific events is stored in the field
afu_driver_events of the context structure.

The cxl driver will check the afu_driver_events count during poll,
select, read, etc. calls to check if an AFU driver specific event is
pending, and will call deliver_event() to deliver that event. This way,
the cxl driver takes care of all the usual locking semantics around
these calls and handles all the generic cxl events, so that the AFU
driver only needs to worry about its own events.

The deliver_event() call is passed a struct cxl_event buffer to fill in.
The header will already be filled in for an AFU driver event, and the
AFU driver is expected to expand the header.size as necessary (up to
max_size, defined by struct cxl_event_afu_driver_reserved) and fill out
its own information.

Since AFU drivers provide their own means for userspace to obtain the
AFU file descriptor (i.e. cxlflash uses an ioctl on their scsi file
descriptor to obtain the AFU file descriptor) and the generic cxl driver
will never use this event, the ABI of the event is up to each individual
AFU driver.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changes since v3:
- Removed driver ops callback ctx_event_pending
- Created cxl function cxl_context_pending_events
- Created cxl function cxl_unset_driver_ops
- Added atomic event counter afu_driver_events

Changes since v2:
- Fixed some typos spotted by Matt Ochs

Changes since v1:
- Rebased on upstream
- Bumped cxl api version to 3
- Addressed comments from mpe:
  - Clarified commit message & some comments
  - Mentioned 'cxlflash' as a possible user of this event
  - Check driver ops on registration and warn if missing calls
  - Remove redundant checks where driver ops is used
  - Simplified ctx_event_pending and removed underscore version
  - Changed deliver_event to take the context as the first argument

 drivers/misc/cxl/Kconfig |  5 +
 drivers/misc/cxl/api.c   | 27 +++
 drivers/misc/cxl/cxl.h   |  6 +-
 drivers/misc/cxl/file.c  | 36 +---
 include/misc/cxl.h   | 43 +++
 include/uapi/misc/cxl.h  | 22 ++
 6 files changed, 127 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 8756d06..560412c 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -15,12 +15,17 @@ config CXL_EEH
bool
default n
 
+config CXL_AFU_DRIVER_OPS
+   bool
+   default n
+
 config CXL
tristate "Support for IBM Coherent Accelerators (CXL)"
depends on PPC_POWERNV && PCI_MSI && EEH
select CXL_BASE
select CXL_KERNEL_API
select CXL_EEH
+   select CXL_AFU_DRIVER_OPS
default m
help
  Select this option to enable driver support for IBM Coherent
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 2107c94..cd47f39 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -313,6 +313,33 @@ struct cxl_context *cxl_fops_get_context(struct file *file)
 }
 EXPORT_SYMBOL_GPL(cxl_fops_get_context);
 
+void cxl_set_driver_ops(struct cxl_context *ctx,
+   struct cxl_afu_driver_ops *ops)
+{
+   WARN_ON(!ops->deliver_event);
+   atomic_set(>afu_driver_events, 0);
+   ctx->afu_driver_ops = ops;
+}
+EXPORT_SYMBOL_GPL(cxl_set_driver_ops);
+
+int cxl_unset_driver_ops(struct cxl_context *ctx)
+{
+   if (atomic_read(>afu_driver_events))
+   return -EBUSY;
+
+   ctx->afu_driver_ops = NULL;
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_unset_driver_ops);
+
+void cxl_context_events_pending(struct cxl_context *ctx,
+   unsigned int new_events)
+{
+   atomic_add(new_events, >afu_driver_events);
+   wake_up_all(>wq);
+}
+EXPORT_SYMBOL_GPL(cxl_context_events_pending);
+
 int cxl_start_work(struct cxl_context *ctx,
   struct cxl_ioctl_start_work *work)
 {
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 73dc2a3..b48828c 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -24,6 +24,7 @@
 #include 
 #include 
 
+#include 
 #include 
 
 extern uint cxl_verbose;
@@ -34,7 +35,7 @@ extern uint cxl_verbose;
  * Bump version each time a user API change is made, whether it is
  * backwards compatible ot not.
  */
-#define CXL_API_VERSION 2
+

[v3, 1/2] cxl: Add mechanism for delivering AFU driver specific events

2016-05-18 Thread Philippe Bergheaud
From: Ian Munsie <imun...@au1.ibm.com>

This adds an afu_driver_ops structure with deliver_event callback. An
AFU driver such as cxlflash can fill this out and associate it with a
context to enable passing custom AFU specific events to userspace.

This also adds a new kernel API function cxl_context_events_pending(),
that the AFU driver can use to notify the cxl driver that new specific
events are ready to be delivered, and wake up anyone waiting on the
context wait queue.

The current count of AFU driver specific events is stored in the field
afu_driver_events of the context structure.

The cxl driver will check the afu_driver_events count during poll,
select, read, etc. calls to check if an AFU driver specific event is
pending, and will call deliver_event() to deliver that event. This way,
the cxl driver takes care of all the usual locking semantics around
these calls and handles all the generic cxl events, so that the AFU
driver only needs to worry about its own events.

The deliver_event() call is passed a struct cxl_event buffer to fill in.
The header will already be filled in for an AFU driver event, and the
AFU driver is expected to expand the header.size as necessary (up to
max_size, defined by struct cxl_event_afu_driver_reserved) and fill out
its own information.

Since AFU drivers provide their own means for userspace to obtain the
AFU file descriptor (i.e. cxlflash uses an ioctl on their scsi file
descriptor to obtain the AFU file descriptor) and the generic cxl driver
will never use this event, the ABI of the event is up to each individual
AFU driver.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
Changes since v3:
- Removed driver ops callback ctx_event_pending
- Created cxl function cxl_context_pending_events
- Created cxl function cxl_unset_driver_ops
- Added atomic event counter afu_driver_events

Changes since v2:
- Fixed some typos spotted by Matt Ochs

Changes since v1:
- Rebased on upstream
- Bumped cxl api version to 3
- Addressed comments from mpe:
  - Clarified commit message & some comments
  - Mentioned 'cxlflash' as a possible user of this event
  - Check driver ops on registration and warn if missing calls
  - Remove redundant checks where driver ops is used
  - Simplified ctx_event_pending and removed underscore version
  - Changed deliver_event to take the context as the first argument

 drivers/misc/cxl/Kconfig |  5 +
 drivers/misc/cxl/api.c   | 27 +++
 drivers/misc/cxl/cxl.h   |  6 +-
 drivers/misc/cxl/file.c  | 36 +---
 include/misc/cxl.h   | 43 +++
 include/uapi/misc/cxl.h  | 22 ++
 6 files changed, 127 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 8756d06..560412c 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -15,12 +15,17 @@ config CXL_EEH
bool
default n
 
+config CXL_AFU_DRIVER_OPS
+   bool
+   default n
+
 config CXL
tristate "Support for IBM Coherent Accelerators (CXL)"
depends on PPC_POWERNV && PCI_MSI && EEH
select CXL_BASE
select CXL_KERNEL_API
select CXL_EEH
+   select CXL_AFU_DRIVER_OPS
default m
help
  Select this option to enable driver support for IBM Coherent
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 2107c94..cd47f39 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -313,6 +313,33 @@ struct cxl_context *cxl_fops_get_context(struct file *file)
 }
 EXPORT_SYMBOL_GPL(cxl_fops_get_context);
 
+void cxl_set_driver_ops(struct cxl_context *ctx,
+   struct cxl_afu_driver_ops *ops)
+{
+   WARN_ON(!ops->deliver_event);
+   atomic_set(>afu_driver_events, 0);
+   ctx->afu_driver_ops = ops;
+}
+EXPORT_SYMBOL_GPL(cxl_set_driver_ops);
+
+int cxl_unset_driver_ops(struct cxl_context *ctx)
+{
+   if (atomic_read(>afu_driver_events))
+   return -EBUSY;
+
+   ctx->afu_driver_ops = NULL;
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_unset_driver_ops);
+
+void cxl_context_events_pending(struct cxl_context *ctx,
+   unsigned int new_events)
+{
+   atomic_add(new_events, >afu_driver_events);
+   wake_up_all(>wq);
+}
+EXPORT_SYMBOL_GPL(cxl_context_events_pending);
+
 int cxl_start_work(struct cxl_context *ctx,
   struct cxl_ioctl_start_work *work)
 {
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 73dc2a3..b48828c 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -24,6 +24,7 @@
 #include 
 #include 
 
+#include 
 #include 
 
 extern uint cxl_verbose;
@@ -34,7 +35,7 @@ extern uint cxl_verbose;
  * Bump version each time a user API change is made, whether it is
  * backwards compatible ot not.
  */
-#define CXL_API_VERSION 2
+

[v4,2/2] cxl: Add set and get private data to context struct

2016-05-18 Thread Philippe Bergheaud
From: Michael Neuling <mi...@neuling.org>

This provides AFU drivers a means to associate private data with a cxl
context. This is particularly intended to make the new callbacks for
driver specific events easier for AFU drivers to use, as they can easily
get back to any private data structures they may use.

Signed-off-by: Michael Neuling <mi...@neuling.org>
Signed-off-by: Ian Munsie <imun...@au1.ibm.com>
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
Reviewed-by: Matthew R. Ochs <mro...@linux.vnet.ibm.com>
---
No changes since v1, added Matt Ochs reviewed-by tag.

 drivers/misc/cxl/api.c | 21 +
 drivers/misc/cxl/cxl.h |  3 +++
 include/misc/cxl.h |  7 +++
 3 files changed, 31 insertions(+)

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index cd47f39..0eb4bbf 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -103,6 +103,27 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context 
*ctx, int num)
return 0;
 }
 
+
+int cxl_set_priv(struct cxl_context *ctx, void *priv)
+{
+   if (!ctx)
+   return -EINVAL;
+
+   ctx->priv = priv;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_priv);
+
+void *cxl_get_priv(struct cxl_context *ctx)
+{
+   if (!ctx)
+   return ERR_PTR(-EINVAL);
+
+   return ctx->priv;
+}
+EXPORT_SYMBOL_GPL(cxl_get_priv);
+
 int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
 {
int res;
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b48828c..b0e51a3 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -485,6 +485,9 @@ struct cxl_context {
/* Only used in PR mode */
u64 process_token;
 
+   /* driver private data */
+   void *priv;
+
unsigned long *irq_bitmap; /* Accessed from IRQ context */
struct cxl_irq_ranges irqs;
struct list_head irq_names;
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index 8a7677e2..e91d849 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -86,6 +86,13 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev 
*dev);
 int cxl_release_context(struct cxl_context *ctx);
 
 /*
+ * Set and get private data associated with a context. Allows drivers to have a
+ * back pointer to some useful structure.
+ */
+int cxl_set_priv(struct cxl_context *ctx, void *priv);
+void *cxl_get_priv(struct cxl_context *ctx);
+
+/*
  * Allocate AFU interrupts for this context. num=0 will allocate the default
  * for this AFU as given in the AFU descriptor. This number doesn't include the
  * interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] cxl: Refine slice error debug messages.

2016-05-11 Thread Philippe Bergheaud
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---

 drivers/misc/cxl/cxl.h| 15 +++
 drivers/misc/cxl/guest.c  | 11 ---
 drivers/misc/cxl/irq.c| 27 +++
 drivers/misc/cxl/native.c | 27 ++-
 4 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 73dc2a3..a904f86 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -191,6 +191,18 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
 #define CXL_PSL_ID_An_F(1ull << (63-31))
 #define CXL_PSL_ID_An_L(1ull << (63-30))
 
+/** CXL_PSL_SERR_An /
+#define CXL_PSL_SERR_An_afuto  (1ull << (63-0))
+#define CXL_PSL_SERR_An_afudis (1ull << (63-1))
+#define CXL_PSL_SERR_An_afuov  (1ull << (63-2))
+#define CXL_PSL_SERR_An_badsrc (1ull << (63-3))
+#define CXL_PSL_SERR_An_badctx (1ull << (63-4))
+#define CXL_PSL_SERR_An_llcmdis(1ull << (63-5))
+#define CXL_PSL_SERR_An_llcmdto(1ull << (63-6))
+#define CXL_PSL_SERR_An_afupar (1ull << (63-7))
+#define CXL_PSL_SERR_An_afudup (1ull << (63-8))
+#define CXL_PSL_SERR_An_AE (1ull << (63-30))
+
 /** CXL_PSL_SCNTL_An /
 #define CXL_PSL_SCNTL_An_CR  (0x1ull << (63-15))
 /* Programming Modes: */
@@ -884,4 +896,7 @@ extern const struct cxl_backend_ops *cxl_ops;
 
 /* check if the given pci_dev is on the the cxl vphb bus */
 bool cxl_pci_is_vphb_device(struct pci_dev *dev);
+
+/* decode AFU error bits in the PSL register PSL_SERR_An */
+void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr);
 #endif
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 8213372..9de113c 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -193,15 +193,20 @@ static irqreturn_t guest_slice_irq_err(int irq, void 
*data)
 {
struct cxl_afu *afu = data;
int rc;
-   u64 serr;
+   u64 serr, afu_error, dsisr;
 
-   WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq);
+   dev_crit(>dev,
+"PSL Slice error received. Check AFU for root cause\n");
rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, );
if (rc) {
dev_crit(>dev, "Couldn't read PSL_SERR_An: %d\n", rc);
return IRQ_HANDLED;
}
-   dev_crit(>dev, "PSL_SERR_An: 0x%.16llx\n", serr);
+   afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An);
+   dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+   cxl_afu_decode_psl_serr(afu, serr);
+   dev_crit(>dev, "AFU_ERR_An: 0x%.16llx\n", afu_error);
+   dev_crit(>dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr);
 
rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr);
if (rc)
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 8def455..9e63734 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -374,3 +374,30 @@ void afu_release_irqs(struct cxl_context *ctx, void 
*cookie)
 
ctx->irq_count = 0;
 }
+
+void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr)
+{
+   dev_crit(>dev, "PSL_SERR_An: 0x%016llx\n", serr);
+   if (serr & CXL_PSL_SERR_An_afuto)
+   dev_crit(>dev, "AFU MMIO Timeout\n");
+   if (serr & CXL_PSL_SERR_An_afudis)
+   dev_crit(>dev,
+"MMIO targeted Accelerator that was not enabled\n");
+   if (serr & CXL_PSL_SERR_An_afuov)
+   dev_crit(>dev, "AFU CTAG Overflow\n");
+   if (serr & CXL_PSL_SERR_An_badsrc)
+   dev_crit(>dev, "Bad Interrupt Source\n");
+   if (serr & CXL_PSL_SERR_An_badctx)
+   dev_crit(>dev, "Bad Context Handle\n");
+   if (serr & CXL_PSL_SERR_An_llcmdis)
+   dev_crit(>dev, "LLCMD to Disabled AFU\n");
+   if (serr & CXL_PSL_SERR_An_llcmdto)
+   dev_crit(>dev, "LLCMD Timeout to AFU\n");
+   if (serr & CXL_PSL_SERR_An_afupar)
+   dev_crit(>dev, "AFU MMIO Parity Error\n");
+   if (serr & CXL_PSL_SERR_An_afudup)
+   dev_crit(>dev, "AFU MMIO Duplicate CTAG Error\n");
+   if (serr & CXL_PSL_SERR_An_AE)
+   dev_crit(>dev,
+"AFU asserted JDONE with JERROR in AFU Directed 
Mode\n");
+}
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index ecf7557..46b6ee1 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -738,20 +738,25 @@ static int native_get_irq_info(struct cxl_afu *afu, 
struct cxl_irq_info *info)
 static irqretu

[PATCH v3 2/2] cxl: Configure the PSL for two CAPI ports on POWER8NVL

2016-03-31 Thread Philippe Bergheaud
The POWER8NVL chip has two CAPI ports.  Configure the PSL to route
data to the port corresponding to the CAPP unit.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
V2:
  - Complete rewrite after Mikey's review
V3:
  Fixes suggested by Michael:
  - s/capp_unit_id(/get_capp_unit_id(/
  - Fix block commenting style
  - Remove extra space
  - Use of_property_read_u32
  - Add blank line after return
  - Fix logic for phb_index > 1 on POWERNVL
  - s/cappunitid/capp_unit_id/
  - Add error message for -ENODEV

 drivers/misc/cxl/pci.c | 41 -
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 2844e97..94fd3f7 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "cxl.h"
 #include 
@@ -321,12 +322,43 @@ static void dump_afu_descriptor(struct cxl_afu *afu)
 #undef show_reg
 }
 
+#define CAPP_UNIT0_ID 0xBA
+#define CAPP_UNIT1_ID 0XBE
+
+static u64 get_capp_unit_id(struct device_node *np)
+{
+   u32 phb_index;
+
+   /*
+* For chips other than POWER8NVL, we only have CAPP 0,
+* irrespective of which PHB is used.
+*/
+   if (!pvr_version_is(PVR_POWER8NVL))
+   return CAPP_UNIT0_ID;
+
+   /*
+* For POWER8NVL, assume CAPP 0 is attached to PHB0 and
+* CAPP 1 is attached to PHB1.
+*/
+   if (of_property_read_u32(np, "ibm,phb-index", _index))
+   return 0;
+
+   if (phb_index == 0)
+   return CAPP_UNIT0_ID;
+
+   if (phb_index == 1)
+   return CAPP_UNIT1_ID;
+
+   return 0;
+}
+
 static int init_implementation_adapter_regs(struct cxl *adapter, struct 
pci_dev *dev)
 {
struct device_node *np;
const __be32 *prop;
u64 psl_dsnctl;
u64 chipid;
+   u64 capp_unit_id;
 
if (!(np = pnv_pci_get_phb_node(dev)))
return -ENODEV;
@@ -336,10 +368,17 @@ static int init_implementation_adapter_regs(struct cxl 
*adapter, struct pci_dev
if (!np)
return -ENODEV;
chipid = be32_to_cpup(prop);
+   capp_unit_id = get_capp_unit_id(np);
of_node_put(np);
+   if (!capp_unit_id) {
+   pr_err("cxl: invalid capp unit id\n");
+   return -ENODEV;
+   }
 
/* Tell PSL where to route data to */
-   psl_dsnctl = 0x02E89200ULL | (chipid << (63-5));
+   psl_dsnctl = 0x9200ULL | (chipid << (63-5));
+   psl_dsnctl |= (capp_unit_id << (63-13));
+
cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl);
cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x2000200ULL);
/* snoop write mask */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 1/2] powerpc: Define PVR value for POWER8NVL processor

2016-03-31 Thread Philippe Bergheaud
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
V2:
  - New patch, added to patch set
V3:
  - no change

 arch/powerpc/include/asm/reg.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index f5f4c66..cf09c6e 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1182,6 +1182,7 @@
 #define PVR_970GX  0x0045
 #define PVR_POWER7p0x004A
 #define PVR_POWER8E0x004B
+#define PVR_POWER8NVL  0x004C
 #define PVR_POWER8 0x004D
 #define PVR_BE 0x0070
 #define PVR_PA6T   0x0090
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] Added a 5ms wait after a msi-irq is masked

2016-03-22 Thread Philippe Bergheaud

Sorry, I've sent this to the wrong list.

Philippe

Philippe Bergheaud wrote:

From: Vaibhav Jain <vaib...@linux.vnet.ibm.com>

Adds a 5ms wait to phb3_msi_set_xive after the interrupt is masked so
that the kernel delays cleanup until an irq, if one is in flight, is
handled. The value 5ms is the worst case time needed by an irq to be
presented to the host after it is generated.

Signed-off-by: Vaibhav Jain <vaib...@linux.vnet.ibm.com>
---
This patch requires the following patches:
https://patchwork.ozlabs.org/patch/581764/
https://patchwork.ozlabs.org/patch/581765/

 hw/phb3.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/phb3.c b/hw/phb3.c
index fbdcb9e..e5d49b2 100644
--- a/hw/phb3.c
+++ b/hw/phb3.c
@@ -1751,6 +1751,8 @@ static int64_t phb3_msi_set_xive(void *data,
PHB_IVC_UPDATE_ENABLE_Q |
PHB_IVC_UPDATE_ENABLE_GEN;
out_be64(p->regs + PHB_IVC_UPDATE, ivc);
+   /* wait for 5ms before signalling the interrupt is masked */
+   time_wait_ms(5);
}
 
 	return OPAL_SUCCESS;


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] Added a 5ms wait after a msi-irq is masked

2016-03-22 Thread Philippe Bergheaud
From: Vaibhav Jain 

Adds a 5ms wait to phb3_msi_set_xive after the interrupt is masked so
that the kernel delays cleanup until an irq, if one is in flight, is
handled. The value 5ms is the worst case time needed by an irq to be
presented to the host after it is generated.

Signed-off-by: Vaibhav Jain 
---
This patch requires the following patches:
https://patchwork.ozlabs.org/patch/581764/
https://patchwork.ozlabs.org/patch/581765/

 hw/phb3.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/phb3.c b/hw/phb3.c
index fbdcb9e..e5d49b2 100644
--- a/hw/phb3.c
+++ b/hw/phb3.c
@@ -1751,6 +1751,8 @@ static int64_t phb3_msi_set_xive(void *data,
PHB_IVC_UPDATE_ENABLE_Q |
PHB_IVC_UPDATE_ENABLE_GEN;
out_be64(p->regs + PHB_IVC_UPDATE, ivc);
+   /* wait for 5ms before signalling the interrupt is masked */
+   time_wait_ms(5);
}
 
return OPAL_SUCCESS;
-- 
2.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 1/2] powerpc: Define PVR value for POWER8NVL processor

2016-03-19 Thread Philippe Bergheaud
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
V2:
  - New patch, added to patch set

 arch/powerpc/include/asm/reg.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c4cb2ff..6a6de4a 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1174,6 +1174,7 @@
 #define PVR_970GX  0x0045
 #define PVR_POWER7p0x004A
 #define PVR_POWER8E0x004B
+#define PVR_POWER8NVL  0x004C
 #define PVR_POWER8 0x004D
 #define PVR_BE 0x0070
 #define PVR_PA6T   0x0090
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 2/2] cxl: Configure the PSL for two CAPI ports on POWER8NVL

2016-03-19 Thread Philippe Bergheaud
The POWER8NVL chip has two CAPI ports.  Configure the PSL to route
data to the port corresponding to the CAPP unit.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
V2:
  - Complete rewrite after Mikey's review

 drivers/misc/cxl/pci.c | 31 ++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 0c6c17a1..924ba63 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -22,6 +22,7 @@
 #include  /* for struct pci_controller */
 #include 
 #include 
+#include 
 
 #include "cxl.h"
 #include 
@@ -340,12 +341,35 @@ static void dump_afu_descriptor(struct cxl_afu *afu)
 #undef show_reg
 }
 
+#define CAPP_UNIT0_ID 0xBA
+#define CAPP_UNIT1_ID 0XBE
+
+static u64 capp_unit_id(struct device_node *np)
+{
+   const __be32 *prop;
+   u64 phb_index;
+
+   /* For chips other than POWER8NVL, we only have CAPP 0,
+* irrespective of which PHB is used */
+   if (!pvr_version_is(PVR_POWER8NVL))
+   return CAPP_UNIT0_ID ;
+
+   /* For POWER8NVL, assume CAPP 0 is attached to PHB0 and
+* CAPP 1 is attached to PHB1 */
+   prop = of_get_property(np, "ibm,phb-index", NULL);
+   if (!prop)
+   return 0;
+   phb_index = be32_to_cpup(prop);
+   return phb_index ? CAPP_UNIT1_ID : CAPP_UNIT0_ID;
+}
+
 static int init_implementation_adapter_regs(struct cxl *adapter, struct 
pci_dev *dev)
 {
struct device_node *np;
const __be32 *prop;
u64 psl_dsnctl;
u64 chipid;
+   u64 cappunitid;
 
if (!(np = pnv_pci_get_phb_node(dev)))
return -ENODEV;
@@ -355,10 +379,15 @@ static int init_implementation_adapter_regs(struct cxl 
*adapter, struct pci_dev
if (!np)
return -ENODEV;
chipid = be32_to_cpup(prop);
+   cappunitid = capp_unit_id(np);
of_node_put(np);
+   if (!cappunitid)
+   return -ENODEV;
 
/* Tell PSL where to route data to */
-   psl_dsnctl = 0x02E89200ULL | (chipid << (63-5));
+   psl_dsnctl = 0x9200ULL | (chipid << (63-5));
+   psl_dsnctl |= (cappunitid << (63-13));
+
cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl);
cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x2000200ULL);
/* snoop write mask */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] cxl: Configure the PSL for dual port CAPI on Naples

2016-03-16 Thread Philippe Bergheaud

Michael Neuling wrote:

On Tue, 2016-03-15 at 15:26 +0100, Philippe Bergheaud wrote:

Naples CPUs have two CAPI ports.  



Naples is an internal name, don't use that.  Use POWER8NVL, which is the
name we use in the kernel.

Also, it's a "chip" that has two CAPI ports, not the CPU.


OK, I will rephrase.



Configure the PSL to route data to
the port corresponding to the PHB index.



Isn't this capp unit in reality, not phb index?


Yes, I meant capp unit port.



Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
drivers/misc/cxl/pci.c | 15 ++-
1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 0c6c17a1..3db0a0b 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -340,12 +340,15 @@ static void dump_afu_descriptor(struct cxl_afu
*afu)
#undef show_reg
}

+#define CPU_IS_NAPLES() (cur_cpu_spec->pvr_value == 0x004c)



Use pvr_version_is(PVR_POWER8NVL))


OK.



+
static int init_implementation_adapter_regs(struct cxl *adapter,
struct pci_dev *dev)
{
struct device_node *np;
const __be32 *prop;
u64 psl_dsnctl;
u64 chipid;
+   u64 phb_index;

if (!(np = pnv_pci_get_phb_node(dev)))
return -ENODEV;
@@ -355,10 +358,20 @@ static int
init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev
if (!np)
return -ENODEV;
chipid = be32_to_cpup(prop);
-   of_node_put(np);

/* Tell PSL where to route data to */
psl_dsnctl = 0x02E89200ULL | (chipid << (63-5));
+   if (CPU_IS_NAPLES()) {
+   prop = of_get_property(np, "ibm,phb-index", NULL);
+   if (!prop) {
+   of_node_put(np);
+   return -ENODEV;
+   }
+   phb_index = be32_to_cpup(prop);
+   psl_dsnctl |= (phb_index << (63-11));



Looking at the psl docs, cappunitid in the dsndctl is bits 6 to 13.  So
why 11 here?


Because on POWER8NVL, dsndctl bit 11 == phb_index == cappunitid.
Bits 6-10 and 12-13 do not change between POWER8 and POWER8NVL.


Can you abstract this better and make it clear what's happening?  Try
something like this:

int capp_unit()
{

if (!pvr_version_is(PVR_POWER8NVL))
/* For chips other than POWER8NVL, we only have CAPP 0
 * irrespective of which PHB is used */
return 0;

	/* For POWER8NVL, assume CAPP 0 is attached to PHB0 and 
 * CAPP 1 is attached to PHB1*/

prop = of_get_property(np, "ibm,phb-index", NULL);
if (!prop) {
of_node_put(np);
return -ENODEV;
}
return be32_to_cpup(prop);
}

Then you can do something like (although you need to fix the error
case)
psl_dsnctl |= (capp_unit(p) << (63-13));

Mikey


OK. I will. Thank you.

Philippe



+   }
+   of_node_put(np);
+
cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl);
cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x2000200ULL);
/* snoop write mask */


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] cxl: Configure the PSL for dual port CAPI on Naples

2016-03-15 Thread Philippe Bergheaud
Naples CPUs have two CAPI ports.  Configure the PSL to route data to
the port corresponding to the PHB index.

Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
 drivers/misc/cxl/pci.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 0c6c17a1..3db0a0b 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -340,12 +340,15 @@ static void dump_afu_descriptor(struct cxl_afu *afu)
 #undef show_reg
 }
 
+#define CPU_IS_NAPLES() (cur_cpu_spec->pvr_value == 0x004c)
+
 static int init_implementation_adapter_regs(struct cxl *adapter, struct 
pci_dev *dev)
 {
struct device_node *np;
const __be32 *prop;
u64 psl_dsnctl;
u64 chipid;
+   u64 phb_index;
 
if (!(np = pnv_pci_get_phb_node(dev)))
return -ENODEV;
@@ -355,10 +358,20 @@ static int init_implementation_adapter_regs(struct cxl 
*adapter, struct pci_dev
if (!np)
return -ENODEV;
chipid = be32_to_cpup(prop);
-   of_node_put(np);
 
/* Tell PSL where to route data to */
psl_dsnctl = 0x02E89200ULL | (chipid << (63-5));
+   if (CPU_IS_NAPLES()) {
+   prop = of_get_property(np, "ibm,phb-index", NULL);
+   if (!prop) {
+   of_node_put(np);
+   return -ENODEV;
+   }
+   phb_index = be32_to_cpup(prop);
+   psl_dsnctl |= (phb_index << (63-11));
+   }
+   of_node_put(np);
+
cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl);
cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x2000200ULL);
/* snoop write mask */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH][RESEND] cxl: Set up and enable PSL Timebase

2015-08-28 Thread Philippe Bergheaud

This patch configures the PSL Timebase function and enables it,
after the CAPP has been initialized by OPAL.

Acked-by: Ian Munsie <imun...@au1.ibm.com>
Signed-off-by: Philippe Bergheaud <fe...@linux.vnet.ibm.com>
---
 drivers/misc/cxl/cxl.h |  5 +
 drivers/misc/cxl/pci.c | 57 +-
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e7af256..19489c6 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -83,8 +83,10 @@ static const cxl_p1_reg_t CXL_PSL_AFUSEL  = {0x00B0};
 /* 0x00C0:7EFF Implementation dependent area */
 static const cxl_p1_reg_t CXL_PSL_FIR1  = {0x0100};
 static const cxl_p1_reg_t CXL_PSL_FIR2  = {0x0108};
+static const cxl_p1_reg_t CXL_PSL_Timebase  = {0x0110};
 static const cxl_p1_reg_t CXL_PSL_VERSION   = {0x0118};
 static const cxl_p1_reg_t CXL_PSL_RESLCKTO  = {0x0128};
+static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140};
 static const cxl_p1_reg_t CXL_PSL_FIR_CNTL  = {0x0148};
 static const cxl_p1_reg_t CXL_PSL_DSNDCTL   = {0x0150};
 static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158};
@@ -152,6 +154,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
 #define CXL_PSL_SPAP_Size_Shift 4
 #define CXL_PSL_SPAP_V0x0001ULL

+/** CXL_PSL_Control /
+#define CXL_PSL_Control_tb 0x0001ULL
+
 /** CXL_PSL_DLCNTL */
 #define CXL_PSL_DLCNTL_D (0x1ull  (63-28))
 #define CXL_PSL_DLCNTL_C (0x1ull  (63-29))
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 03ddb2d..0f2ba4a 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -370,6 +370,55 @@ static int init_implementation_adapter_regs(struct cxl 
*adapter, struct pci_dev
return 0;
 }

+#define TBSYNC_CNT(n) (((u64)n  0x7)  (63-6))
+#define _2048_250MHZ_CYCLES 1
+
+static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
+{
+   u64 psl_tb;
+   int delta;
+   unsigned int retry = 0;
+   struct device_node *np;
+
+   if (!(np = pnv_pci_get_phb_node(dev)))
+   return -ENODEV;
+
+   /* Do not fail when CAPP timebase sync is not supported by OPAL */
+   of_node_get(np);
+   if (! of_get_property(np, ibm,capp-timebase-sync, NULL)) {
+   of_node_put(np);
+   pr_err(PSL: Timebase sync: OPAL support missing\n);
+   return 0;
+   }
+   of_node_put(np);
+
+   /*
+* Setup PSL Timebase Control and Status register
+* with the recommended Timebase Sync Count value
+*/
+   cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
+TBSYNC_CNT(2 * _2048_250MHZ_CYCLES));
+
+   /* Enable PSL Timebase */
+   cxl_p1_write(adapter, CXL_PSL_Control, 0x);
+   cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
+
+   /* Wait until CORE TB and PSL TB difference = 16usecs */
+   do {
+   msleep(1);
+   if (retry++  5) {
+   pr_err(PSL: Timebase sync: giving up!\n);
+   return -EIO;
+   }
+   psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase);
+   delta = mftb() - psl_tb;
+   if (delta  0)
+   delta = -delta;
+   } while (cputime_to_usecs(delta)  16);
+
+   return 0;
+}
+
 static int init_implementation_afu_regs(struct cxl_afu *afu)
 {
/* read/write masks for this slice */
@@ -1069,9 +1118,12 @@ err1:
return NULL;
 }

+#define CXL_PSL_ErrIVTE_tberror (0x1ull  (63-31))
+
 static int sanitise_adapter_regs(struct cxl *adapter)
 {
-   cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x);
+   /* Clear PSL tberror bit by writing 1 to it */
+   cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror);
return cxl_tlb_slb_invalidate(adapter);
 }

@@ -1124,6 +1176,9 @@ static int cxl_configure_adapter(struct cxl *adapter, 
struct pci_dev *dev)
if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON)))
goto err;

+   if ((rc = cxl_setup_psl_timebase(adapter, dev)))
+   goto err;
+
if ((rc = cxl_register_psl_err_irq(adapter)))
goto err;

--
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2] cxl: Set up and enable PSL Timebase

2015-06-22 Thread Philippe Bergheaud

Philippe Bergheaud wrote:

This patch configures the PSL Timebase function and enables it,
after the CAPP has been initialized by OPAL.

V2:
 - Clear CXL_PSL_ErrIVTE_tberror bit
 - Define the sync count unit
 - Wait 1ms before each test
 - Use negative error code
 - Do not ignore errors
 - Except if timebase is not supported by OPAL
 - Be silent on success

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
---
 drivers/misc/cxl/cxl.h |5 
 drivers/misc/cxl/pci.c |   57 +++-
 2 files changed, 61 insertions(+), 1 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a1cee47..38a7cf9 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -82,8 +82,10 @@ static const cxl_p1_reg_t CXL_PSL_AFUSEL  = {0x00B0};
 /* 0x00C0:7EFF Implementation dependent area */
 static const cxl_p1_reg_t CXL_PSL_FIR1  = {0x0100};
 static const cxl_p1_reg_t CXL_PSL_FIR2  = {0x0108};
+static const cxl_p1_reg_t CXL_PSL_Timebase  = {0x0110};
 static const cxl_p1_reg_t CXL_PSL_VERSION   = {0x0118};
 static const cxl_p1_reg_t CXL_PSL_RESLCKTO  = {0x0128};
+static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140};
 static const cxl_p1_reg_t CXL_PSL_FIR_CNTL  = {0x0148};
 static const cxl_p1_reg_t CXL_PSL_DSNDCTL   = {0x0150};
 static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158};
@@ -151,6 +153,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
 #define CXL_PSL_SPAP_Size_Shift 4
 #define CXL_PSL_SPAP_V0x0001ULL
 
+/** CXL_PSL_Control /

+#define CXL_PSL_Control_tb 0x0001ULL
+
 /** CXL_PSL_DLCNTL */
 #define CXL_PSL_DLCNTL_D (0x1ull  (63-28))
 #define CXL_PSL_DLCNTL_C (0x1ull  (63-29))
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index fc938de..ea1a79f 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -360,6 +360,55 @@ static int init_implementation_adapter_regs(struct cxl 
*adapter, struct pci_dev
return 0;
 }
 
+#define TBSYNC_CNT(n) (((u64)n  0x7)  (63-6))

+#define _2048_250MHZ_CYCLES 1
+
+static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
+{
+   u64 psl_tb;
+   int delta;
+   unsigned int retry = 0;
+   struct device_node *np;
+
+   if (!(np = pnv_pci_to_phb_node(dev)))
+   return -ENODEV;
+
+   /* Do not fail when CAPP timebase sync is not supported by OPAL */
+   of_node_get(np);
+   if (! of_get_property(np, ibm,capp-timebase-sync, NULL)) {
+   of_node_put(np);
+   pr_err(PSL: Timebase sync: OPAL support missing\n);
+   return 0;
+   }
+   of_node_put(np);
+
+   /*
+* Setup PSL Timebase Control and Status register
+* with the recommended Timebase Sync Count value
+*/
+   cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
+TBSYNC_CNT(2 * _2048_250MHZ_CYCLES));
+
+   /* Enable PSL Timebase */
+   cxl_p1_write(adapter, CXL_PSL_Control, 0x);
+   cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
+
+   /* Wait until CORE TB and PSL TB difference = 16usecs */
+   do {
+   msleep(1);
+   if (retry++  5) {
+   pr_err(PSL: Timebase sync: giving up!\n);
+   return -EIO;
+   }
+   psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase);
+   delta = mftb() - psl_tb;
+   if (delta  0)
+   delta = -delta;
+   } while (cputime_to_usecs(delta)  16);
+
+   return 0;
+}
+
 static int init_implementation_afu_regs(struct cxl_afu *afu)
 {
/* read/write masks for this slice */
@@ -952,9 +1001,12 @@ static struct cxl *cxl_alloc_adapter(struct pci_dev *dev)
return adapter;
 }
 
+#define CXL_PSL_ErrIVTE_tberror (0x1ull  (63-31))

+
 static int sanitise_adapter_regs(struct cxl *adapter)
 {
-   cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x);
+   /* Clear PSL tberror bit by writing 1 to it */
+   cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror);
return cxl_tlb_slb_invalidate(adapter);
 }
 
@@ -995,6 +1047,9 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)

if ((rc = pnv_phb_to_cxl(dev, OPAL_PHB_CAPI_MODE_CAPI)))
goto err3;
 
+	if ((rc = cxl_setup_psl_timebase(adapter, dev)))

+   goto err3;
+
if ((rc = cxl_register_psl_err_irq(adapter)))
goto err3;
 

Update.

With ipmitool:
PSL timebase sync always succeeds

With bml:
1. PSL timebase sync fails after a clean shutdown / reboot
2. PSL timebase sync succeeds after a checkstop halt / reboot
3. PSL timebase sync succeeds after a pci off / on cycle

It seems that PERST does not reset the PSL correctly, with bml.

Philippe

[PATCH V2] cxl: Set up and enable PSL Timebase

2015-06-10 Thread Philippe Bergheaud
This patch configures the PSL Timebase function and enables it,
after the CAPP has been initialized by OPAL.

V2:
 - Clear CXL_PSL_ErrIVTE_tberror bit
 - Define the sync count unit
 - Wait 1ms before each test
 - Use negative error code
 - Do not ignore errors
 - Except if timebase is not supported by OPAL
 - Be silent on success

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
---
 drivers/misc/cxl/cxl.h |5 
 drivers/misc/cxl/pci.c |   57 +++-
 2 files changed, 61 insertions(+), 1 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a1cee47..38a7cf9 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -82,8 +82,10 @@ static const cxl_p1_reg_t CXL_PSL_AFUSEL  = {0x00B0};
 /* 0x00C0:7EFF Implementation dependent area */
 static const cxl_p1_reg_t CXL_PSL_FIR1  = {0x0100};
 static const cxl_p1_reg_t CXL_PSL_FIR2  = {0x0108};
+static const cxl_p1_reg_t CXL_PSL_Timebase  = {0x0110};
 static const cxl_p1_reg_t CXL_PSL_VERSION   = {0x0118};
 static const cxl_p1_reg_t CXL_PSL_RESLCKTO  = {0x0128};
+static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140};
 static const cxl_p1_reg_t CXL_PSL_FIR_CNTL  = {0x0148};
 static const cxl_p1_reg_t CXL_PSL_DSNDCTL   = {0x0150};
 static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158};
@@ -151,6 +153,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
 #define CXL_PSL_SPAP_Size_Shift 4
 #define CXL_PSL_SPAP_V0x0001ULL
 
+/** CXL_PSL_Control /
+#define CXL_PSL_Control_tb 0x0001ULL
+
 /** CXL_PSL_DLCNTL */
 #define CXL_PSL_DLCNTL_D (0x1ull  (63-28))
 #define CXL_PSL_DLCNTL_C (0x1ull  (63-29))
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index fc938de..ea1a79f 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -360,6 +360,55 @@ static int init_implementation_adapter_regs(struct cxl 
*adapter, struct pci_dev
return 0;
 }
 
+#define TBSYNC_CNT(n) (((u64)n  0x7)  (63-6))
+#define _2048_250MHZ_CYCLES 1
+
+static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
+{
+   u64 psl_tb;
+   int delta;
+   unsigned int retry = 0;
+   struct device_node *np;
+
+   if (!(np = pnv_pci_to_phb_node(dev)))
+   return -ENODEV;
+
+   /* Do not fail when CAPP timebase sync is not supported by OPAL */
+   of_node_get(np);
+   if (! of_get_property(np, ibm,capp-timebase-sync, NULL)) {
+   of_node_put(np);
+   pr_err(PSL: Timebase sync: OPAL support missing\n);
+   return 0;
+   }
+   of_node_put(np);
+
+   /*
+* Setup PSL Timebase Control and Status register
+* with the recommended Timebase Sync Count value
+*/
+   cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
+TBSYNC_CNT(2 * _2048_250MHZ_CYCLES));
+
+   /* Enable PSL Timebase */
+   cxl_p1_write(adapter, CXL_PSL_Control, 0x);
+   cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
+
+   /* Wait until CORE TB and PSL TB difference = 16usecs */
+   do {
+   msleep(1);
+   if (retry++  5) {
+   pr_err(PSL: Timebase sync: giving up!\n);
+   return -EIO;
+   }
+   psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase);
+   delta = mftb() - psl_tb;
+   if (delta  0)
+   delta = -delta;
+   } while (cputime_to_usecs(delta)  16);
+
+   return 0;
+}
+
 static int init_implementation_afu_regs(struct cxl_afu *afu)
 {
/* read/write masks for this slice */
@@ -952,9 +1001,12 @@ static struct cxl *cxl_alloc_adapter(struct pci_dev *dev)
return adapter;
 }
 
+#define CXL_PSL_ErrIVTE_tberror (0x1ull  (63-31))
+
 static int sanitise_adapter_regs(struct cxl *adapter)
 {
-   cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x);
+   /* Clear PSL tberror bit by writing 1 to it */
+   cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror);
return cxl_tlb_slb_invalidate(adapter);
 }
 
@@ -995,6 +1047,9 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)
if ((rc = pnv_phb_to_cxl(dev, OPAL_PHB_CAPI_MODE_CAPI)))
goto err3;
 
+   if ((rc = cxl_setup_psl_timebase(adapter, dev)))
+   goto err3;
+
if ((rc = cxl_register_psl_err_irq(adapter)))
goto err3;
 
-- 
1.7.2.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] cxl: Set up and enable PSL Timebase

2015-06-01 Thread Philippe Bergheaud

Michael Neuling wrote:

On Mon, 2015-06-01 at 09:37 +0200, Philippe Bergheaud wrote:


Michael Neuling wrote:


On Thu, 2015-05-28 at 15:12 +0200, Philippe Bergheaud wrote:



This patch configures the PSL Timebase function and enables it,
after the CAPP has been initialized by OPAL. Failures are reported
and ignored.



Needs a Signed-off-by.


Yes.


Comments inline.




---
drivers/misc/cxl/cxl.h |5 +
drivers/misc/cxl/pci.c |   35 +++
2 files changed, 40 insertions(+), 0 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a1cee47..38a7cf9 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -82,8 +82,10 @@ static const cxl_p1_reg_t CXL_PSL_AFUSEL  = {0x00B0};
/* 0x00C0:7EFF Implementation dependent area */
static const cxl_p1_reg_t CXL_PSL_FIR1  = {0x0100};
static const cxl_p1_reg_t CXL_PSL_FIR2  = {0x0108};
+static const cxl_p1_reg_t CXL_PSL_Timebase  = {0x0110};
static const cxl_p1_reg_t CXL_PSL_VERSION   = {0x0118};
static const cxl_p1_reg_t CXL_PSL_RESLCKTO  = {0x0128};
+static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140};
static const cxl_p1_reg_t CXL_PSL_FIR_CNTL  = {0x0148};
static const cxl_p1_reg_t CXL_PSL_DSNDCTL   = {0x0150};
static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158};
@@ -151,6 +153,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
#define CXL_PSL_SPAP_Size_Shift 4
#define CXL_PSL_SPAP_V0x0001ULL

+/** CXL_PSL_Control /
+#define CXL_PSL_Control_tb 0x0001ULL
+
/** CXL_PSL_DLCNTL */
#define CXL_PSL_DLCNTL_D (0x1ull  (63-28))
#define CXL_PSL_DLCNTL_C (0x1ull  (63-29))
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index fc938de..afd89cc 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -360,6 +360,38 @@ static int init_implementation_adapter_regs(struct cxl 
*adapter, struct pci_dev
return 0;
}

+#define TBSYNC_CNT(n) (((u64)n  0x7)  (63-6))
+
+static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
+{
+   u64 psl_tb;
+   int delta;
+   unsigned int retry = 0;
+
+   /*
+* Setup PSL Timebase Control and Status register
+* with the recommended Timebase Sync Count value
+*/
+	cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, TBSYNC_CNT(2));  



2?  


Quoting the PSL workbook description of the PSL_TB_CTLSTAT register:

4:6 tbsync_cnt
TimebaseSyncCount. Number of 250MHz cycles x 2048 before initiating another 
Timebase Recalibration sequence.
Processor chipTimebase facilities receive a tod_sync pulse every 16us or 4000 
250 MHz cycles so '010' is the Recommended value.
000 = never
001 = 2048
010 = 4096 (2 * 2048)
...
111 = 14336 (7 * 2048)

Will make the TimebaseSyncCount unit explicit. Something like:

#define _2048_250MHZ_CYCLES 1
cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, TBSYNC_CNT(2 * _2048_250MHZ_CYCLES));



Sounds good!



+
+   /* Enable PSL Timebase */
+   cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
+   /* Wait until CORE TB and PSL TB difference = 16usecs */



How many tries does this normally take?


Two. The second attempt always succeeds.



Ok



Should we have a sleep in here to wait for it to sync rather than just
coming back around right away?


Yes, will add msleep(1) at the beginning of the loop (as the first attempt 
always fails).



Humm, ok.  Is there any documentation to say how long it's suppose to
take?  

Could not find any.
With msleep(1) at the beginning of the loop, the first attempt always succeeds, 
as far as I can see.

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] cxl: Set up and enable PSL Timebase

2015-06-01 Thread Philippe Bergheaud

Michael Neuling wrote:

On Thu, 2015-05-28 at 15:12 +0200, Philippe Bergheaud wrote:


This patch configures the PSL Timebase function and enables it,
after the CAPP has been initialized by OPAL. Failures are reported
and ignored.



Needs a Signed-off-by.

Yes.

Comments inline.



---
drivers/misc/cxl/cxl.h |5 +
drivers/misc/cxl/pci.c |   35 +++
2 files changed, 40 insertions(+), 0 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a1cee47..38a7cf9 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -82,8 +82,10 @@ static const cxl_p1_reg_t CXL_PSL_AFUSEL  = {0x00B0};
/* 0x00C0:7EFF Implementation dependent area */
static const cxl_p1_reg_t CXL_PSL_FIR1  = {0x0100};
static const cxl_p1_reg_t CXL_PSL_FIR2  = {0x0108};
+static const cxl_p1_reg_t CXL_PSL_Timebase  = {0x0110};
static const cxl_p1_reg_t CXL_PSL_VERSION   = {0x0118};
static const cxl_p1_reg_t CXL_PSL_RESLCKTO  = {0x0128};
+static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140};
static const cxl_p1_reg_t CXL_PSL_FIR_CNTL  = {0x0148};
static const cxl_p1_reg_t CXL_PSL_DSNDCTL   = {0x0150};
static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158};
@@ -151,6 +153,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
#define CXL_PSL_SPAP_Size_Shift 4
#define CXL_PSL_SPAP_V0x0001ULL

+/** CXL_PSL_Control /
+#define CXL_PSL_Control_tb 0x0001ULL
+
/** CXL_PSL_DLCNTL */
#define CXL_PSL_DLCNTL_D (0x1ull  (63-28))
#define CXL_PSL_DLCNTL_C (0x1ull  (63-29))
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index fc938de..afd89cc 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -360,6 +360,38 @@ static int init_implementation_adapter_regs(struct cxl 
*adapter, struct pci_dev
return 0;
}

+#define TBSYNC_CNT(n) (((u64)n  0x7)  (63-6))
+
+static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
+{
+   u64 psl_tb;
+   int delta;
+   unsigned int retry = 0;
+
+   /*
+* Setup PSL Timebase Control and Status register
+* with the recommended Timebase Sync Count value
+*/
+	cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, TBSYNC_CNT(2));  



2?  

Quoting the PSL workbook description of the PSL_TB_CTLSTAT register:

4:6 tbsync_cnt
TimebaseSyncCount. Number of 250MHz cycles x 2048 before initiating another 
Timebase Recalibration sequence.
Processor chipTimebase facilities receive a tod_sync pulse every 16us or 4000 
250 MHz cycles so '010' is the Recommended value.
000 = never
001 = 2048
010 = 4096 (2 * 2048)
...
111 = 14336 (7 * 2048)

Will make the TimebaseSyncCount unit explicit. Something like:

#define _2048_250MHZ_CYCLES 1
cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, TBSYNC_CNT(2 * _2048_250MHZ_CYCLES));



+
+   /* Enable PSL Timebase */
+   cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
+   /* Wait until CORE TB and PSL TB difference = 16usecs */



How many tries does this normally take?

Two. The second attempt always succeeds.

Should we have a sleep in here to wait for it to sync rather than just
coming back around right away?

Yes, will add msleep(1) at the beginning of the loop (as the first attempt 
always fails).



+   do {
+   if (retry++  5) {
+   pr_err(PSL: Timebase sync: giving up!\n);
+   return 1;



Please use negative error codes here.  -EIO?

OK.



+   }
+   psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase);
+   delta = mftb() - psl_tb;
+   if (delta  0)
+   delta = -delta;
+   } while (cputime_to_usecs(delta)  16);
+
+   dev_info(dev-dev, PSL: Timebase synced\n);
+   return 0;
+}
+
static int init_implementation_afu_regs(struct cxl_afu *afu)
{
/* read/write masks for this slice */
@@ -995,6 +1027,9 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)
if ((rc = pnv_phb_to_cxl(dev, OPAL_PHB_CAPI_MODE_CAPI)))
goto err3;

+   /* Don't care if this one fails: */
+   cxl_setup_psl_timebase(adapter, dev);



And check it here.

OK.

Thank you,
Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] cxl: Set up and enable PSL Timebase

2015-06-01 Thread Philippe Bergheaud

Michael Neuling wrote:
 Please use negative error codes here.  -EIO?
 And check it here.

Mikey,

I am reluctant to fail the entire CAPI init after a PSL timebase sync failure.
If we ignore the error, the CAPI device stays available (without timebase sync).
If we honour the error, the CAPI device fails entirely.

I know three reasons why PSL timebase sync can fail:
1. h/w failure
2. OPAL did not initialize the CAPP timebase (wrong OPAL version)
3. the PCIe bus was not powered off/on between shutdown and reboot

I think that it is premature to choose to fail the entire CAPI init in all 
cases.
In particular, point 3. introduces a regression, as PCIe off/on was never a 
requirement for booting CAPI on P8.

I have tried one workaround so far: forcing the 0 to 1 transition of the tb bit 
of the PSL register TB_CTLSTAT.
In vain.

What do you think?

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] cxl: Set up and enable PSL Timebase

2015-05-28 Thread Philippe Bergheaud
This patch configures the PSL Timebase function and enables it,
after the CAPP has been initialized by OPAL. Failures are reported
and ignored.
---
 drivers/misc/cxl/cxl.h |5 +
 drivers/misc/cxl/pci.c |   35 +++
 2 files changed, 40 insertions(+), 0 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a1cee47..38a7cf9 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -82,8 +82,10 @@ static const cxl_p1_reg_t CXL_PSL_AFUSEL  = {0x00B0};
 /* 0x00C0:7EFF Implementation dependent area */
 static const cxl_p1_reg_t CXL_PSL_FIR1  = {0x0100};
 static const cxl_p1_reg_t CXL_PSL_FIR2  = {0x0108};
+static const cxl_p1_reg_t CXL_PSL_Timebase  = {0x0110};
 static const cxl_p1_reg_t CXL_PSL_VERSION   = {0x0118};
 static const cxl_p1_reg_t CXL_PSL_RESLCKTO  = {0x0128};
+static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140};
 static const cxl_p1_reg_t CXL_PSL_FIR_CNTL  = {0x0148};
 static const cxl_p1_reg_t CXL_PSL_DSNDCTL   = {0x0150};
 static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158};
@@ -151,6 +153,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
 #define CXL_PSL_SPAP_Size_Shift 4
 #define CXL_PSL_SPAP_V0x0001ULL
 
+/** CXL_PSL_Control /
+#define CXL_PSL_Control_tb 0x0001ULL
+
 /** CXL_PSL_DLCNTL */
 #define CXL_PSL_DLCNTL_D (0x1ull  (63-28))
 #define CXL_PSL_DLCNTL_C (0x1ull  (63-29))
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index fc938de..afd89cc 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -360,6 +360,38 @@ static int init_implementation_adapter_regs(struct cxl 
*adapter, struct pci_dev
return 0;
 }
 
+#define TBSYNC_CNT(n) (((u64)n  0x7)  (63-6))
+
+static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
+{
+   u64 psl_tb;
+   int delta;
+   unsigned int retry = 0;
+
+   /*
+* Setup PSL Timebase Control and Status register
+* with the recommended Timebase Sync Count value
+*/
+   cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, TBSYNC_CNT(2));
+
+   /* Enable PSL Timebase */
+   cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
+   /* Wait until CORE TB and PSL TB difference = 16usecs */
+   do {
+   if (retry++  5) {
+   pr_err(PSL: Timebase sync: giving up!\n);
+   return 1;
+   }
+   psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase);
+   delta = mftb() - psl_tb;
+   if (delta  0)
+   delta = -delta;
+   } while (cputime_to_usecs(delta)  16);
+
+   dev_info(dev-dev, PSL: Timebase synced\n);
+   return 0;
+}
+
 static int init_implementation_afu_regs(struct cxl_afu *afu)
 {
/* read/write masks for this slice */
@@ -995,6 +1027,9 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)
if ((rc = pnv_phb_to_cxl(dev, OPAL_PHB_CAPI_MODE_CAPI)))
goto err3;
 
+   /* Don't care if this one fails: */
+   cxl_setup_psl_timebase(adapter, dev);
+
if ((rc = cxl_register_psl_err_irq(adapter)))
goto err3;
 
-- 
1.7.2.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] cxl: Fix a typo in ABI documentation

2015-03-26 Thread Philippe Bergheaud

Fix the attribute name of the configuration record class ID.

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
---
 Documentation/ABI/testing/sysfs-class-cxl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-class-cxl 
b/Documentation/ABI/testing/sysfs-class-cxl
index 3680364..d46bba8 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -100,7 +100,7 @@ Description:read only
Hexadecimal value of the device ID found in this AFU
configuration record.

-What:   /sys/class/cxl/afu/crconfig num/vendor
+What:   /sys/class/cxl/afu/crconfig num/class
 Date:   February 2015
 Contact:linuxppc-dev@lists.ozlabs.org
 Description:read only
-- 1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] Update CXL ABI documentation

2014-12-12 Thread Philippe Bergheaud

From: Philippe Bergheaud fe...@linux.vnet.ibm.com

This fixes two typos and explains where shared attributes are stored.

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
Acked-by: Michael Neuling mi...@neuling.org
---
 Documentation/ABI/testing/sysfs-class-cxl |   11 ---
 1 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-cxl 
b/Documentation/ABI/testing/sysfs-class-cxl
index faf9479..3393ee6 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -1,3 +1,9 @@
+Note: Attributes that are shared between devices are stored in the directory
+pointed to by the symlink device/.
+Example: The real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is
+/sys/class/cxl/afu0.0s/device/irqs_max, i.e. /sys/class/cxl/afu0.0/irqs_max.
+
+
 Slave contexts (eg. /sys/class/cxl/afu0.0s):

 What:   /sys/class/cxl/afu/irqs_max
@@ -67,7 +73,7 @@ Contact:linuxppc-dev@lists.ozlabs.org
 Description:read only
 Decimal value of the current version of the kernel/user API.

-What:   /sys/class/cxl/afu/api_version_com
+What:   /sys/class/cxl/afu/api_version_compatible
 Date:   September 2014
 Contact:linuxppc-dev@lists.ozlabs.org
 Description:read only
@@ -75,7 +81,6 @@ Description:read only
 this this kernel supports.


-
 Master contexts (eg. /sys/class/cxl/afu0.0m)

 What:   /sys/class/cxl/afum/mmio_size
@@ -106,7 +111,7 @@ Contact:linuxppc-dev@lists.ozlabs.org
 Description:read only
 Identifies the CAIA Version the card implements.

-What:   /sys/class/cxl/card/psl_version
+What:   /sys/class/cxl/card/psl_revision
 Date:   September 2014
 Contact:linuxppc-dev@lists.ozlabs.org
 Description:read only
--
1.7.2.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] PPC: bpf_jit_comp: add SKF_AD_PKTTYPE instruction

2014-11-03 Thread Philippe Bergheaud

Denis Kirjanov wrote:

Any feedback from PPC folks?


I have reviewed the patch and it looks fine to me.
I have tested successfully on ppc64le.
I could not test it on ppc64.

Philippe


On 10/26/14, Denis Kirjanov k...@linux-powerpc.org wrote:


Cc: Matt Evans m...@ozlabs.org
Signed-off-by: Denis Kirjanov k...@linux-powerpc.org
---
arch/powerpc/include/asm/ppc-opcode.h | 1 +
arch/powerpc/net/bpf_jit.h| 7 +++
arch/powerpc/net/bpf_jit_comp.c   | 5 +
3 files changed, 13 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h
b/arch/powerpc/include/asm/ppc-opcode.h
index 6f85362..1a52877 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -204,6 +204,7 @@
#define PPC_INST_ERATSX_DOT 0x7c000127

/* Misc instructions for BPF compiler */
+#define PPC_INST_LBZ   0x8800
#define PPC_INST_LD 0xe800
#define PPC_INST_LHZ0xa000
#define PPC_INST_LHBRX  0x7c00062c
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 9aee27c..c406aa9 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -87,6 +87,9 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) |\
 ___PPC_RA(base) | ((i)  0xfffc))

+
+#define PPC_LBZ(r, base, i)EMIT(PPC_INST_LBZ | ___PPC_RT(r) |\
+___PPC_RA(base) | IMM_L(i))
#define PPC_LD(r, base, i)  EMIT(PPC_INST_LD | ___PPC_RT(r) | \
 ___PPC_RA(base) | IMM_L(i))
#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) |\
@@ -96,6 +99,10 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
#define PPC_LHBRX(r, base, b)   EMIT(PPC_INST_LHBRX | ___PPC_RT(r) |  \
 ___PPC_RA(base) | ___PPC_RB(b))
/* Convenience helpers for the above with 'far' offsets: */
+#define PPC_LBZ_OFFS(r, base, i) do { if ((i)  32768) PPC_LBZ(r, base, i);
 \
+   else {  PPC_ADDIS(r, base, IMM_HA(i));\
+   PPC_LBZ(r, r, IMM_L(i)); } } while(0)
+
#define PPC_LD_OFFS(r, base, i) do { if ((i)  32768) PPC_LD(r, base, i);
 \
else {  PPC_ADDIS(r, base, IMM_HA(i));\
PPC_LD(r, r, IMM_L(i)); } } while(0)
diff --git a/arch/powerpc/net/bpf_jit_comp.c
b/arch/powerpc/net/bpf_jit_comp.c
index cbae2df..d110e28 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -407,6 +407,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32
*image,
PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
  queue_mapping));
break;
+   case BPF_ANC | SKF_AD_PKTTYPE:
+   PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET());
+   PPC_ANDI(r_A, r_A, PKT_TYPE_MAX);
+   PPC_SRWI(r_A, r_A, 5);
+   break;
case BPF_ANC | SKF_AD_CPU:
#ifdef CONFIG_SMP
/*
--
2.1.0




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: memcpy optimization for 64bit LE

2014-05-05 Thread Philippe Bergheaud

Anton Blanchard wrote:

Unaligned stores take alignment exceptions on POWER7 running in little-endian.
This is a dumb little-endian base memcpy that prevents unaligned stores.
Once booted the feature fixup code switches over to the VMX copy loops
(which are already endian safe).

The question is what we do before that switch over. The base 64bit
memcpy takes alignment exceptions on POWER7 so we can't use it as is.
Fixing the causes of alignment exception would slow it down, because
we'd need to ensure all loads and stores are aligned either through
rotate tricks or bytewise loads and stores. Either would be bad for
all other 64bit platforms.

[ I simplified the loop a bit - Anton ]

Got it.

The 3 instructions that you have removed were modifying r5 for no reason,
as the last instruction was always resetting r5 to its initial value.

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [git pull] Please pull abiv2 branch

2014-04-29 Thread Philippe Bergheaud

Rusty Russell wrote:

Philippe Bergheaud fe...@linux.vnet.ibm.com writes:


Anton Blanchard wrote:


Here are the ABIv2 patches rebased against 3.15-rc2.


After recompiling 3.15-rc2 with the ABIv2 patches,
I see the following line in Modules.symvers:

0x TOC. vmlinux EXPORT_SYMBOL

Kernel will not load modules because TOC. has no CRC.
Is this expected ? Shouldn't TOC. have a CRC ?



What happens when you try to load a module?  It should work...


My mistake, sorry: kernel 3.15-rc2 crashes at boot, in the SLES12
Beta5 environment that I am using, before any module load attempt.

The problem happens with the SLES12 kernel of the day 3.12.17,
plus the backported ABIv2 patch set.  Boot fails with:
kernel: ibmveth: no symbol version for TOC.
kernel: ibmveth: Unknown symbol TOC. (err -22)
kernel: scsi_mod: no symbol version for TOC.
kernel: scsi_mod: Unknown symbol TOC. (err -22)

In the rescue shell, repeating a plain modprobe fails again:
:/# modprobe scsi_mod
modprobe: ERROR: could not insert 'scsi_mod': Invalid argument

And finally, modprobe succeeds with --force:
:/# modprobe --force scsi_mod
scsi_mod: module has bad taint, not creating trace events

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [git pull] Please pull abiv2 branch

2014-04-28 Thread Philippe Bergheaud

Anton Blanchard wrote:

Here are the ABIv2 patches rebased against 3.15-rc2.


After recompiling 3.15-rc2 with the ABIv2 patches,
I see the following line in Modules.symvers:

0x TOC. vmlinux EXPORT_SYMBOL

Kernel will not load modules because TOC. has no CRC.
Is this expected ? Shouldn't TOC. have a CRC ?

Philippe






___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 15/33] powerpc: Fix ABIv2 issues with stack offsets in assembly code

2014-04-01 Thread Philippe Bergheaud

Anton Blanchard wrote:

diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 72ad055..01da956 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,7 +12,7 @@
.align  7
 _GLOBAL(memcpy)
 BEGIN_FTR_SECTION
-   std r3,48(r1)   /* save destination pointer for return value */
+   std r3,STK_PARAM(R3)(r1)/* save destination pointer for return 
value */
 FTR_SECTION_ELSE
 #ifndef SELFTEST
b   memcpy_power7


This chunk is rejected when applied to linux-3.14, because of the reference to 
SELFTEST.
The last three context lines should rather read:

 FTR_SECTION_ELSE
b   memcpy_power7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)

Same issue with [PATCH 16/33] powerpc: Fix unsafe accesses to parameter area in 
ELFv2.

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: set default kernel thread priority to medium-low

2013-12-11 Thread Philippe Bergheaud

Benjamin Herrenschmidt wrote:

On Wed, 2013-12-11 at 17:29 +1100, Michael Ellerman wrote:



It would be nice if you could make an assertion about what the state of HMT
handling should be once your patch is applied.

I think it's:

* The kernel should use HMT_MEDIUM_LOW as its default priority
* The kernel should use HMT_LOW as its low priority

Which would imply:

* The kernel should not use HMT_MEDIUM anywhere ..
* Nor should it use any of the other higher HMT modes.

Do you agree?

Not entirely.  HMT_MEDIUM might still be used by the kernel, in places where a 
priority higher than the default is required.

The reason I ask is I still see HMT_MEDIUM used in a few places, and it's not
clear to me if that is correct.



HMT_MEDIUM used to be our default no ?

Yes, but I am not sure that all references to HMT_MEDIUM were references to the 
default kernel priority.

Also there's an open question... when doing things with interrupts off
(or worse, in real mode) such as some KVM hcalls etc... should we on the
contrary boost up to limit interrupt latency ?

Yes. I think that there are cases when one should consider using HMT_MEDIUM.

Shouldn't we define a new macro HMT_DEFAULT, to identify explicitly where the 
default priority is required?

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: set default kernel thread priority to medium-low

2013-12-11 Thread Philippe Bergheaud

Michael Ellerman wrote:

On Wed, 2013-12-11 at 11:30 +0100, Philippe Bergheaud wrote:


Benjamin Herrenschmidt wrote:


On Wed, 2013-12-11 at 17:29 +1100, Michael Ellerman wrote:




It would be nice if you could make an assertion about what the state of HMT
handling should be once your patch is applied.

I think it's:

* The kernel should use HMT_MEDIUM_LOW as its default priority
* The kernel should use HMT_LOW as its low priority

Which would imply:

* The kernel should not use HMT_MEDIUM anywhere ..
* Nor should it use any of the other higher HMT modes.

Do you agree?




Not entirely.  HMT_MEDIUM might still be used by the kernel, in places where a
priority higher than the default is required.



Right. But any code that currently uses HMT_MEDIUM is at the default level,
whereas once your patch is applied any code still using HMT_MEDIUM will be
boosted vs the default.

So any code that still uses HMT_MEDIUM after your patch seems like a bug to me.



The reason I ask is I still see HMT_MEDIUM used in a few places, and it's not
clear to me if that is correct.



HMT_MEDIUM used to be our default no ?




Yes, but I am not sure that all references to HMT_MEDIUM were references to
the default kernel priority.



What were they references to? Regardless they will now have the effect of
boosting the priority in those code sections. It would be good to understand,
and document, any places where we still use HMT_MEDIUM and why.

Yes. This needs to be documented.




Also there's an open question... when doing things with interrupts off
(or worse, in real mode) such as some KVM hcalls etc... should we on the
contrary boost up to limit interrupt latency ?




Yes. I think that there are cases when one should consider using HMT_MEDIUM.



Or HIGH?

Correct, I had not thought of that option.


But let's not get side-tracked on that until we've got the default sorted.



Shouldn't we define a new macro HMT_DEFAULT, to identify explicitly where
the default priority is required?



That might help clarify things yes.

cheers


Thank you for the help. I will rework this.

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc: set default kernel thread priority to medium-low

2013-12-09 Thread Philippe Bergheaud
All the important PThread locking occurs in GLIBC libpthread.so

For scaling to large core counts we need to stay out of the kernel and 
scheduler as much as possible which implies increasing the spin time in user 
mode. For POWER implementations with SMT this implies that user mode needs to 
manage SMT priority for spinning and active (in the critical region) threads.

Libpthread must be able to raise and lower the SMT priority versus the 
default to be effective.

This lowers the default kernel thread priority from medium to medium-low.

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/exception-64s.h|2 +-
 arch/powerpc/include/asm/ppc_asm.h  |4 ++--
 arch/powerpc/include/asm/processor.h|2 +-
 arch/powerpc/include/asm/spinlock.h |8 
 arch/powerpc/kernel/entry_64.S  |2 +-
 arch/powerpc/kernel/exceptions-64s.S|4 ++--
 arch/powerpc/kernel/head_64.S   |4 ++--
 arch/powerpc/kernel/idle.c  |2 +-
 arch/powerpc/kernel/prom_init.c |2 +-
 arch/powerpc/kernel/time.c  |2 +-
 arch/powerpc/kvm/book3s_hv.c|2 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |4 ++--
 arch/powerpc/lib/locks.c|2 +-
 arch/powerpc/platforms/cell/beat_hvCall.S   |   16 
 arch/powerpc/platforms/powernv/opal-takeover.S  |2 +-
 arch/powerpc/platforms/pseries/hvCall.S |   10 +-
 arch/powerpc/platforms/pseries/processor_idle.c |4 ++--
 17 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 402c1c4..30bedd9 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -135,7 +135,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
  */
 #define HMT_MEDIUM_PPR_DISCARD \
 BEGIN_FTR_SECTION_NESTED(942)  \
-   HMT_MEDIUM; \
+   HMT_MEDIUM_LOW; \
 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,0,942)  /*non P7*/  
 
 /*
diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index ce05bba..22d4ba4 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -478,9 +478,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
  * PPR restore macros used in entry_64.S
  * Used for P7 or later processors
  */
-#define HMT_MEDIUM_LOW_HAS_PPR \
+#define HMT_LOW_HAS_PPR
\
 BEGIN_FTR_SECTION_NESTED(944)  \
-   HMT_MEDIUM_LOW; \
+   HMT_LOW;\
 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,944)
 
 #define SET_DEFAULT_THREAD_PPR(ra, rb) \
diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index b4a3045..2f8625b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -387,7 +387,7 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
 }
 
 #ifdef CONFIG_PPC64
-#define cpu_relax()do { HMT_low(); HMT_medium(); barrier(); } while (0)
+#define cpu_relax()do { HMT_low(); HMT_medium_low(); barrier(); } while (0)
 #else
 #define cpu_relax()barrier()
 #endif
diff --git a/arch/powerpc/include/asm/spinlock.h 
b/arch/powerpc/include/asm/spinlock.h
index 5f54a74..b047a6a 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -120,7 +120,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
if (SHARED_PROCESSOR)
__spin_yield(lock);
} while (unlikely(lock-slock != 0));
-   HMT_medium();
+   HMT_medium_low();
}
 }
 
@@ -140,7 +140,7 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned 
long flags)
if (SHARED_PROCESSOR)
__spin_yield(lock);
} while (unlikely(lock-slock != 0));
-   HMT_medium();
+   HMT_medium_low();
local_irq_restore(flags_dis);
}
 }
@@ -240,7 +240,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
if (SHARED_PROCESSOR)
__rw_yield(rw);
} while (unlikely(rw-lock  0));
-   HMT_medium();
+   HMT_medium_low();
}
 }
 
@@ -254,7 +254,7 @@ static inline void arch_write_lock(arch_rwlock_t

Re: [PATCH] powerpc: fix xmon disassembler for little-endian

2013-12-05 Thread Philippe Bergheaud

Benjamin Herrenschmidt wrote:

On Wed, 2013-12-04 at 14:45 +0100, Philippe Bergheaud wrote:



+#ifdef __LITTLE_ENDIAN__
+#define GETWORD(v) (((v)[3]  24) + ((v)[2]  16) + ((v)[1]  8) + 
(v)[0])
+#else
#define GETWORD(v)  (((v)[0]  24) + ((v)[1]  16) + ((v)[2]  8) + 
(v)[3])
+#endif

#define isxdigit(c) (('0' = (c)  (c) = '9') \
 || ('a' = (c)  (c) = 'f') \




Philippe:  Wouldn't it be better to just do a 32-bit load and let the 
endianness be worked out
by the hardware?  i.e.

#define GETWORD(v) (*(u32 *)v)


Yes, your alternative is better.
Wouldn't it narrow the scope of the macro to aligned words on POWER7?
I think that all references to GETWORD operate on aligned words anyway.



Well, xmon has to be robust ... as long as you are *certain* that even
with crap entry state it won't try to access unaligned boundaries then
go for it but we aren't looking at performance here.

Thank you Tom and Ben.  We are definitely not looking at performance here.
I prefer to stay on the safe side, and leave the original patch untouched.

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: fix xmon disassembler for little-endian

2013-12-04 Thread Philippe Bergheaud

Tom Musta wrote:

On 12/2/2013 3:10 AM, Philippe Bergheaud wrote:


This patch fixes the disassembler of the powerpc kernel debugger xmon,
for little-endian.

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
---
arch/powerpc/xmon/xmon.c |4 
1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index af9d346..6c27804 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -171,7 +171,11 @@ extern void xmon_leave(void);
#define REG %.8lx
#endif

+#ifdef __LITTLE_ENDIAN__
+#define GETWORD(v) (((v)[3]  24) + ((v)[2]  16) + ((v)[1]  8) + 
(v)[0])
+#else
#define GETWORD(v)  (((v)[0]  24) + ((v)[1]  16) + ((v)[2]  8) + 
(v)[3])
+#endif

#define isxdigit(c) (('0' = (c)  (c) = '9') \
 || ('a' = (c)  (c) = 'f') \




Philippe:  Wouldn't it be better to just do a 32-bit load and let the 
endianness be worked out
by the hardware?  i.e.

#define GETWORD(v) (*(u32 *)v)

Yes, your alternative is better.
Wouldn't it narrow the scope of the macro to aligned words on POWER7?
I think that all references to GETWORD operate on aligned words anyway.

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc: fix xmon disassembler for little-endian

2013-12-02 Thread Philippe Bergheaud
This patch fixes the disassembler of the powerpc kernel debugger xmon,
for little-endian.

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
---
 arch/powerpc/xmon/xmon.c |4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index af9d346..6c27804 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -171,7 +171,11 @@ extern void xmon_leave(void);
 #define REG%.8lx
 #endif
 
+#ifdef __LITTLE_ENDIAN__
+#define GETWORD(v) (((v)[3]  24) + ((v)[2]  16) + ((v)[1]  8) + 
(v)[0])
+#else
 #define GETWORD(v) (((v)[0]  24) + ((v)[1]  16) + ((v)[2]  8) + 
(v)[3])
+#endif
 
 #define isxdigit(c)(('0' = (c)  (c) = '9') \
 || ('a' = (c)  (c) = 'f') \
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2] powerpc: memcpy optimization for 64bit LE

2013-11-07 Thread Philippe Bergheaud

Unaligned stores take alignment exceptions on POWER7 running in little-endian.
This is a dumb little-endian base memcpy that prevents unaligned stores.
Once booted the feature fixup code switches over to the VMX copy loops
(which are already endian safe).

The question is what we do before that switch over. The base 64bit
memcpy takes alignment exceptions on POWER7 so we can't use it as is.
Fixing the causes of alignment exception would slow it down, because
we'd need to ensure all loads and stores are aligned either through
rotate tricks or bytewise loads and stores. Either would be bad for
all other 64bit platforms.

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/string.h |4 
 arch/powerpc/kernel/ppc_ksyms.c   |2 --
 arch/powerpc/lib/Makefile |2 --
 arch/powerpc/lib/memcpy_64.S  |   19 +++
 4 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/string.h 
b/arch/powerpc/include/asm/string.h
index 0dffad6..e40010a 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -10,9 +10,7 @@
 #define __HAVE_ARCH_STRNCMP
 #define __HAVE_ARCH_STRCAT
 #define __HAVE_ARCH_MEMSET
-#ifdef __BIG_ENDIAN__
 #define __HAVE_ARCH_MEMCPY
-#endif
 #define __HAVE_ARCH_MEMMOVE
 #define __HAVE_ARCH_MEMCMP
 #define __HAVE_ARCH_MEMCHR
@@ -24,9 +22,7 @@ extern int strcmp(const char *,const char *);
 extern int strncmp(const char *, const char *, __kernel_size_t);
 extern char * strcat(char *, const char *);
 extern void * memset(void *,int,__kernel_size_t);
-#ifdef __BIG_ENDIAN__
 extern void * memcpy(void *,const void *,__kernel_size_t);
-#endif
 extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 526ad5c..0c2dd60 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -147,9 +147,7 @@ EXPORT_SYMBOL(__ucmpdi2);
 #endif
 long long __bswapdi2(long long);
 EXPORT_SYMBOL(__bswapdi2);
-#ifdef __BIG_ENDIAN__
 EXPORT_SYMBOL(memcpy);
-#endif
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcmp);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5310132..6670361 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -23,9 +23,7 @@ obj-y += checksum_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC64)+= checksum_wrappers_64.o
 endif

-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
 obj-$(CONFIG_PPC64)+= memcpy_power7.o memcpy_64.o
-endif

 obj-$(CONFIG_PPC_EMULATE_SSTEP)+= sstep.o ldstfp.o

diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index d2bbbc8..358cf74 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,10 +12,28 @@
.align  7
 _GLOBAL(memcpy)
 BEGIN_FTR_SECTION
+#ifdef __LITTLE_ENDIAN__
+   cmpdi cr7,r5,0  /* dumb little-endian memcpy */
+#else
std r3,48(r1)   /* save destination pointer for return value */
+#endif
 FTR_SECTION_ELSE
b   memcpy_power7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#ifdef __LITTLE_ENDIAN__
+   addi r5,r5,-1
+   addi r9,r3,-1
+   add r5,r3,r5
+   subf r5,r9,r5
+   addi r4,r4,-1
+   mtctr r5
+   beqlr cr7
+1:
+   lbzu r10,1(r4)
+   stbu r10,1(r9)
+   bdnz 1b
+   blr
+#else
PPC_MTOCRF(0x01,r5)
cmpldi  cr1,r5,16
neg r6,r3   # LS 3 bits = # bytes to 8-byte dest bdry
@@ -201,3 +219,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
stb r0,0(r3)
 4: ld  r3,48(r1)   /* return dest pointer */
blr
+#endif
-- 1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: memcpy optimization for 64bit LE

2013-11-06 Thread Philippe Bergheaud

Michael Neuling wrote:

Philippe Bergheaud fe...@linux.vnet.ibm.com wrote:



Unaligned stores take alignment exceptions on POWER7 running in little-endian.
This is a dumb little-endian base memcpy that prevents unaligned stores.
It is replaced by the VMX memcpy at boot.



Is this any faster than the generic version?


The little-endian assembly code of the base memcpy is similar to the code 
emitted by gcc when compiling the generic memcpy in lib/string.c, and runs at 
the same speed.
However, a little-endian assembly version of the base memcpy is required (as 
opposed to a C version), in order to use the self-modifying code 
instrumentation system.
After the cpu feature CPU_FTR_ALTIVEC is detected at boot, the slow base memcpy 
is nop'ed out, and the fast memcpy_power7 is used instead.

Philippe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc: memcpy optimization for 64bit LE

2013-11-05 Thread Philippe Bergheaud
Unaligned stores take alignment exceptions on POWER7 running in little-endian.
This is a dumb little-endian base memcpy that prevents unaligned stores.
It is replaced by the VMX memcpy at boot.

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/string.h |4 
 arch/powerpc/kernel/ppc_ksyms.c   |2 --
 arch/powerpc/lib/Makefile |2 --
 arch/powerpc/lib/memcpy_64.S  |   19 +++
 4 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/string.h 
b/arch/powerpc/include/asm/string.h
index 0dffad6..e40010a 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -10,9 +10,7 @@
 #define __HAVE_ARCH_STRNCMP
 #define __HAVE_ARCH_STRCAT
 #define __HAVE_ARCH_MEMSET
-#ifdef __BIG_ENDIAN__
 #define __HAVE_ARCH_MEMCPY
-#endif
 #define __HAVE_ARCH_MEMMOVE
 #define __HAVE_ARCH_MEMCMP
 #define __HAVE_ARCH_MEMCHR
@@ -24,9 +22,7 @@ extern int strcmp(const char *,const char *);
 extern int strncmp(const char *, const char *, __kernel_size_t);
 extern char * strcat(char *, const char *);
 extern void * memset(void *,int,__kernel_size_t);
-#ifdef __BIG_ENDIAN__
 extern void * memcpy(void *,const void *,__kernel_size_t);
-#endif
 extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 526ad5c..0c2dd60 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -147,9 +147,7 @@ EXPORT_SYMBOL(__ucmpdi2);
 #endif
 long long __bswapdi2(long long);
 EXPORT_SYMBOL(__bswapdi2);
-#ifdef __BIG_ENDIAN__
 EXPORT_SYMBOL(memcpy);
-#endif
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcmp);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5310132..6670361 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -23,9 +23,7 @@ obj-y += checksum_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC64)+= checksum_wrappers_64.o
 endif
 
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
 obj-$(CONFIG_PPC64)+= memcpy_power7.o memcpy_64.o 
-endif
 
 obj-$(CONFIG_PPC_EMULATE_SSTEP)+= sstep.o ldstfp.o
 
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index d2bbbc8..358cf74 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,10 +12,28 @@
.align  7
 _GLOBAL(memcpy)
 BEGIN_FTR_SECTION
+#ifdef __LITTLE_ENDIAN__
+   cmpdi cr7,r5,0  /* dumb little-endian memcpy */
+#else
std r3,48(r1)   /* save destination pointer for return value */
+#endif
 FTR_SECTION_ELSE
b   memcpy_power7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#ifdef __LITTLE_ENDIAN__
+   addi r5,r5,-1
+   addi r9,r3,-1
+   add r5,r3,r5
+   subf r5,r9,r5
+   addi r4,r4,-1
+   mtctr r5
+   beqlr cr7
+1:
+   lbzu r10,1(r4)
+   stbu r10,1(r9)
+   bdnz 1b
+   blr
+#else
PPC_MTOCRF(0x01,r5)
cmpldi  cr1,r5,16
neg r6,r3   # LS 3 bits = # bytes to 8-byte dest bdry
@@ -201,3 +219,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
stb r0,0(r3)
 4: ld  r3,48(r1)   /* return dest pointer */
blr
+#endif
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc: word-at-a-time optimization for 64bit LE

2013-09-26 Thread Philippe Bergheaud
This is an optimization for the PowerPC in 64-bit
little-endian. Bit counting is used in find_zero(), instead
of the multiply and shift.

It is modelled after Alan Modra's PowerPC LE strlen patch
http://sourceware.org/ml/libc-alpha/2013-08/msg00097.html.

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/word-at-a-time.h |   57 -
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/word-at-a-time.h 
b/arch/powerpc/include/asm/word-at-a-time.h
index 213a5f2..9a5c928 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -42,13 +42,6 @@ static inline bool has_zero(unsigned long val, unsigned long 
*data, const struct
 
 #else
 
-/*
- * This is largely generic for little-endian machines, but the
- * optimal byte mask counting is probably going to be something
- * that is architecture-specific. If you have a reliably fast
- * bit count instruction, that might be better than the multiply
- * and shift, for example.
- */
 struct word_at_a_time {
const unsigned long one_bits, high_bits;
 };
@@ -57,19 +50,32 @@ struct word_at_a_time {
 
 #ifdef CONFIG_64BIT
 
-/*
- * Jan Achrenius on G+: microoptimized version of
- * the simpler (mask  ONEBYTES) * ONEBYTES  56
- * that works for the bytemasks without having to
- * mask them first.
- */
-static inline long count_masked_bytes(unsigned long mask)
+/* Alan Modra's little-endian strlen tail for 64-bit */
+#define create_zero_mask(mask) (mask)
+
+static inline unsigned long find_zero(unsigned long mask)
 {
-   return mask*0x0001020304050608ul  56;
+   unsigned long leading_zero_bits;
+   long trailing_zero_bit_mask;
+
+   asm (addi %1,%2,-1\n\t
+andc %1,%1,%2\n\t
+popcntd %0,%1
+: =r (leading_zero_bits), =r (trailing_zero_bit_mask)
+: r (mask));
+   return leading_zero_bits  3;
 }
 
 #else  /* 32-bit case */
 
+/*
+ * This is largely generic for little-endian machines, but the
+ * optimal byte mask counting is probably going to be something
+ * that is architecture-specific. If you have a reliably fast
+ * bit count instruction, that might be better than the multiply
+ * and shift, for example.
+ */
+
 /* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
 static inline long count_masked_bytes(long mask)
 {
@@ -79,6 +85,17 @@ static inline long count_masked_bytes(long mask)
return a  mask;
 }
 
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+   bits = (bits - 1)  ~bits;
+   return bits  7;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+   return count_masked_bytes(mask);
+}
+
 #endif
 
 /* Return nonzero if it has a zero */
@@ -94,19 +111,9 @@ static inline unsigned long prep_zero_mask(unsigned long a, 
unsigned long bits,
return bits;
 }
 
-static inline unsigned long create_zero_mask(unsigned long bits)
-{
-   bits = (bits - 1)  ~bits;
-   return bits  7;
-}
-
 /* The mask we created is directly usable as a bytemask */
 #define zero_bytemask(mask) (mask)
 
-static inline unsigned long find_zero(unsigned long mask)
-{
-   return count_masked_bytes(mask);
-}
 #endif
 
 #endif /* _ASM_WORD_AT_A_TIME_H */
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc: BPF JIT compiler for 64bit LE

2013-09-24 Thread Philippe Bergheaud
This enables the Berkeley Packet Filter JIT compiler
for the PowerPC running in 64bit Little Endian.

Signed-off-by: Philippe Bergheaud fe...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/ppc-opcode.h |1 +
 arch/powerpc/net/bpf_jit.h|   10 ++
 arch/powerpc/net/bpf_jit_64.S |9 -
 arch/powerpc/net/bpf_jit_comp.c   |   17 ++---
 4 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index 247fa1d..23c2b63 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -154,6 +154,7 @@
 /* Misc instructions for BPF compiler */
 #define PPC_INST_LD0xe800
 #define PPC_INST_LHZ   0xa000
+#define PPC_INST_LHBRX 0x7c00062c
 #define PPC_INST_LWZ   0x8000
 #define PPC_INST_STD   0xf800
 #define PPC_INST_STDU  0xf801
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 8a5dfaf..0baf2b8 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -92,6 +92,8 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
 ___PPC_RA(base) | IMM_L(i))
 #define PPC_LHZ(r, base, i)EMIT(PPC_INST_LHZ | ___PPC_RT(r) |\
 ___PPC_RA(base) | IMM_L(i))
+#define PPC_LHBRX(r, base, b)  EMIT(PPC_INST_LHBRX | ___PPC_RT(r) |  \
+___PPC_RA(base) | ___PPC_RB(b))
 /* Convenience helpers for the above with 'far' offsets: */
 #define PPC_LD_OFFS(r, base, i) do { if ((i)  32768) PPC_LD(r, base, i); \
else {  PPC_ADDIS(r, base, IMM_HA(i));\
@@ -186,6 +188,14 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
PPC_ORI(d, d, (uintptr_t)(i)  0x);   \
} } while (0);
 
+#define PPC_LHBRX_OFFS(r, base, i) \
+   do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0)
+#ifdef __LITTLE_ENDIAN__
+#define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i)
+#else
+#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i)
+#endif
+
 static inline bool is_nearbranch(int offset)
 {
return (offset  32768)  (offset = -32768);
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index 7d3a3b5..e76eba7 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -43,8 +43,11 @@ sk_load_word_positive_offset:
cmpdr_scratch1, r_addr
blt bpf_slow_path_word
/* Nope, just hitting the header.  cr0 here is eq or gt! */
+#ifdef __LITTLE_ENDIAN__
+   lwbrx   r_A, r_D, r_addr
+#else
lwzxr_A, r_D, r_addr
-   /* When big endian we don't need to byteswap. */
+#endif
blr /* Return success, cr0 != LT */
 
.globl  sk_load_half
@@ -56,7 +59,11 @@ sk_load_half_positive_offset:
subir_scratch1, r_HL, 2
cmpdr_scratch1, r_addr
blt bpf_slow_path_half
+#ifdef __LITTLE_ENDIAN__
+   lhbrx   r_A, r_D, r_addr
+#else
lhzxr_A, r_D, r_addr
+#endif
blr
 
.globl  sk_load_byte
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index c427ae3..9e212f9 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -17,14 +17,8 @@
 
 #include bpf_jit.h
 
-#ifndef __BIG_ENDIAN
-/* There are endianness assumptions herein. */
-#error Little-endian PPC not supported in BPF compiler
-#endif
-
 int bpf_jit_enable __read_mostly;
 
-
 static inline void bpf_flush_icache(void *start, void *end)
 {
smp_wmb();
@@ -346,18 +340,11 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 
*image,
break;
 
/*** Ancillary info loads ***/
-
-   /* None of the BPF_S_ANC* codes appear to be passed by
-* sk_chk_filter().  The interpreter and the x86 BPF
-* compiler implement them so we do too -- they may be
-* planted in future.
-*/
case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb-protocol); */
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
  protocol) != 2);
-   PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- protocol));
-   /* ntohs is a NOP with BE loads. */
+   PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+   protocol));
break;
case BPF_S_ANC_IFINDEX:
PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
-- 
1.7.10.4