Re: [PATCH] cxl/pci: Move tracepoint definitions to drivers/cxl/core/

2022-12-08 Thread Alison Schofield
On Thu, Dec 08, 2022 at 09:02:00AM -0800, Dan Williams wrote:
> CXL is using tracepoints for reporting RAS capability register payloads
> for AER events, and has plans to use tracepoints for the output payload
> of Get Poison List and Get Event Records commands. For organization
> purposes it would be nice to keep those all under a single + local CXL
> trace system. This also organization also potentially helps in the
> future when CXL drivers expand beyond generic memory expanders, however
> that would also entail a move away from the expander-specific
> cxl_dev_state context, save that for later.
> 
> Note that the powerpc-specific drivers/misc/cxl/ also defines a 'cxl'
> trace system, however, it is unlikely that a single platform will ever
> load both drivers simultaneously.

Verified this on top of cxl/next with the get-poison-list patchset
using the new trace file layout.

Also, confirmed that the cxl_aer_*_error events appeared correctly
in sys/kernel/debug/tracing/events/ and that 'cxl monitor' could 
be run.

Tested-by: Alison Schofield 

> 
> Cc: Steven Rostedt 
> Signed-off-by: Dan Williams 
> ---
> This patch is targeting v6.3.  I am sending it out now to enable the
> in-flight Event and Poison list patch sets to build upon. It will not
> move to a non-rebasing branch until after v6.2-rc2, but in the meantime
> I can throw it out on the list and the cxl/preview branch.
> 
>  drivers/cxl/core/Makefile  |3 +
>  drivers/cxl/core/pci.c |  112 
> 
>  drivers/cxl/core/trace.c   |5 ++
>  drivers/cxl/core/trace.h   |   11 ++--
>  drivers/cxl/cxl.h  |2 +
>  drivers/cxl/cxlpci.h   |3 +
>  drivers/cxl/pci.c  |  111 
> 
>  tools/testing/cxl/Kbuild   |2 +
>  8 files changed, 131 insertions(+), 118 deletions(-)
>  create mode 100644 drivers/cxl/core/trace.c
>  rename include/trace/events/cxl.h => drivers/cxl/core/trace.h (94%)
> 
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index 79c7257f4107..ca4ae31d8f57 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -3,6 +3,8 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o
>  obj-$(CONFIG_CXL_SUSPEND) += suspend.o
>  
>  ccflags-y += -I$(srctree)/drivers/cxl
> +CFLAGS_trace.o = -DTRACE_INCLUDE_PATH=. -I$(src)
> +
>  cxl_core-y := port.o
>  cxl_core-y += pmem.o
>  cxl_core-y += regs.o
> @@ -10,4 +12,5 @@ cxl_core-y += memdev.o
>  cxl_core-y += mbox.o
>  cxl_core-y += pci.o
>  cxl_core-y += hdm.o
> +cxl_core-$(CONFIG_TRACING) += trace.o
>  cxl_core-$(CONFIG_CXL_REGION) += region.o
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 57764e9cd19d..1d1492440287 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -9,6 +9,7 @@
>  #include 
>  #include 
>  #include "core.h"
> +#include "trace.h"
>  
>  /**
>   * DOC: cxl core pci
> @@ -622,3 +623,114 @@ void read_cdat_data(struct cxl_port *port)
>   }
>  }
>  EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
> +
> +void cxl_cor_error_detected(struct pci_dev *pdev)
> +{
> + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
> + struct cxl_memdev *cxlmd = cxlds->cxlmd;
> + struct device *dev = >dev;
> + void __iomem *addr;
> + u32 status;
> +
> + if (!cxlds->regs.ras)
> + return;
> +
> + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
> + status = readl(addr);
> + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
> + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> + trace_cxl_aer_correctable_error(dev, status);
> + }
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
> +
> +/* CXL spec rev3.0 8.2.4.16.1 */
> +static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
> +{
> + void __iomem *addr;
> + u32 *log_addr;
> + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
> +
> + addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
> + log_addr = log;
> +
> + for (i = 0; i < log_u32_size; i++) {
> + *log_addr = readl(addr);
> + log_addr++;
> + addr += sizeof(u32);
> + }
> +}
> +
> +/*
> + * Log the state of the RAS status registers and prepare them to log the
> + * next error status. Return 1 if reset needed.
> + */
> +static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
> +{
> + struct cxl_memdev *cxlmd = cxlds->cxlmd;
> + struct device *dev = >dev;
> + u32 hl[CXL_HEADERLOG_SIZE_U32];
> + void __iomem *addr;
> + u32 status;
> + u32 fe;
> +
> + if (!cxlds->regs.ras)
> + return false;
> +
> + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
> + status = readl(addr);
> + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
> + return false;
> +
> + /* If multiple errors, log header points to first error from ctrl reg */
> + if (hweight32(status) > 1) {

Re: [PATCH] cxl/pci: Move tracepoint definitions to drivers/cxl/core/

2022-12-08 Thread Dave Jiang




On 12/8/2022 10:02 AM, Dan Williams wrote:

CXL is using tracepoints for reporting RAS capability register payloads
for AER events, and has plans to use tracepoints for the output payload
of Get Poison List and Get Event Records commands. For organization
purposes it would be nice to keep those all under a single + local CXL
trace system. This also organization also potentially helps in the
future when CXL drivers expand beyond generic memory expanders, however
that would also entail a move away from the expander-specific
cxl_dev_state context, save that for later.

Note that the powerpc-specific drivers/misc/cxl/ also defines a 'cxl'
trace system, however, it is unlikely that a single platform will ever
load both drivers simultaneously.

Cc: Steven Rostedt 
Signed-off-by: Dan Williams 


Reviewed-by: Dave Jiang 


---
This patch is targeting v6.3.  I am sending it out now to enable the
in-flight Event and Poison list patch sets to build upon. It will not
move to a non-rebasing branch until after v6.2-rc2, but in the meantime
I can throw it out on the list and the cxl/preview branch.

  drivers/cxl/core/Makefile  |3 +
  drivers/cxl/core/pci.c |  112 
  drivers/cxl/core/trace.c   |5 ++
  drivers/cxl/core/trace.h   |   11 ++--
  drivers/cxl/cxl.h  |2 +
  drivers/cxl/cxlpci.h   |3 +
  drivers/cxl/pci.c  |  111 
  tools/testing/cxl/Kbuild   |2 +
  8 files changed, 131 insertions(+), 118 deletions(-)
  create mode 100644 drivers/cxl/core/trace.c
  rename include/trace/events/cxl.h => drivers/cxl/core/trace.h (94%)

diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 79c7257f4107..ca4ae31d8f57 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -3,6 +3,8 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o
  obj-$(CONFIG_CXL_SUSPEND) += suspend.o
  
  ccflags-y += -I$(srctree)/drivers/cxl

+CFLAGS_trace.o = -DTRACE_INCLUDE_PATH=. -I$(src)
+
  cxl_core-y := port.o
  cxl_core-y += pmem.o
  cxl_core-y += regs.o
@@ -10,4 +12,5 @@ cxl_core-y += memdev.o
  cxl_core-y += mbox.o
  cxl_core-y += pci.o
  cxl_core-y += hdm.o
+cxl_core-$(CONFIG_TRACING) += trace.o
  cxl_core-$(CONFIG_CXL_REGION) += region.o
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 57764e9cd19d..1d1492440287 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -9,6 +9,7 @@
  #include 
  #include 
  #include "core.h"
+#include "trace.h"
  
  /**

   * DOC: cxl core pci
@@ -622,3 +623,114 @@ void read_cdat_data(struct cxl_port *port)
}
  }
  EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
+
+void cxl_cor_error_detected(struct pci_dev *pdev)
+{
+   struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+   struct cxl_memdev *cxlmd = cxlds->cxlmd;
+   struct device *dev = >dev;
+   void __iomem *addr;
+   u32 status;
+
+   if (!cxlds->regs.ras)
+   return;
+
+   addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+   status = readl(addr);
+   if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
+   writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+   trace_cxl_aer_correctable_error(dev, status);
+   }
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
+
+/* CXL spec rev3.0 8.2.4.16.1 */
+static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
+{
+   void __iomem *addr;
+   u32 *log_addr;
+   int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
+
+   addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
+   log_addr = log;
+
+   for (i = 0; i < log_u32_size; i++) {
+   *log_addr = readl(addr);
+   log_addr++;
+   addr += sizeof(u32);
+   }
+}
+
+/*
+ * Log the state of the RAS status registers and prepare them to log the
+ * next error status. Return 1 if reset needed.
+ */
+static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
+{
+   struct cxl_memdev *cxlmd = cxlds->cxlmd;
+   struct device *dev = >dev;
+   u32 hl[CXL_HEADERLOG_SIZE_U32];
+   void __iomem *addr;
+   u32 status;
+   u32 fe;
+
+   if (!cxlds->regs.ras)
+   return false;
+
+   addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+   status = readl(addr);
+   if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
+   return false;
+
+   /* If multiple errors, log header points to first error from ctrl reg */
+   if (hweight32(status) > 1) {
+   addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+   fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
+   } else {
+   fe = status;
+   }
+
+   header_log_copy(cxlds, hl);
+   trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
+   writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+
+   return true;
+}
+

[PATCH] cxl/pci: Move tracepoint definitions to drivers/cxl/core/

2022-12-08 Thread Dan Williams
CXL is using tracepoints for reporting RAS capability register payloads
for AER events, and has plans to use tracepoints for the output payload
of Get Poison List and Get Event Records commands. For organization
purposes it would be nice to keep those all under a single + local CXL
trace system. This also organization also potentially helps in the
future when CXL drivers expand beyond generic memory expanders, however
that would also entail a move away from the expander-specific
cxl_dev_state context, save that for later.

Note that the powerpc-specific drivers/misc/cxl/ also defines a 'cxl'
trace system, however, it is unlikely that a single platform will ever
load both drivers simultaneously.

Cc: Steven Rostedt 
Signed-off-by: Dan Williams 
---
This patch is targeting v6.3.  I am sending it out now to enable the
in-flight Event and Poison list patch sets to build upon. It will not
move to a non-rebasing branch until after v6.2-rc2, but in the meantime
I can throw it out on the list and the cxl/preview branch.

 drivers/cxl/core/Makefile  |3 +
 drivers/cxl/core/pci.c |  112 
 drivers/cxl/core/trace.c   |5 ++
 drivers/cxl/core/trace.h   |   11 ++--
 drivers/cxl/cxl.h  |2 +
 drivers/cxl/cxlpci.h   |3 +
 drivers/cxl/pci.c  |  111 
 tools/testing/cxl/Kbuild   |2 +
 8 files changed, 131 insertions(+), 118 deletions(-)
 create mode 100644 drivers/cxl/core/trace.c
 rename include/trace/events/cxl.h => drivers/cxl/core/trace.h (94%)

diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 79c7257f4107..ca4ae31d8f57 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -3,6 +3,8 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o
 obj-$(CONFIG_CXL_SUSPEND) += suspend.o
 
 ccflags-y += -I$(srctree)/drivers/cxl
+CFLAGS_trace.o = -DTRACE_INCLUDE_PATH=. -I$(src)
+
 cxl_core-y := port.o
 cxl_core-y += pmem.o
 cxl_core-y += regs.o
@@ -10,4 +12,5 @@ cxl_core-y += memdev.o
 cxl_core-y += mbox.o
 cxl_core-y += pci.o
 cxl_core-y += hdm.o
+cxl_core-$(CONFIG_TRACING) += trace.o
 cxl_core-$(CONFIG_CXL_REGION) += region.o
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 57764e9cd19d..1d1492440287 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include "core.h"
+#include "trace.h"
 
 /**
  * DOC: cxl core pci
@@ -622,3 +623,114 @@ void read_cdat_data(struct cxl_port *port)
}
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
+
+void cxl_cor_error_detected(struct pci_dev *pdev)
+{
+   struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+   struct cxl_memdev *cxlmd = cxlds->cxlmd;
+   struct device *dev = >dev;
+   void __iomem *addr;
+   u32 status;
+
+   if (!cxlds->regs.ras)
+   return;
+
+   addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+   status = readl(addr);
+   if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
+   writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+   trace_cxl_aer_correctable_error(dev, status);
+   }
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
+
+/* CXL spec rev3.0 8.2.4.16.1 */
+static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
+{
+   void __iomem *addr;
+   u32 *log_addr;
+   int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
+
+   addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
+   log_addr = log;
+
+   for (i = 0; i < log_u32_size; i++) {
+   *log_addr = readl(addr);
+   log_addr++;
+   addr += sizeof(u32);
+   }
+}
+
+/*
+ * Log the state of the RAS status registers and prepare them to log the
+ * next error status. Return 1 if reset needed.
+ */
+static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
+{
+   struct cxl_memdev *cxlmd = cxlds->cxlmd;
+   struct device *dev = >dev;
+   u32 hl[CXL_HEADERLOG_SIZE_U32];
+   void __iomem *addr;
+   u32 status;
+   u32 fe;
+
+   if (!cxlds->regs.ras)
+   return false;
+
+   addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+   status = readl(addr);
+   if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
+   return false;
+
+   /* If multiple errors, log header points to first error from ctrl reg */
+   if (hweight32(status) > 1) {
+   addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+   fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
+   } else {
+   fe = status;
+   }
+
+   header_log_copy(cxlds, hl);
+   trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
+   writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+
+   return true;
+}
+
+pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+   pci_channel_state_t state)