Re: [PATCH v2 3/4] PCI/AER: Fetch information for FTrace

2024-02-02 Thread Wang, Qingshun
On Fri, Feb 02, 2024 at 10:01:40AM -0800, Dan Williams wrote:
> Wang, Qingshun wrote:
> > Fetch and store the data of 3 more registers: "Link Status", "Device
> > Control 2", and "Advanced Error Capabilities and Control". This data is
> > needed for external observation to better understand ANFE.
> > 
> > Signed-off-by: "Wang, Qingshun" 
> > ---
> >  drivers/acpi/apei/ghes.c |  8 +++-
> >  drivers/cxl/core/pci.c   | 11 ++-
> >  drivers/pci/pci.h|  4 
> >  drivers/pci/pcie/aer.c   | 26 --
> >  include/linux/aer.h  |  6 --
> >  5 files changed, 45 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> > index 6034039d5cff..047cc01be68c 100644
> > --- a/drivers/acpi/apei/ghes.c
> > +++ b/drivers/acpi/apei/ghes.c
> > @@ -594,7 +594,9 @@ static void ghes_handle_aer(struct 
> > acpi_hest_generic_data *gdata)
> > if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
> > pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
> > struct pcie_capability_regs *pcie_caps;
> > +   u16 device_control_2 = 0;
> > u16 device_status = 0;
> > +   u16 link_status = 0;
> > unsigned int devfn;
> > int aer_severity;
> > u8 *aer_info;
> > @@ -619,7 +621,9 @@ static void ghes_handle_aer(struct 
> > acpi_hest_generic_data *gdata)
> >  
> > if (pcie_err->validation_bits & CPER_PCIE_VALID_CAPABILITY) {
> > pcie_caps = (struct pcie_capability_regs 
> > *)pcie_err->capability;
> > +   device_control_2 = pcie_caps->device_control_2;
> > device_status = pcie_caps->device_status;
> > +   link_status = pcie_caps->link_status;
> > }
> >  
> > aer_recover_queue(pcie_err->device_id.segment,
> > @@ -627,7 +631,9 @@ static void ghes_handle_aer(struct 
> > acpi_hest_generic_data *gdata)
> >   devfn, aer_severity,
> >   (struct aer_capability_regs *)
> >   aer_info,
> > - device_status);
> > + device_status,
> > + link_status,
> > + device_control_2);
> > }
> >  #endif
> >  }
> > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> > index 9111a4415a63..3aa57fe8db42 100644
> > --- a/drivers/cxl/core/pci.c
> > +++ b/drivers/cxl/core/pci.c
> > @@ -903,7 +903,9 @@ static void cxl_handle_rdport_errors(struct 
> > cxl_dev_state *cxlds)
> > struct aer_capability_regs aer_regs;
> > struct cxl_dport *dport;
> > struct cxl_port *port;
> > +   u16 device_control_2;
> > u16 device_status;
> > +   u16 link_status;
> > int severity;
> >  
> > port = cxl_pci_find_port(pdev, );
> > @@ -918,10 +920,17 @@ static void cxl_handle_rdport_errors(struct 
> > cxl_dev_state *cxlds)
> > if (!cxl_rch_get_aer_severity(_regs, ))
> > return;
> >  
> > +   if (pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, _control_2))
> > +   return;
> > +
> > if (pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, _status))
> > return;
> >  
> > -   pci_print_aer(pdev, severity, _regs, device_status);
> > +   if (pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, _status))
> > +   return;
> > +
> > +   pci_print_aer(pdev, severity, _regs, device_status,
> > + link_status, device_control_2);
> 
> Rather than complicate the calling convention of pci_print_aer(), update
> the internals of pci_print_aer() to get these extra registers, or
> provide a new wrapper interface that satisfies the dependencies and
> switch users over to that.  Otherwise multiple touches of the same code
> path in one patch set is indicative of the need for a higher level
> helper.

Thanks for the advice, it does make sense. Will reconsider the
implementation.

--
Best regards,
Wang, Qingshun


Re: [PATCH v2 3/4] PCI/AER: Fetch information for FTrace

2024-02-02 Thread Dan Williams
Wang, Qingshun wrote:
> Fetch and store the data of 3 more registers: "Link Status", "Device
> Control 2", and "Advanced Error Capabilities and Control". This data is
> needed for external observation to better understand ANFE.
> 
> Signed-off-by: "Wang, Qingshun" 
> ---
>  drivers/acpi/apei/ghes.c |  8 +++-
>  drivers/cxl/core/pci.c   | 11 ++-
>  drivers/pci/pci.h|  4 
>  drivers/pci/pcie/aer.c   | 26 --
>  include/linux/aer.h  |  6 --
>  5 files changed, 45 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 6034039d5cff..047cc01be68c 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -594,7 +594,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data 
> *gdata)
>   if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
>   pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
>   struct pcie_capability_regs *pcie_caps;
> + u16 device_control_2 = 0;
>   u16 device_status = 0;
> + u16 link_status = 0;
>   unsigned int devfn;
>   int aer_severity;
>   u8 *aer_info;
> @@ -619,7 +621,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data 
> *gdata)
>  
>   if (pcie_err->validation_bits & CPER_PCIE_VALID_CAPABILITY) {
>   pcie_caps = (struct pcie_capability_regs 
> *)pcie_err->capability;
> + device_control_2 = pcie_caps->device_control_2;
>   device_status = pcie_caps->device_status;
> + link_status = pcie_caps->link_status;
>   }
>  
>   aer_recover_queue(pcie_err->device_id.segment,
> @@ -627,7 +631,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data 
> *gdata)
> devfn, aer_severity,
> (struct aer_capability_regs *)
> aer_info,
> -   device_status);
> +   device_status,
> +   link_status,
> +   device_control_2);
>   }
>  #endif
>  }
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 9111a4415a63..3aa57fe8db42 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -903,7 +903,9 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state 
> *cxlds)
>   struct aer_capability_regs aer_regs;
>   struct cxl_dport *dport;
>   struct cxl_port *port;
> + u16 device_control_2;
>   u16 device_status;
> + u16 link_status;
>   int severity;
>  
>   port = cxl_pci_find_port(pdev, );
> @@ -918,10 +920,17 @@ static void cxl_handle_rdport_errors(struct 
> cxl_dev_state *cxlds)
>   if (!cxl_rch_get_aer_severity(_regs, ))
>   return;
>  
> + if (pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, _control_2))
> + return;
> +
>   if (pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, _status))
>   return;
>  
> - pci_print_aer(pdev, severity, _regs, device_status);
> + if (pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, _status))
> + return;
> +
> + pci_print_aer(pdev, severity, _regs, device_status,
> +   link_status, device_control_2);

Rather than complicate the calling convention of pci_print_aer(), update
the internals of pci_print_aer() to get these extra registers, or
provide a new wrapper interface that satisfies the dependencies and
switch users over to that.  Otherwise multiple touches of the same code
path in one patch set is indicative of the need for a higher level
helper.


[PATCH v2 3/4] PCI/AER: Fetch information for FTrace

2024-01-25 Thread Wang, Qingshun
Fetch and store the data of 3 more registers: "Link Status", "Device
Control 2", and "Advanced Error Capabilities and Control". This data is
needed for external observation to better understand ANFE.

Signed-off-by: "Wang, Qingshun" 
---
 drivers/acpi/apei/ghes.c |  8 +++-
 drivers/cxl/core/pci.c   | 11 ++-
 drivers/pci/pci.h|  4 
 drivers/pci/pcie/aer.c   | 26 --
 include/linux/aer.h  |  6 --
 5 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 6034039d5cff..047cc01be68c 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -594,7 +594,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data 
*gdata)
if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
struct pcie_capability_regs *pcie_caps;
+   u16 device_control_2 = 0;
u16 device_status = 0;
+   u16 link_status = 0;
unsigned int devfn;
int aer_severity;
u8 *aer_info;
@@ -619,7 +621,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data 
*gdata)
 
if (pcie_err->validation_bits & CPER_PCIE_VALID_CAPABILITY) {
pcie_caps = (struct pcie_capability_regs 
*)pcie_err->capability;
+   device_control_2 = pcie_caps->device_control_2;
device_status = pcie_caps->device_status;
+   link_status = pcie_caps->link_status;
}
 
aer_recover_queue(pcie_err->device_id.segment,
@@ -627,7 +631,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data 
*gdata)
  devfn, aer_severity,
  (struct aer_capability_regs *)
  aer_info,
- device_status);
+ device_status,
+ link_status,
+ device_control_2);
}
 #endif
 }
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 9111a4415a63..3aa57fe8db42 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -903,7 +903,9 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state 
*cxlds)
struct aer_capability_regs aer_regs;
struct cxl_dport *dport;
struct cxl_port *port;
+   u16 device_control_2;
u16 device_status;
+   u16 link_status;
int severity;
 
port = cxl_pci_find_port(pdev, );
@@ -918,10 +920,17 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state 
*cxlds)
if (!cxl_rch_get_aer_severity(_regs, ))
return;
 
+   if (pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, _control_2))
+   return;
+
if (pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, _status))
return;
 
-   pci_print_aer(pdev, severity, _regs, device_status);
+   if (pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, _status))
+   return;
+
+   pci_print_aer(pdev, severity, _regs, device_status,
+ link_status, device_control_2);
 
if (severity == AER_CORRECTABLE)
cxl_handle_rdport_cor_ras(cxlds, dport);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f881a1b42f14..5788a94b4e95 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -412,7 +412,11 @@ struct aer_err_info {
u32 uncor_mask; /* UNCOR Error Mask */
u32 uncor_status;   /* UNCOR Error Status */
u32 uncor_severity; /* UNCOR Error Severity */
+
+   u16 link_status;
+   u32 aer_cap_ctrl;   /* AER Capabilities and Control */
u16 device_status;
+   u16 device_control_2;
struct aer_header_log_regs tlp; /* TLP Header */
 };
 
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 713cbf625d3f..eec3406f727a 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -825,7 +825,8 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 #endif
 
 void pci_print_aer(struct pci_dev *dev, int aer_severity,
-  struct aer_capability_regs *aer, u16 device_status)
+  struct aer_capability_regs *aer, u16 device_status,
+  u16 link_status, u16 device_control_2)
 {
int layer, agent, tlp_header_valid = 0;
u32 status, mask;
@@ -850,7 +851,10 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
info.uncor_status = aer->uncor_status;
info.uncor_severity = aer->uncor_severity;
info.uncor_mask = aer->uncor_mask;
+   info.link_status = link_status;
+   info.aer_cap_ctrl = aer->cap_control;
info.device_status = device_status;
+   info.device_control_2 = device_control_2;
info.first_error =