Re: [PATCH 2/4] PCI: Generalize TLP Header Log reading

2024-03-14 Thread Bjorn Helgaas
[+cc Greg, Jeff -- ancient history, I know, sorry!]

On Tue, Feb 06, 2024 at 03:57:15PM +0200, Ilpo Järvinen wrote:
> Both AER and DPC RP PIO provide TLP Header Log registers (PCIe r6.1
> secs 7.8.4 & 7.9.14) to convey error diagnostics but the struct is
> named after AER as the struct aer_header_log_regs. Also, not all places
> that handle TLP Header Log use the struct and the struct members are
> named individually.
> 
> Generalize the struct name and members, and use it consistently where
> TLP Header Log is being handled so that a pcie_read_tlp_log() helper
> can be easily added.
> 
> Signed-off-by: Ilpo Järvinen 

> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
> b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index bd541527c8c7..5fdf37968b2d 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -1,6 +1,7 @@
>  // SPDX-License-Identifier: GPL-2.0
>  /* Copyright(c) 1999 - 2018 Intel Corporation. */
>  
> +#include <linux/aer.h>
>  #include <linux/types.h>
>  #include <linux/module.h>
>  #include <linux/pci.h>
> @@ -391,22 +392,6 @@ u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg)
>   return value;
>  }
>  
> -#ifdef CONFIG_PCI_IOV
> -static u32 ixgbe_read_pci_cfg_dword(struct ixgbe_hw *hw, u32 reg)
> -{
> - struct ixgbe_adapter *adapter = hw->back;
> - u32 value;
> -
> - if (ixgbe_removed(hw->hw_addr))
> - return IXGBE_FAILED_READ_CFG_DWORD;
> - pci_read_config_dword(adapter->pdev, reg, &value);
> - if (value == IXGBE_FAILED_READ_CFG_DWORD &&
> - ixgbe_check_cfg_remove(hw, adapter->pdev))
> - return IXGBE_FAILED_READ_CFG_DWORD;
> - return value;
> -}
> -#endif /* CONFIG_PCI_IOV */
> -
>  void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value)
>  {
>   struct ixgbe_adapter *adapter = hw->back;
> @@ -11332,8 +11317,8 @@ static pci_ers_result_t 
> ixgbe_io_error_detected(struct pci_dev *pdev,
>  #ifdef CONFIG_PCI_IOV
>   struct ixgbe_hw *hw = &adapter->hw;
>   struct pci_dev *bdev, *vfdev;
> - u32 dw0, dw1, dw2, dw3;
> - int vf, pos;
> + struct pcie_tlp_log tlp_log;
> + int vf, pos, ret;
>   u16 req_id, pf_func;
>  
>   if (adapter->hw.mac.type == ixgbe_mac_82598EB ||
> @@ -11351,14 +11336,13 @@ static pci_ers_result_t 
> ixgbe_io_error_detected(struct pci_dev *pdev,
>   if (!pos)
>   goto skip_bad_vf_detection;
>  
> - dw0 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG);
> - dw1 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 4);
> - dw2 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 8);
> - dw3 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 12);
> - if (ixgbe_removed(hw->hw_addr))
> + ret = pcie_read_tlp_log(pdev, pos + PCI_ERR_HEADER_LOG, &tlp_log);
> + if (ret < 0) {
> + ixgbe_check_cfg_remove(hw, pdev);
>   goto skip_bad_vf_detection;
> + }
>  
> - req_id = dw1 >> 16;
> + req_id = tlp_log.dw[1] >> 16;
>   /* On the 82599 if bit 7 of the requestor ID is set then it's a VF */
>   if (!(req_id & 0x0080))
>   goto skip_bad_vf_detection;
> @@ -11369,9 +11353,8 @@ static pci_ers_result_t 
> ixgbe_io_error_detected(struct pci_dev *pdev,
>  
>   vf = FIELD_GET(0x7F, req_id);
>   e_dev_err("VF %d has caused a PCIe error\n", vf);
> - e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
> - "%8.8x\tdw3: %8.8x\n",
> - dw0, dw1, dw2, dw3);
> + e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: %8.8x\tdw3: %8.8x\n",
> +   tlp_log.dw[0], tlp_log.dw[1], tlp_log.dw[2], tlp_log.dw[3]);
>   switch (adapter->hw.mac.type) {
>   case ixgbe_mac_82599EB:
>   device_id = IXGBE_82599_VF_DEVICE_ID;

The rest of this patch is headed for v6.10, but I dropped this ixgbe
change for now.

These TLP Log registers are generic, not device-specific, and if
there's something lacking in the PCI core that leads to ixgbe reading
and dumping them itself, I'd rather improve the PCI core so all
drivers will benefit without having to add code like this.

83c61fa97a7d ("ixgbe: Add protection from VF invalid target DMA") [1]
added the ixgbe TLP Log dumping way back in v3.2 (2012).  It does do
some device-specific VF checking and so on, but even back then, it
looks like the PCI core would have dumped the log itself [2], so I
don't know why we needed the extra dumping in ixgbe.

So what I'd really like is to remove the TLP Log reading and printing
from ixgbe completely, but keep the VF checking.

Bjorn

[1] https://git.kernel.org/linus/83c61fa97a7d
[2] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/pci/pcie/aer/aerdrv_errprint.c?id=83c61fa97a7d#n181


[PATCH 2/4] PCI: Generalize TLP Header Log reading

2024-02-06 Thread Ilpo Järvinen
Both AER and DPC RP PIO provide TLP Header Log registers (PCIe r6.1
secs 7.8.4 & 7.9.14) to convey error diagnostics but the struct is
named after AER as the struct aer_header_log_regs. Also, not all places
that handle TLP Header Log use the struct and the struct members are
named individually.

Generalize the struct name and members, and use it consistently where
TLP Header Log is being handled so that a pcie_read_tlp_log() helper
can be easily added.

Signed-off-by: Ilpo Järvinen 
---
 drivers/firmware/efi/cper.c   |  4 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 37 +--
 drivers/pci/pci.c | 26 +
 drivers/pci/pci.h |  2 +-
 drivers/pci/pcie/aer.c| 14 ++-
 drivers/pci/pcie/dpc.c| 14 ++-
 include/linux/aer.h   | 11 +++---
 include/ras/ras_event.h   | 10 ++---
 8 files changed, 56 insertions(+), 62 deletions(-)

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 35c37f667781..d3f98161171e 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -445,8 +445,8 @@ static void cper_print_pcie(const char *pfx, const struct 
cper_sec_pcie *pcie,
printk("%saer_uncor_severity: 0x%08x\n",
   pfx, aer->uncor_severity);
printk("%sTLP Header: %08x %08x %08x %08x\n", pfx,
-  aer->header_log.dw0, aer->header_log.dw1,
-  aer->header_log.dw2, aer->header_log.dw3);
+  aer->header_log.dw[0], aer->header_log.dw[1],
+  aer->header_log.dw[2], aer->header_log.dw[3]);
}
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index bd541527c8c7..5fdf37968b2d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 1999 - 2018 Intel Corporation. */
 
+#include <linux/aer.h>
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -391,22 +392,6 @@ u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg)
return value;
 }
 
-#ifdef CONFIG_PCI_IOV
-static u32 ixgbe_read_pci_cfg_dword(struct ixgbe_hw *hw, u32 reg)
-{
-   struct ixgbe_adapter *adapter = hw->back;
-   u32 value;
-
-   if (ixgbe_removed(hw->hw_addr))
-   return IXGBE_FAILED_READ_CFG_DWORD;
-   pci_read_config_dword(adapter->pdev, reg, &value);
-   if (value == IXGBE_FAILED_READ_CFG_DWORD &&
-   ixgbe_check_cfg_remove(hw, adapter->pdev))
-   return IXGBE_FAILED_READ_CFG_DWORD;
-   return value;
-}
-#endif /* CONFIG_PCI_IOV */
-
 void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value)
 {
struct ixgbe_adapter *adapter = hw->back;
@@ -11332,8 +11317,8 @@ static pci_ers_result_t ixgbe_io_error_detected(struct 
pci_dev *pdev,
 #ifdef CONFIG_PCI_IOV
struct ixgbe_hw *hw = &adapter->hw;
struct pci_dev *bdev, *vfdev;
-   u32 dw0, dw1, dw2, dw3;
-   int vf, pos;
+   struct pcie_tlp_log tlp_log;
+   int vf, pos, ret;
u16 req_id, pf_func;
 
if (adapter->hw.mac.type == ixgbe_mac_82598EB ||
@@ -11351,14 +11336,13 @@ static pci_ers_result_t 
ixgbe_io_error_detected(struct pci_dev *pdev,
if (!pos)
goto skip_bad_vf_detection;
 
-   dw0 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG);
-   dw1 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 4);
-   dw2 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 8);
-   dw3 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 12);
-   if (ixgbe_removed(hw->hw_addr))
+   ret = pcie_read_tlp_log(pdev, pos + PCI_ERR_HEADER_LOG, &tlp_log);
+   if (ret < 0) {
+   ixgbe_check_cfg_remove(hw, pdev);
goto skip_bad_vf_detection;
+   }
 
-   req_id = dw1 >> 16;
+   req_id = tlp_log.dw[1] >> 16;
/* On the 82599 if bit 7 of the requestor ID is set then it's a VF */
if (!(req_id & 0x0080))
goto skip_bad_vf_detection;
@@ -11369,9 +11353,8 @@ static pci_ers_result_t ixgbe_io_error_detected(struct 
pci_dev *pdev,
 
vf = FIELD_GET(0x7F, req_id);
e_dev_err("VF %d has caused a PCIe error\n", vf);
-   e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
-   "%8.8x\tdw3: %8.8x\n",
-   dw0, dw1, dw2, dw3);
+   e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: %8.8x\tdw3: %8.8x\n",
+ tlp_log.dw[0], tlp_log.dw[1], tlp_log.dw[2], tlp_log.dw[3]);
switch (adapter->hw.mac.type) {
case ixgbe_mac_82599EB:
device_id = IXGBE_82599_VF_DEVICE_ID;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index