PCIe link speed degradation directly impacts system performance and
often indicates hardware issues such as faulty devices, physical layer
problems, or configuration errors.

To this end, add a RAS tracepoint to monitor link speed changes,
enabling proactive health checks and diagnostic analysis.

Example output:

$ echo 1 > /sys/kernel/debug/tracing/events/pci/pcie_link_event/enable
$ cat /sys/kernel/debug/tracing/trace_pipe
           <...>-119     [002] .....   125.776171: pci_hp_event: 0000:00:03.0 slot:30, event:CARD_PRESENT

           <...>-119     [002] .....   125.776197: pci_hp_event: 0000:00:03.0 slot:30, event:LINK_UP

   irq/57-pciehp-119     [002] .....   125.904335: pcie_link_event: 0000:00:03.0 type:4, reason:4, cur_bus_speed:2.5 GT/s PCIe, max_bus_speed:16.0 GT/s PCIe, width:1, flit_mode:0, status:DLLLA

   irq/57-pciehp-119     [002] .....   125.907051: pcie_link_event: 0000:00:03.0 type:4, reason:0, cur_bus_speed:2.5 GT/s PCIe, max_bus_speed:16.0 GT/s PCIe, width:1, flit_mode:0, status:DLLLA

Suggested-by: Bjorn Helgaas <helg...@kernel.org>
Suggested-by: Ilpo Järvinen <ilpo.jarvi...@linux.intel.com>
Suggested-by: Matthew W Carlis <ma...@purestorage.com>
Suggested-by: Lukas Wunner <lu...@wunner.de>
Signed-off-by: Shuai Xue <xuesh...@linux.alibaba.com>
---
 drivers/pci/hotplug/pciehp_hpc.c |  5 ++-
 drivers/pci/pci.c                |  2 +-
 drivers/pci/pci.h                | 12 +++++--
 drivers/pci/pcie/bwctrl.c        |  4 +--
 drivers/pci/probe.c              | 10 ++++--
 include/linux/pci.h              |  1 +
 include/trace/events/pci.h       | 56 ++++++++++++++++++++++++++++++++
 7 files changed, 80 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 91d2d92717d9..232d6704cb81 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -292,7 +292,7 @@ int pciehp_check_link_status(struct controller *ctrl)
 {
        struct pci_dev *pdev = ctrl_dev(ctrl);
        bool found;
-       u16 lnk_status, linksta2;
+       u16 lnk_status;
 
        if (!pcie_wait_for_link(pdev, true)) {
                ctrl_info(ctrl, "Slot(%s): No link\n", slot_name(ctrl));
@@ -319,8 +319,7 @@ int pciehp_check_link_status(struct controller *ctrl)
                return -1;
        }
 
-       pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &linksta2);
-       __pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status, 
linksta2);
+       pcie_update_link_speed(ctrl->pcie->port->subordinate, PCIE_HOTPLUG);
 
        if (!found) {
                ctrl_info(ctrl, "Slot(%s): No device found\n",
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 9e42090fb108..6c13af287d63 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4739,7 +4739,7 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt)
         * Link Speed.
         */
        if (pdev->subordinate)
-               pcie_update_link_speed(pdev->subordinate);
+               pcie_update_link_speed(pdev->subordinate, PCIE_LINK_RETRAIN);
 
        return rc;
 }
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 12215ee72afb..438a6d29d84e 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -452,7 +452,6 @@ static inline int pcie_dev_speed_mbps(enum pci_bus_speed 
speed)
 }
 
 u8 pcie_get_supported_speeds(struct pci_dev *dev);
-const char *pci_speed_string(enum pci_bus_speed speed);
 void __pcie_print_link_status(struct pci_dev *dev, bool verbose);
 void pcie_report_downtraining(struct pci_dev *dev);
 
@@ -461,7 +460,16 @@ static inline void __pcie_update_link_speed(struct pci_bus 
*bus, u16 linksta, u1
        bus->cur_bus_speed = pcie_link_speed[linksta & PCI_EXP_LNKSTA_CLS];
        bus->flit_mode = (linksta2 & PCI_EXP_LNKSTA2_FLIT) ? 1 : 0;
 }
-void pcie_update_link_speed(struct pci_bus *bus);
+
+enum pcie_link_change_reason {
+       PCIE_LINK_RETRAIN,
+       PCIE_ADD_BUS,
+       PCIE_BWCTRL_ENABLE,
+       PCIE_BWCTRL_IRQ,
+       PCIE_HOTPLUG
+};
+
+void pcie_update_link_speed(struct pci_bus *bus, enum pcie_link_change_reason 
reason);
 
 /* Single Root I/O Virtualization */
 struct pci_sriov {
diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c
index 36f939f23d34..32f1b30ecb84 100644
--- a/drivers/pci/pcie/bwctrl.c
+++ b/drivers/pci/pcie/bwctrl.c
@@ -199,7 +199,7 @@ static void pcie_bwnotif_enable(struct pcie_device *srv)
         * Update after enabling notifications & clearing status bits ensures
         * link speed is up to date.
         */
-       pcie_update_link_speed(port->subordinate);
+       pcie_update_link_speed(port->subordinate, PCIE_BWCTRL_ENABLE);
 }
 
 static void pcie_bwnotif_disable(struct pci_dev *port)
@@ -234,7 +234,7 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
         * speed (inside pcie_update_link_speed()) after LBMS has been
         * cleared to avoid missing link speed changes.
         */
-       pcie_update_link_speed(port->subordinate);
+       pcie_update_link_speed(port->subordinate, PCIE_BWCTRL_IRQ);
 
        return IRQ_HANDLED;
 }
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 4b8693ec9e4c..1a42a156f501 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -21,6 +21,7 @@
 #include <linux/irqdomain.h>
 #include <linux/pm_runtime.h>
 #include <linux/bitfield.h>
+#include <trace/events/pci.h>
 #include "pci.h"
 
 #define CARDBUS_LATENCY_TIMER  176     /* secondary latency timer */
@@ -788,14 +789,19 @@ const char *pci_speed_string(enum pci_bus_speed speed)
 }
 EXPORT_SYMBOL_GPL(pci_speed_string);
 
-void pcie_update_link_speed(struct pci_bus *bus)
+void pcie_update_link_speed(struct pci_bus *bus, enum pcie_link_change_reason 
reason)
 {
        struct pci_dev *bridge = bus->self;
        u16 linksta, linksta2;
 
        pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &linksta);
        pcie_capability_read_word(bridge, PCI_EXP_LNKSTA2, &linksta2);
+
        __pcie_update_link_speed(bus, linksta, linksta2);
+       trace_pcie_link_event(bus,
+                            reason,
+                            FIELD_GET(PCI_EXP_LNKSTA_NLW, linksta),
+                            linksta & PCI_EXP_LNKSTA_LINK_STATUS_MASK);
 }
 EXPORT_SYMBOL_GPL(pcie_update_link_speed);
 
@@ -882,7 +888,7 @@ static void pci_set_bus_speed(struct pci_bus *bus)
                pcie_capability_read_dword(bridge, PCI_EXP_LNKCAP, &linkcap);
                bus->max_bus_speed = pcie_link_speed[linkcap & 
PCI_EXP_LNKCAP_SLS];
 
-               pcie_update_link_speed(bus);
+               pcie_update_link_speed(bus, PCIE_ADD_BUS);
        }
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 05e68f35f392..8346121c035d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -305,6 +305,7 @@ enum pci_bus_speed {
        PCI_SPEED_UNKNOWN               = 0xff,
 };
 
+const char *pci_speed_string(enum pci_bus_speed speed);
 enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev);
 enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev);
 
diff --git a/include/trace/events/pci.h b/include/trace/events/pci.h
index 208609492c06..78e651b95cb3 100644
--- a/include/trace/events/pci.h
+++ b/include/trace/events/pci.h
@@ -57,6 +57,62 @@ TRACE_EVENT(pci_hp_event,
        )
 );
 
+#define PCI_EXP_LNKSTA_LINK_STATUS_MASK (PCI_EXP_LNKSTA_LBMS | \
+                                        PCI_EXP_LNKSTA_LABS | \
+                                        PCI_EXP_LNKSTA_LT | \
+                                        PCI_EXP_LNKSTA_DLLLA)
+
+#define LNKSTA_FLAGS                                   \
+       { PCI_EXP_LNKSTA_LT,    "LT"},                  \
+       { PCI_EXP_LNKSTA_DLLLA, "DLLLA"},               \
+       { PCI_EXP_LNKSTA_LBMS,  "LBMS"},                \
+       { PCI_EXP_LNKSTA_LABS,  "LABS"}
+
+TRACE_EVENT(pcie_link_event,
+
+       TP_PROTO(struct pci_bus *bus,
+                 unsigned int reason,
+                 unsigned int width,
+                 unsigned int status
+               ),
+
+       TP_ARGS(bus, reason, width, status),
+
+       TP_STRUCT__entry(
+               __string(       port_name,      pci_name(bus->self))
+               __field(        unsigned int,   type            )
+               __field(        unsigned int,   reason          )
+               __field(        unsigned int,   cur_bus_speed   )
+               __field(        unsigned int,   max_bus_speed   )
+               __field(        unsigned int,   width           )
+               __field(        unsigned int,   flit_mode       )
+               __field(        unsigned int,   link_status     )
+       ),
+
+       TP_fast_assign(
+               __assign_str(port_name);
+               __entry->type                   = pci_pcie_type(bus->self);
+               __entry->reason                 = reason;
+               __entry->cur_bus_speed          = bus->cur_bus_speed;
+               __entry->max_bus_speed          = bus->max_bus_speed;
+               __entry->width                  = width;
+               __entry->flit_mode              = bus->flit_mode;
+               __entry->link_status            = status;
+       ),
+
+       TP_printk("%s type:%d, reason:%d, cur_bus_speed:%s, max_bus_speed:%s, 
width:%u, flit_mode:%u, status:%s\n",
+               __get_str(port_name),
+               __entry->type,
+               __entry->reason,
+               pci_speed_string(__entry->cur_bus_speed),
+               pci_speed_string(__entry->max_bus_speed),
+               __entry->width,
+               __entry->flit_mode,
+               __print_flags((unsigned long)__entry->link_status, "|",
+                               LNKSTA_FLAGS)
+       )
+);
+
 #endif /* _TRACE_HW_EVENT_PCI_H */
 
 /* This part must be outside protection */
-- 
2.39.3


Reply via email to