From: Clément Léger <[email protected]>

In order to use SSE within PMU drivers, register an SSE handler for the
local PMU overflow event. Reuse the existing overflow IRQ handler and pass
the pt_regs provided by the SSE handler. Disable/enable the local SSE
event from the pmu_disable()/pmu_enable() callbacks so that the NMI-like
handler cannot run while the counters are being reprogrammed. Add a config
option, RISCV_PMU_SBI_SSE, to select delivery of PMU interrupts via SSE
events.

When the SSE path is used, also honor the return value from
perf_event_overflow(). If perf core throttles or disables an event, do not
immediately restart the overflowed counters from the SSE handler.
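
In short, the SSE path boils down to the following (simplified sketch of
the riscv_pmu_sbi.c changes below, not literal code):

  /* SSE entry point: regs come from the SSE trap, not get_irq_regs() */
  static int pmu_sbi_ovf_sse_handler(u32 evt, void *arg, struct pt_regs *regs)
  {
          struct cpu_hw_events __percpu *hw_events = arg;

          /* from_sse = true: skip clearing the overflow pending bit, it is
           * already cleared when the counters are stopped */
          pmu_sbi_ovf_handler(raw_cpu_ptr(hw_events), regs, true);
          return 0;
  }

  /* ...and in pmu_sbi_ovf_handler() itself: */
  ev_overflow |= perf_event_overflow(event, &data, regs);
  ...
  /* counters stay stopped if perf core throttled/disabled an event */
  if (!ev_overflow || !from_sse)
          pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);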

Signed-off-by: Clément Léger <[email protected]>
Co-developed-by: Zhanpeng Zhang <[email protected]>
Signed-off-by: Zhanpeng Zhang <[email protected]>
---
 drivers/perf/Kconfig           | 10 +++++
 drivers/perf/riscv_pmu.c       | 23 +++++++++++
 drivers/perf/riscv_pmu_sbi.c   | 78 ++++++++++++++++++++++++++++++++-----
 include/linux/perf/riscv_pmu.h |  5 +++
 4 files changed, 104 insertions(+), 12 deletions(-)

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index ab90932fc2d0..b6c58475091c 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -105,6 +105,16 @@ config RISCV_PMU_SBI
          full perf feature support i.e. counter overflow, privilege mode
          filtering, counter configuration.
 
+config RISCV_PMU_SBI_SSE
+       depends on RISCV_PMU && RISCV_SBI_SSE
+       bool "RISC-V PMU SSE events"
+       default n
+       help
+         Say Y if you want PMU overflow interrupts to be delivered as SSE
+         events. Since SSE events are NMI-like, this provides a way to
+         profile the kernel even in sections where interrupts are disabled.
+         SSE event delivery can be intrusive, so only enable it when needed.
+
 config STARFIVE_STARLINK_PMU
        depends on ARCH_STARFIVE || COMPILE_TEST
        depends on 64BIT
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index 8e3cd0f35336..a48e286d9394 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -13,6 +13,7 @@
 #include <linux/irqdesc.h>
 #include <linux/perf/riscv_pmu.h>
 #include <linux/printk.h>
+#include <linux/riscv_sbi_sse.h>
 #include <linux/smp.h>
 #include <linux/sched_clock.h>
 
@@ -254,6 +255,24 @@ void riscv_pmu_start(struct perf_event *event, int flags)
        perf_event_update_userpage(event);
 }
 
+#ifdef CONFIG_RISCV_PMU_SBI_SSE
+static void riscv_pmu_disable(struct pmu *pmu)
+{
+       struct riscv_pmu *rvpmu = to_riscv_pmu(pmu);
+
+       if (rvpmu->sse_evt)
+               sse_event_disable_local(rvpmu->sse_evt);
+}
+
+static void riscv_pmu_enable(struct pmu *pmu)
+{
+       struct riscv_pmu *rvpmu = to_riscv_pmu(pmu);
+
+       if (rvpmu->sse_evt)
+               sse_event_enable_local(rvpmu->sse_evt);
+}
+#endif
+
 static int riscv_pmu_add(struct perf_event *event, int flags)
 {
        struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
@@ -411,6 +430,10 @@ struct riscv_pmu *riscv_pmu_alloc(void)
                .event_mapped   = riscv_pmu_event_mapped,
                .event_unmapped = riscv_pmu_event_unmapped,
                .event_idx      = riscv_pmu_event_idx,
+#ifdef CONFIG_RISCV_PMU_SBI_SSE
+               .pmu_enable     = riscv_pmu_enable,
+               .pmu_disable    = riscv_pmu_disable,
+#endif
                .add            = riscv_pmu_add,
                .del            = riscv_pmu_del,
                .start          = riscv_pmu_start,
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 385af5e6e6d0..ac10ebd73c7c 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -17,6 +17,7 @@
 #include <linux/irqdomain.h>
 #include <linux/of_irq.h>
 #include <linux/of.h>
+#include <linux/riscv_sbi_sse.h>
 #include <linux/cpu_pm.h>
 #include <linux/sched/clock.h>
 #include <linux/soc/andes/irq.h>
@@ -1038,10 +1039,10 @@ static void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
                pmu_sbi_start_ovf_ctrs_sbi(cpu_hw_evt, ctr_ovf_mask);
 }
 
-static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
+static irqreturn_t pmu_sbi_ovf_handler(struct cpu_hw_events *cpu_hw_evt,
+                                      struct pt_regs *regs, bool from_sse)
 {
        struct perf_sample_data data;
-       struct pt_regs *regs;
        struct hw_perf_event *hw_evt;
        union sbi_pmu_ctr_info *info;
        int lidx, hidx, fidx;
@@ -1049,7 +1050,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
        struct perf_event *event;
+       int ev_overflow = 0;
        u64 overflow;
        u64 overflowed_ctrs = 0;
-       struct cpu_hw_events *cpu_hw_evt = dev;
        u64 start_clock = sched_clock();
        struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
 
@@ -1059,13 +1059,15 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
        /* Firmware counter don't support overflow yet */
        fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
        if (fidx == RISCV_MAX_COUNTERS) {
-               csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
+               if (!from_sse)
+                       csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
                return IRQ_NONE;
        }
 
        event = cpu_hw_evt->events[fidx];
        if (!event) {
-               ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
+               if (!from_sse)
+                       ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
                return IRQ_NONE;
        }
 
@@ -1080,16 +1082,16 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
 
        /*
         * Overflow interrupt pending bit should only be cleared after stopping
-        * all the counters to avoid any race condition.
+        * all the counters to avoid any race condition. When using SSE,
+        * interrupt is cleared when stopping counters.
         */
-       ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
+       if (!from_sse)
+               ALT_SBI_PMU_OVF_CLEAR_PENDING(riscv_pmu_irq_mask);
 
        /* No overflow bit is set */
        if (!overflow)
                return IRQ_NONE;
 
-       regs = get_irq_regs();
-
        for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
                struct perf_event *event = cpu_hw_evt->events[lidx];
 
@@ -1133,18 +1136,65 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
                         * TODO: We will need to stop the guest counters once
                         * virtualization support is added.
                         */
-                       perf_event_overflow(event, &data, regs);
+                       ev_overflow |= perf_event_overflow(event, &data, regs);
                }
                /* Reset the state as we are going to start the counter after the loop */
                hw_evt->state = 0;
        }
 
-       pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
+       if (!ev_overflow || !from_sse)
+               pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
+
        perf_sample_event_took(sched_clock() - start_clock);
 
        return IRQ_HANDLED;
 }
 
+static irqreturn_t pmu_sbi_ovf_irq_handler(int irq, void *dev)
+{
+       return pmu_sbi_ovf_handler(dev, get_irq_regs(), false);
+}
+
+#ifdef CONFIG_RISCV_PMU_SBI_SSE
+static int pmu_sbi_ovf_sse_handler(u32 evt, void *arg, struct pt_regs *regs)
+{
+       struct cpu_hw_events __percpu *hw_events = arg;
+       struct cpu_hw_events *hw_event = raw_cpu_ptr(hw_events);
+
+       pmu_sbi_ovf_handler(hw_event, regs, true);
+
+       return 0;
+}
+
+static int pmu_sbi_setup_sse(struct riscv_pmu *pmu)
+{
+       int ret;
+       struct sse_event *evt;
+       struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
+
+       evt = sse_event_register(SBI_SSE_EVENT_LOCAL_PMU_OVERFLOW, 0,
+                                pmu_sbi_ovf_sse_handler, hw_events);
+       if (IS_ERR(evt))
+               return PTR_ERR(evt);
+
+       ret = sse_event_enable(evt);
+       if (ret) {
+               sse_event_unregister(evt);
+               return ret;
+       }
+
+       pr_info("using SSE for PMU event delivery\n");
+       pmu->sse_evt = evt;
+
+       return ret;
+}
+#else
+static int pmu_sbi_setup_sse(struct riscv_pmu *pmu)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
 static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
 {
        struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
@@ -1195,6 +1242,10 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
        struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
        struct irq_domain *domain = NULL;
 
+       ret = pmu_sbi_setup_sse(pmu);
+       if (!ret)
+               return 0;
+
        if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
                riscv_pmu_irq_num = RV_IRQ_PMU;
                riscv_pmu_use_irq = true;
@@ -1229,7 +1280,7 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
                return -ENODEV;
        }
 
-       ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
+       ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_irq_handler, "riscv-pmu", hw_events);
        if (ret) {
                pr_err("registering percpu irq failed [%d]\n", ret);
                return ret;
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index f82a28040594..08fdcf6baf4e 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -28,6 +28,8 @@
 
 #define RISCV_PMU_CONFIG1_GUEST_EVENTS 0x1
 
+struct sse_event;
+
 struct cpu_hw_events {
        /* currently enabled events */
        int                     n_events;
@@ -54,6 +56,9 @@ struct riscv_pmu {
        char            *name;
 
        irqreturn_t     (*handle_irq)(int irq_num, void *dev);
+#ifdef CONFIG_RISCV_PMU_SBI_SSE
+       struct sse_event *sse_evt;
+#endif
 
        unsigned long   cmask;
        u64             (*ctr_read)(struct perf_event *event);
-- 
2.50.1 (Apple Git-155)
