Replace many of the indirect calls through the x86_pmu function pointers with static_call().

XXX: performance numbers still need to be run.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
 arch/x86/events/core.c |  136 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 98 insertions(+), 38 deletions(-)

--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -28,6 +28,7 @@
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/nospec.h>
+#include <linux/static_call.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -51,6 +52,45 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu
 
 DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
 
+static void _x86_pmu_add(struct perf_event *event) { }
+static void _x86_pmu_del(struct perf_event *event) { }
+static void _x86_pmu_read(struct perf_event *event) { x86_perf_event_update(event); }
+static void _x86_pmu_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { }
+static void _x86_pmu_drain_pebs(struct pt_regs *regs) { }
+static void _x86_pmu_pebs_aliases(struct perf_event *event) { }
+static void _x86_pmu_start_scheduling(struct cpu_hw_events *cpuc) { }
+static void _x86_pmu_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) { }
+static void _x86_pmu_stop_scheduling(struct cpu_hw_events *cpuc) { }
+static void _x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) { }
+
+DEFINE_STATIC_CALL(x86_pmu_handle_irq, x86_pmu_handle_irq);
+DEFINE_STATIC_CALL(x86_pmu_disable_all, x86_pmu_disable_all);
+DEFINE_STATIC_CALL(x86_pmu_enable_all, x86_pmu_enable_all);
+DEFINE_STATIC_CALL(x86_pmu_enable, x86_pmu_enable_event);
+DEFINE_STATIC_CALL(x86_pmu_disable, x86_pmu_disable_event);
+
+DEFINE_STATIC_CALL(x86_pmu_add, _x86_pmu_add);
+DEFINE_STATIC_CALL(x86_pmu_del, _x86_pmu_del);
+DEFINE_STATIC_CALL(x86_pmu_read, _x86_pmu_read);
+
+DEFINE_STATIC_CALL(x86_pmu_schedule_events, x86_schedule_events);
+
+// addr_offset
+// rdpmc_index
+// event_map
+
+DEFINE_STATIC_CALL(x86_pmu_get_event_constraints, x86_get_event_constraints);
+DEFINE_STATIC_CALL(x86_pmu_put_event_constraints, _x86_pmu_put_event_constraints);
+
+DEFINE_STATIC_CALL(x86_pmu_drain_pebs, _x86_pmu_drain_pebs);
+DEFINE_STATIC_CALL(x86_pmu_pebs_aliases, _x86_pmu_pebs_aliases);
+
+DEFINE_STATIC_CALL(x86_pmu_start_scheduling, _x86_pmu_start_scheduling);
+DEFINE_STATIC_CALL(x86_pmu_commit_scheduling, _x86_pmu_commit_scheduling);
+DEFINE_STATIC_CALL(x86_pmu_stop_scheduling, _x86_pmu_stop_scheduling);
+
+DEFINE_STATIC_CALL(x86_pmu_sched_task, _x86_pmu_sched_task);
+
 u64 __read_mostly hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -651,7 +691,7 @@ static void x86_pmu_disable(struct pmu *
        cpuc->enabled = 0;
        barrier();
 
-       x86_pmu.disable_all();
+       static_call(x86_pmu_disable_all)();
 }
 
 void x86_pmu_enable_all(int added)
@@ -884,8 +924,7 @@ int x86_schedule_events(struct cpu_hw_ev
        if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
                n0 -= cpuc->n_txn;
 
-       if (x86_pmu.start_scheduling)
-               x86_pmu.start_scheduling(cpuc);
+       static_cond_call(x86_pmu_start_scheduling)(cpuc);
 
        for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
                c = cpuc->event_constraint[i];
@@ -902,7 +941,7 @@ int x86_schedule_events(struct cpu_hw_ev
                 * change due to external factors (sibling state, allow_tfa).
                 */
                if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
-                       c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+                       c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]);
                        cpuc->event_constraint[i] = c;
                }
 
@@ -969,8 +1008,7 @@ int x86_schedule_events(struct cpu_hw_ev
        if (!unsched && assign) {
                for (i = 0; i < n; i++) {
                        e = cpuc->event_list[i];
-                       if (x86_pmu.commit_scheduling)
-                               x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+                       static_cond_call(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
                }
        } else {
                for (i = n0; i < n; i++) {
@@ -979,15 +1017,13 @@ int x86_schedule_events(struct cpu_hw_ev
                        /*
                         * release events that failed scheduling
                         */
-                       if (x86_pmu.put_event_constraints)
-                               x86_pmu.put_event_constraints(cpuc, e);
+                       static_cond_call(x86_pmu_put_event_constraints)(cpuc, e);
 
                        cpuc->event_constraint[i] = NULL;
                }
        }
 
-       if (x86_pmu.stop_scheduling)
-               x86_pmu.stop_scheduling(cpuc);
+       static_cond_call(x86_pmu_stop_scheduling)(cpuc);
 
        return unsched ? -EINVAL : 0;
 }
@@ -1174,7 +1210,7 @@ static void x86_pmu_enable(struct pmu *p
        cpuc->enabled = 1;
        barrier();
 
-       x86_pmu.enable_all(added);
+       static_call(x86_pmu_enable_all)(added);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1288,7 +1324,7 @@ static int x86_pmu_add(struct perf_event
        if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
                goto done_collect;
 
-       ret = x86_pmu.schedule_events(cpuc, n, assign);
+       ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
        if (ret)
                goto out;
        /*
@@ -1306,13 +1342,11 @@ static int x86_pmu_add(struct perf_event
        cpuc->n_added += n - n0;
        cpuc->n_txn += n - n0;
 
-       if (x86_pmu.add) {
-               /*
-                * This is before x86_pmu_enable() will call x86_pmu_start(),
-                * so we enable LBRs before an event needs them etc..
-                */
-               x86_pmu.add(event);
-       }
+       /*
+        * This is before x86_pmu_enable() will call x86_pmu_start(),
+        * so we enable LBRs before an event needs them etc..
+        */
+       static_cond_call(x86_pmu_add)(event);
 
        ret = 0;
 out:
@@ -1340,7 +1374,7 @@ static void x86_pmu_start(struct perf_ev
        cpuc->events[idx] = event;
        __set_bit(idx, cpuc->active_mask);
        __set_bit(idx, cpuc->running);
-       x86_pmu.enable(event);
+       static_call(x86_pmu_enable)(event);
        perf_event_update_userpage(event);
 }
 
@@ -1410,7 +1444,7 @@ void x86_pmu_stop(struct perf_event *eve
        struct hw_perf_event *hwc = &event->hw;
 
        if (test_bit(hwc->idx, cpuc->active_mask)) {
-               x86_pmu.disable(event);
+               static_call(x86_pmu_disable)(event);
                __clear_bit(hwc->idx, cpuc->active_mask);
                cpuc->events[hwc->idx] = NULL;
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
@@ -1460,8 +1494,7 @@ static void x86_pmu_del(struct perf_even
        if (i >= cpuc->n_events - cpuc->n_added)
                --cpuc->n_added;
 
-       if (x86_pmu.put_event_constraints)
-               x86_pmu.put_event_constraints(cpuc, event);
+       static_cond_call(x86_pmu_put_event_constraints)(cpuc, event);
 
        /* Delete the array entry. */
        while (++i < cpuc->n_events) {
@@ -1474,13 +1507,12 @@ static void x86_pmu_del(struct perf_even
        perf_event_update_userpage(event);
 
 do_del:
-       if (x86_pmu.del) {
-               /*
-                * This is after x86_pmu_stop(); so we disable LBRs after any
-                * event can need them etc..
-                */
-               x86_pmu.del(event);
-       }
+
+       /*
+        * This is after x86_pmu_stop(); so we disable LBRs after any
+        * event can need them etc..
+        */
+       static_cond_call(x86_pmu_del)(event);
 }
 
 int x86_pmu_handle_irq(struct pt_regs *regs)
@@ -1558,7 +1590,7 @@ perf_event_nmi_handler(unsigned int cmd,
                return NMI_DONE;
 
        start_clock = sched_clock();
-       ret = x86_pmu.handle_irq(regs);
+       ret = static_call(x86_pmu_handle_irq)(regs);
        finish_clock = sched_clock();
 
        perf_sample_event_took(finish_clock - start_clock);
@@ -1765,6 +1797,32 @@ ssize_t x86_event_sysfs_show(char *page,
 static struct attribute_group x86_pmu_attr_group;
 static struct attribute_group x86_pmu_caps_group;
 
+static void x86_pmu_static_call_update(void)
+{
+       static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq);
+       static_call_update(x86_pmu_disable_all, x86_pmu.disable_all);
+       static_call_update(x86_pmu_enable_all, x86_pmu.enable_all);
+       static_call_update(x86_pmu_enable, x86_pmu.enable);
+       static_call_update(x86_pmu_disable, x86_pmu.disable);
+
+       static_call_update(x86_pmu_add, x86_pmu.add);
+       static_call_update(x86_pmu_del, x86_pmu.del);
+       static_call_update(x86_pmu_read, x86_pmu.read);
+
+       static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
+       static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
+       static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
+
+       static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
+       static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
+
+       static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling);
+       static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling);
+       static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling);
+
+       static_call_update(x86_pmu_sched_task, x86_pmu.sched_task);
+}
+
 static int __init init_hw_perf_events(void)
 {
        struct x86_pmu_quirk *quirk;
@@ -1829,6 +1887,11 @@ static int __init init_hw_perf_events(vo
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
+       if (!x86_pmu.read)
+               x86_pmu.read = _x86_pmu_read;
+
+       x86_pmu_static_call_update();
+
        /*
         * Install callbacks. Core will call them for each online
         * cpu.
@@ -1865,11 +1928,9 @@ static int __init init_hw_perf_events(vo
 }
 early_initcall(init_hw_perf_events);
 
-static inline void x86_pmu_read(struct perf_event *event)
+static void x86_pmu_read(struct perf_event *event)
 {
-       if (x86_pmu.read)
-               return x86_pmu.read(event);
-       x86_perf_event_update(event);
+       static_call(x86_pmu_read)(event);
 }
 
 /*
@@ -1946,7 +2007,7 @@ static int x86_pmu_commit_txn(struct pmu
        if (!x86_pmu_initialized())
                return -EAGAIN;
 
-       ret = x86_pmu.schedule_events(cpuc, n, assign);
+       ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
        if (ret)
                return ret;
 
@@ -2239,8 +2300,7 @@ static const struct attribute_group *x86
 
 static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-       if (x86_pmu.sched_task)
-               x86_pmu.sched_task(ctx, sched_in);
+       static_cond_call(x86_pmu_sched_task)(ctx, sched_in);
 }
 
 void perf_check_microcode(void)


Reply via email to