Hello,
I have prepared a new version of the fix to Power6's timers which are
used for sampling(*) PMU registers 5 & 6. In addition to Stephane's
comments about using DEFINE_PER_CPU, I have made a few additional changes:
1) I consolidated all of the per cpu data used for the sampling,
including the hrtimer, into one DEFINE_PER_CPU data structure.
2) Because calls to pfm_power6_enable_counters and
pfm_power6_disable_counters are not perfectly paired (extra calls to
pfm_power6_enable_counters occur from pfm_restart), I needed some extra
code to know when it's safe to call hrtimer_start on a timer and when
it's not necessary. In order to do this correctly without race
conditions, I added a spinlock on the timer to make sure that it's dealt
with synchronously.
3) In the previous patch, the initialization of the hrtimers was done in
pfm_power6_enable_counters. I have changed that to be done once during
start-up of the module; all hrtimers are initialzed when the module is
loaded. This will improve performance a little, and makes the code
cleaner, I think. I used the "on_each_cpu" function to initialize each
of the per-cpu timers (as well as the spin locks).
Having said this, I may have been too conservative by using the spin
lock. If it's the case that the enable and disable functions are not
reentrant on the same CPU, the spin locks are not necessary. So,
Stephane, if you are convinced that the spin locks are not necessary in
this patch, I'd be happy to submit a v3 version without them.
Please have a look at the patch and let me know if you have
comments/issue/suggestions, etc.
Thanks for your consideration,
- Corey
Corey Ashford
Software Engineer
IBM Linux Technology Center, Linux Toolchain
Beaverton, OR
503-578-3507
cjash...@us.ibm.com
(*) The term "sampling" is used rather poorly here. I don't mean
perfmon sampling, but rather code that samples these free-running
counters into virtual registers that can be started and stopped like
counters 1-4 on thread context switches. Counters 5 & 6 cannot be
stopped via hardware.
diff --git a/arch/powerpc/perfmon/perfmon_power6.c
b/arch/powerpc/perfmon/perfmon_power6.c
index 7882feb..bf128e7 100644
--- a/arch/powerpc/perfmon/perfmon_power6.c
+++ b/arch/powerpc/perfmon/perfmon_power6.c
@@ -25,6 +25,9 @@
#include <linux/module.h>
#include <linux/perfmon_kern.h>
+#include <linux/hrtimer.h>
+#include <linux/spinlock.h>
+
MODULE_AUTHOR("Corey Ashford <cjash...@us.ibm.com>");
MODULE_DESCRIPTION("POWER6 PMU description table");
MODULE_LICENSE("GPL");
@@ -71,10 +74,19 @@ static struct pfm_regmap_desc pfm_power6_pmd_desc[] = {
#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power6_pmd_desc)
-u32 pmc5_start_save[NR_CPUS];
-u32 pmc6_start_save[NR_CPUS];
+struct pmc5_6_per_cpu {
+ u32 pmc5_start_save;
+ u32 pmc6_start_save;
+ struct pfm_arch_context *ctx_arch;
+ spinlock_t lock;
+ /* The elements below are protected by "lock" */
+ struct hrtimer timer;
+};
+
+static DEFINE_PER_CPU(struct pmc5_6_per_cpu, pmc5_6_percpu);
+
+#define PMC5_6_PERCPU __get_cpu_var(pmc5_6_percpu)
-static struct timer_list pmc5_6_update[NR_CPUS];
u64 enable_cntrs_cnt;
u64 disable_cntrs_cnt;
u64 call_delta;
@@ -84,9 +96,9 @@ u64 pm1_4_interrupt;
* timer.
*/
struct pfm_arch_context *pmc5_6_ctx_arch[NR_CPUS];
-long int update_time;
+ktime_t update_time;
-static void delta(int cpu_num, struct pfm_arch_context *ctx_arch)
+static void delta(struct pfm_arch_context *ctx_arch)
{
u32 tmp5, tmp6;
@@ -100,22 +112,21 @@ static void delta(int cpu_num, struct pfm_arch_context
*ctx_arch)
* arithmetic for the deltas to come out correct (especially in the
* presence of a 32-bit counter wrap).
*/
- ctx_arch->powergs_pmc5 += (u64)(tmp5 - pmc5_start_save[cpu_num]);
- ctx_arch->powergs_pmc6 += (u64)(tmp6 - pmc6_start_save[cpu_num]);
+ ctx_arch->powergs_pmc5 += (u64)(tmp5 - PMC5_6_PERCPU.pmc5_start_save);
+ ctx_arch->powergs_pmc6 += (u64)(tmp6 - PMC5_6_PERCPU.pmc6_start_save);
- pmc5_start_save[cpu_num] = tmp5;
- pmc6_start_save[cpu_num] = tmp6;
-
- return;
+ PMC5_6_PERCPU.pmc5_start_save = tmp5;
+ PMC5_6_PERCPU.pmc6_start_save = tmp6;
}
-static void pmc5_6_updater(unsigned long cpu_num)
+static enum hrtimer_restart pmc5_6_updater(struct hrtimer *hrtimer)
{
/* update the virtual pmd 5 and pmd 6 counters */
- delta(cpu_num, pmc5_6_ctx_arch[cpu_num]);
- mod_timer(&pmc5_6_update[cpu_num], jiffies + update_time);
+ delta(PMC5_6_PERCPU.ctx_arch);
+ hrtimer_forward_now(&PMC5_6_PERCPU.timer, update_time);
+ return HRTIMER_RESTART;
}
@@ -185,7 +196,6 @@ static void pfm_power6_write_pmd(unsigned int cnum, u64
value)
static u64 pfm_power6_sread(struct pfm_context *ctx, unsigned int cnum)
{
struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
- int cpu_num = smp_processor_id();
/* On POWER 6 PMC5 and PMC6 are implemented as
* virtual counters. See comment in pfm_power6_pmd_desc
@@ -194,11 +204,11 @@ static u64 pfm_power6_sread(struct pfm_context *ctx,
unsigned int cnum)
switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
case SPRN_PMC5:
- return ctx_arch->powergs_pmc5 + (u64)((u32)mfspr(SPRN_PMC5) -
pmc5_start_save[cpu_num]);
+ return ctx_arch->powergs_pmc5 + (u64)((u32)mfspr(SPRN_PMC5) -
PMC5_6_PERCPU.pmc5_start_save);
break;
case SPRN_PMC6:
- return ctx_arch->powergs_pmc6 + (u64)((u32)mfspr(SPRN_PMC6) -
pmc6_start_save[cpu_num]);
+ return ctx_arch->powergs_pmc6 + (u64)((u32)mfspr(SPRN_PMC6) -
PMC5_6_PERCPU.pmc6_start_save);
break;
case PFM_DELTA_TB:
@@ -222,16 +232,15 @@ void pfm_power6_swrite(struct pfm_context *ctx, unsigned
int cnum,
u64 val)
{
struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
- int cpu_num = smp_processor_id();
switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
case SPRN_PMC5:
- pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
+ PMC5_6_PERCPU.pmc5_start_save = mfspr(SPRN_PMC5);
ctx_arch->powergs_pmc5 = val;
break;
case SPRN_PMC6:
- pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
+ PMC5_6_PERCPU.pmc6_start_save = mfspr(SPRN_PMC6);
ctx_arch->powergs_pmc6 = val;
break;
@@ -284,7 +293,6 @@ static void pfm_power6_enable_counters(struct pfm_context
*ctx,
{
unsigned int i, max_pmc;
- int cpu_num = smp_processor_id();
struct pfm_arch_context *ctx_arch;
enable_cntrs_cnt++;
@@ -300,8 +308,8 @@ static void pfm_power6_enable_counters(struct pfm_context
*ctx,
pfm_power6_write_pmc(i - 1, set->pmcs[i - 1]);
/* save current free running HW event count */
- pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
- pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
+ PMC5_6_PERCPU.pmc5_start_save = mfspr(SPRN_PMC5);
+ PMC5_6_PERCPU.pmc6_start_save = mfspr(SPRN_PMC6);
ctx_arch->delta_purr_start = mfspr(SPRN_PURR);
@@ -314,19 +322,16 @@ static void pfm_power6_enable_counters(struct pfm_context
*ctx,
/* Start kernel timer for this cpu to periodically update
* the virtual counters.
*/
- init_timer(&pmc5_6_update[cpu_num]);
- pmc5_6_update[cpu_num].function = pmc5_6_updater;
- pmc5_6_update[cpu_num].data = (unsigned long) cpu_num;
- pmc5_6_update[cpu_num].expires = jiffies + update_time;
- /* context for this timer, timer will be removed if context
- * is switched because the counters will be stopped first.
- * NEEDS WORK, I think this is all ok, a little concerned about a
- * race between the kernel timer going off right as the counters
- * are being stopped and the context switching. Need to think
- * about this.
- */
- pmc5_6_ctx_arch[cpu_num] = ctx_arch;
- add_timer(&pmc5_6_update[cpu_num]);
+ PMC5_6_PERCPU.ctx_arch = ctx_arch;
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&PMC5_6_PERCPU.lock, flags);
+ if (! hrtimer_active(&PMC5_6_PERCPU.timer)) {
+ hrtimer_start(&PMC5_6_PERCPU.timer, update_time,
HRTIMER_MODE_REL);
+ }
+ spin_unlock_irqrestore(&PMC5_6_PERCPU.lock, flags);
+ }
}
/**
@@ -337,7 +342,6 @@ static void pfm_power6_disable_counters(struct pfm_context
*ctx,
struct pfm_event_set *set)
{
struct pfm_arch_context *ctx_arch;
- int cpu_num = smp_processor_id();
disable_cntrs_cnt++;
@@ -345,14 +349,20 @@ static void pfm_power6_disable_counters(struct
pfm_context *ctx,
mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
asm volatile ("sync");
- /* delete kernel update timer */
- del_timer_sync(&pmc5_6_update[cpu_num]);
+ /* cancel the pmc5/6 update timer */
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&PMC5_6_PERCPU.lock, flags);
+ hrtimer_cancel(&PMC5_6_PERCPU.timer);
+ spin_unlock_irqrestore(&PMC5_6_PERCPU.lock, flags);
+ }
/* Update the virtual pmd 5 and 6 counters from the free running
* HW counters
*/
ctx_arch = pfm_ctx_arch(ctx);
- delta(cpu_num, ctx_arch);
+ delta(ctx_arch);
ctx_arch->delta_tb +=
(((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
@@ -480,6 +490,14 @@ static struct pfm_pmu_config pfm_power6_pmu_conf = {
.owner = THIS_MODULE
};
+static void init_pmc5_6_per_cpu(void *info) {
+ hrtimer_init(&PMC5_6_PERCPU.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ PMC5_6_PERCPU.timer.function = pmc5_6_updater;
+ PMC5_6_PERCPU.timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
+ spin_lock_init(&PMC5_6_PERCPU.lock);
+}
+
+
static int __init pfm_power6_pmu_init_module(void)
{
int ret;
@@ -498,15 +516,22 @@ static int __init pfm_power6_pmu_init_module(void)
*/
# define MAX_EVENT_RATE (ppc_proc_freq * 2)
-
- /*
- * Calculate the time, in jiffies, it takes for event counter 5 or
- * 6 to completely wrap when counting at the max event rate, and
- * then figure on sampling at twice that rate.
- */
- update_time = (((unsigned long)HZ * OVERFLOW_VALUE)
- / ((unsigned long)MAX_EVENT_RATE)) / 2;
-
+# define SAFETY_FACTOR 2
+ {
+ u64 update_time_ns;
+
+ /*
+ * Calculate the time, in seconds + nanoseconds, it takes for
+ * counter 5 or 6 to completely wrap when counting at the max
+ * event rate, and then figure on sampling at half that period.
+ */
+ update_time_ns = ((NSEC_PER_SEC * OVERFLOW_VALUE) /
+ MAX_EVENT_RATE) / SAFETY_FACTOR;
+ update_time = ktime_set(
+ update_time_ns / NSEC_PER_SEC,
+ update_time_ns % NSEC_PER_SEC);
+ }
+ on_each_cpu(init_pmc5_6_per_cpu, NULL, 1);
ret = pfm_pmu_register(&pfm_power6_pmu_conf);
return ret;
}
------------------------------------------------------------------------------
This SF.net email is sponsored by:
SourcForge Community
SourceForge wants to tell your story.
http://p.sf.net/sfu/sf-spreadtheword
_______________________________________________
perfmon2-devel mailing list
perfmon2-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/perfmon2-devel