Hi Stephane,

I'm attaching the patches for the POWER port to the minimal perfmon2 kernel. I have built a working kernel based on this source, but I haven't tested perfmon2 yet. How are you testing? Can we use the existing libpfm examples without modification? I will try that next.

In addition to the POWER port to the minimal perfmon2, there's also a bug fix which I plan to submit to the full perfmon2 git tree as well. J K Rai noticed a problem with the use of smp_processor_id(): our code had a race condition in that the CPU number is obtained, but nothing prevents the thread from being migrated to another CPU before it goes on to read CPU-specific registers and update data in the pfm context.

To fix this, J K Rai correctly suggested using get_cpu()/put_cpu() pairs to disable preemption while the CPU number is in use, so I have made those changes.
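
To illustrate the pattern (a sketch of the general shape of the fix, not
the literal patch hunks; the elided lines stand for whatever CPU-specific
work the caller does):

    /* before: racy -- the thread can be migrated off this CPU between
     * reading the CPU number and using it
     */
    cpu = smp_processor_id();
    /* ... read CPU-specific SPRs, update per-cpu data in the pfm context ... */

    /* after: get_cpu() disables preemption, so the thread stays on this
     * CPU until the matching put_cpu()
     */
    cpu = get_cpu();
    /* ... read CPU-specific SPRs, update per-cpu data in the pfm context ... */
    put_cpu();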

I'd much appreciate a review of this code before I submit it to the LKML mailing list.

Regards,

- Corey


--
Corey Ashford
Software Engineer
IBM Linux Technology Center, Linux Toolchain
Beaverton, OR
503-578-3507
[EMAIL PROTECTED]
This patch adds chip-independent perfmon2 support for the PowerPC and POWER
architectures.  It does not implement perfmon2 for any specific PowerPC or
POWER chip, but does add the underpinnings for that implementation.

Prerequisites: The patch is built on top of Linux 2.6.26-rc5, plus Stephane
Eranian's minimal perfmon2 patch v2 posted to the LKML mailing list on
6/17.

In addition to adding a new directory, arch/powerpc/perfmon, and several
files in that new directory, it also adds perfmon2 support for handling the
PMU exception on these processors.

In order to share the PMU exception handler with Oprofile, we continue to
use the same exception wrapper (STD_PSERIES_EXCEPTION), but mimic what
the MASKABLE_PSERIES_EXCEPTION macro does from within powerpc_irq_handler
in arch/powerpc/perfmon/perfmon.c.  To do this correctly, a new member
was added to the paca structure to record that a PMU exception occurred
while interrupts were soft disabled.
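
To summarize the flow (condensed from the patch hunks below):

    /* powerpc_irq_handler(): PMU exception taken while soft disabled */
    if (!regs->softe) {
            regs->msr &= ~MSR_EE;               /* stay hard disabled */
            get_paca()->pmu_except_pending = 1; /* remember for later */
            mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
            return;
    }

    /* raw_local_irq_restore(): interrupts are being soft enabled again */
    if (get_pmu_except_pending()) {
            set_pmu_except_pending(0);
            /* toggle PMAO to trigger the edge detection circuitry and
             * re-raise the PMU exception
             */
            mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
            mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
    }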

Index: linux-2.6/include/asm-powerpc/paca.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/paca.h   2008-06-20 13:41:00.000000000 -0700
+++ linux-2.6/include/asm-powerpc/paca.h        2008-06-20 13:41:09.000000000 -0700
@@ -99,6 +99,10 @@
        u8 soft_enabled;                /* irq soft-enable flag */
        u8 hard_enabled;                /* set if irqs are enabled in MSR */
        u8 io_sync;                     /* writel() needs spin_unlock sync */
+#ifdef CONFIG_PERFMON
+       u8 pmu_except_pending;          /* PMU exception occurred while soft
+                                        * disabled */
+#endif
 
        /* Stuff for accurate time accounting */
        u64 user_time;                  /* accumulated usermode TB ticks */
Index: linux-2.6/arch/powerpc/kernel/irq.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/irq.c    2008-06-20 13:41:00.000000000 -0700
+++ linux-2.6/arch/powerpc/kernel/irq.c 2008-06-20 13:41:09.000000000 -0700
@@ -114,6 +114,24 @@
        : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
 }
 
+#ifdef CONFIG_PERFMON
+static inline unsigned long get_pmu_except_pending(void)
+{
+       unsigned long pending;
+
+       __asm__ __volatile__("lbz %0,%1(13)"
+       : "=r" (pending) : "i" (offsetof(struct paca_struct, pmu_except_pending)));
+
+       return pending;
+}
+
+static inline void set_pmu_except_pending(unsigned long pending)
+{
+       __asm__ __volatile__("stb %0,%1(13)"
+       : : "r" (pending), "i" (offsetof(struct paca_struct, pmu_except_pending)));
+}
+#endif /* CONFIG_PERFMON */
+
 void raw_local_irq_restore(unsigned long en)
 {
        /*
@@ -172,6 +190,19 @@
                lv1_get_version_info(&tmp);
        }
 
+#ifdef CONFIG_PERFMON
+       /*
+        * If a PMU exception occurred while interrupts were soft disabled,
+        * re-raise it now by forcing a new PMU exception.
+        */
+       if (get_pmu_except_pending()) {
+               set_pmu_except_pending(0);
+               /* Make sure we trigger the edge detection circuitry */
+               mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
+               mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
+       }
+#endif /* CONFIG_PERFMON */
+
        __hard_irq_enable();
 }
 EXPORT_SYMBOL(raw_local_irq_restore);
Index: linux-2.6/arch/powerpc/perfmon/perfmon.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/arch/powerpc/perfmon/perfmon.c    2008-06-20 16:19:07.000000000 -0700
@@ -0,0 +1,297 @@
+/*
+ * This file implements the powerpc specific
+ * support for the perfmon2 interface
+ *
+ * Copyright (c) 2005 David Gibson, IBM Corporation.
+ *
+ * based on versions for other architectures:
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <[EMAIL PROTECTED]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#include <linux/interrupt.h>
+#include <linux/perfmon_kern.h>
+
+static void pfm_stop_active(struct task_struct *task,
+                           struct pfm_context *ctx, struct pfm_event_set *set)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       arch_info = pfm_pmu_info();
+       BUG_ON(!arch_info->disable_counters || !arch_info->get_ovfl_pmds);
+
+       arch_info->disable_counters(ctx, set);
+
+       if (set->npend_ovfls)
+               return;
+
+       arch_info->get_ovfl_pmds(ctx, set);
+}
+
+/*
+ * Called from pfm_ctxsw(). Task is guaranteed to be current.
+ * Context is locked. Interrupts are masked. Monitoring is active.
+ * PMU access is guaranteed. PMC and PMD registers are live in PMU.
+ *
+ * for per-thread:
+ *     must stop monitoring for the task
+ * Return:
+ *     non-zero : did not save PMDs (as part of stopping the PMU)
+ *            0 : saved PMDs (no need to save them in caller)
+ */
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       arch_info = pfm_pmu_info();
+       /*
+        * disable lazy restore of PMC registers.
+        */
+       ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
+
+       pfm_stop_active(task, ctx, ctx->active_set);
+
+       if (arch_info->ctxswout_thread)
+               arch_info->ctxswout_thread(task, ctx, ctx->active_set);
+
+       return pfm_arch_is_active(ctx);
+}
+
+/*
+ * Called from pfm_ctxsw
+ */
+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       arch_info = pfm_pmu_info();
+       if (ctx->flags.started == 1) {
+               BUG_ON(!arch_info->enable_counters);
+               arch_info->enable_counters(ctx, ctx->active_set);
+       }
+
+       if (arch_info->ctxswin_thread)
+               arch_info->ctxswin_thread(task, ctx, ctx->active_set);
+}
+
+/*
+ * Called from pfm_stop() and idle notifier
+ *
+ * Interrupts are masked. Context is locked. Set is the active set.
+ *
+ * For per-thread:
+ *   task is not necessarily current. If not current task, then
+ *   task is guaranteed stopped and off any cpu. Access to PMU
+ *   is not guaranteed. Interrupts are masked. Context is locked.
+ *   Set is the active set.
+ *
+ * For system-wide:
+ *     task is current
+ *
+ * must disable active monitoring. ctx cannot be NULL
+ */
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
+{
+       /*
+        * no need to go through stop_save()
+        * if we are already stopped
+        */
+       if (!ctx->flags.started)
+               return;
+
+       /*
+        * stop live registers and collect pending overflow
+        */
+       if (task == current)
+               pfm_stop_active(task, ctx, ctx->active_set);
+}
+
+/*
+ * Enable active monitoring. Called from pfm_start() and
+ * pfm_arch_unmask_monitoring().
+ *
+ * Interrupts are masked. Context is locked. Set is the active set.
+ *
+ * For per-thread:
+ *     Task is not necessarily current. If not current task, then task
+ *     is guaranteed stopped and off any cpu. No access to PMU if task
+ *     is not current.
+ *
+ * For system-wide:
+ *     Task is always current
+ */
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       arch_info = pfm_pmu_info();
+       if (task != current)
+               return;
+
+       BUG_ON(!arch_info->enable_counters);
+
+       arch_info->enable_counters(ctx, ctx->active_set);
+}
+
+/*
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
+ * pfm_context_load_sys() and pfm_ctxsw().
+ * context is locked. Interrupts are masked. set cannot be NULL.
+ * Access to the PMU is guaranteed.
+ *
+ * function must restore all PMD registers from set.
+ */
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
+{
+       struct pfm_arch_pmu_info *arch_info;
+       u64 *used_pmds;
+       u16 i, num;
+
+       arch_info = pfm_pmu_info();
+
+       /* The model-specific module can override the default
+        * restore-PMD method.
+        */
+       if (arch_info->restore_pmds)
+               return arch_info->restore_pmds(set);
+
+       num = set->nused_pmds;
+       used_pmds = set->used_pmds;
+
+       for (i = 0; num; i++) {
+               if (likely(test_bit(i, used_pmds))) {
+                       pfm_write_pmd(ctx, i, set->pmds[i].value);
+                       num--;
+               }
+       }
+}
+
+/*
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
+ * pfm_context_load_sys() and pfm_ctxsw().
+ * context is locked. Interrupts are masked. set cannot be NULL.
+ * Access to the PMU is guaranteed.
+ *
+ * function must restore all PMC registers from set, if needed.
+ */
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
+{
+       struct pfm_arch_pmu_info *arch_info;
+       u64 *impl_pmcs;
+       unsigned int i, max_pmc, reg;
+
+       arch_info = pfm_pmu_info();
+       /* The model-specific module can override the default
+        * restore-PMC method.
+        */
+       if (arch_info->restore_pmcs)
+               return arch_info->restore_pmcs(set);
+
+       /* The "common" powerpc models enable the counters simply by writing
+        * all the control registers. Therefore, if we're stopped we
+        * don't need to bother restoring the PMCs now.
+        */
+       if (ctx->flags.started == 0)
+               return;
+
+       max_pmc = pfm_pmu_conf->regs.max_pmc;
+       impl_pmcs = pfm_pmu_conf->regs.pmcs;
+
+       /*
+        * Restore all pmcs in reverse order to ensure the counters aren't
+        * enabled before their event selectors are set correctly.
+        */
+       reg = max_pmc - 1;
+       for (i = 0; i < max_pmc; i++) {
+               if (test_bit(reg, impl_pmcs))
+                       pfm_arch_write_pmc(ctx, reg, set->pmcs[reg]);
+               reg--;
+       }
+}
+
+char *pfm_arch_get_pmu_module_name(void)
+{
+        unsigned int pvr = mfspr(SPRN_PVR);
+
+        switch (PVR_VER(pvr)) {
+        case PV_POWER6:
+               return "perfmon_power6";
+       default:
+               return NULL;
+       }
+}
+
+void pfm_arch_init_percpu(void)
+{
+#ifdef CONFIG_PPC64
+       extern void ppc64_enable_pmcs(void);
+       ppc64_enable_pmcs();
+#endif
+}
+
+/**
+ * powerpc_irq_handler
+ *
+ * Get the perfmon context that belongs to the current CPU, and call the
+ * model-specific interrupt handler.
+ **/
+void powerpc_irq_handler(struct pt_regs *regs)
+{
+       struct pfm_arch_pmu_info *arch_info;
+       struct pfm_context *ctx;
+
+       if (!regs->softe) {
+               /*
+                * We got a PMU interrupt while interrupts were soft
+                * disabled.  Disable hardware interrupts by clearing
+                * MSR_EE and also clear PMAO because we will need to set
+                * that again later when interrupts are re-enabled and
+                * raw_local_irq_restore() sees that the pmu_except_pending
+                * flag is set.
+                */
+               regs->msr &= ~MSR_EE;
+               get_paca()->pmu_except_pending = 1;
+               mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
+               return;
+       }
+
+       arch_info = pfm_pmu_info();
+       if (arch_info->irq_handler) {
+               ctx = __get_cpu_var(pmu_ctx);
+               if (likely(ctx))
+                       arch_info->irq_handler(regs, ctx);
+       }
+}
+
+/**
+ * pfm_reserved_1, pfm_reserved_2, pfm_reserved_3, pfm_reserved_4
+ *
+ * These are placeholder system calls for future use by perfmon2 and are
+ * needed on POWER because the build check scripts do not tolerate gaps
+ * in the syscall numbering.
+ **/
+
+void sys_pfm_reserved_1(void)
+{}
+
+void sys_pfm_reserved_2(void)
+{}
+
+void sys_pfm_reserved_3(void)
+{}
+
+void sys_pfm_reserved_4(void)
+{}
Index: linux-2.6/arch/powerpc/perfmon/Kconfig
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/arch/powerpc/perfmon/Kconfig      2008-06-20 16:55:55.000000000 -0700
@@ -0,0 +1,26 @@
+menu "Hardware Performance Monitoring support"
+config PERFMON
+       bool "Perfmon2 performance monitoring interface"
+       default n
+       help
+       Enables the perfmon2 interface to access the hardware
+       performance counters. See <http://perfmon2.sf.net/> for
+       more details.
+
+config PERFMON_DEBUG
+       bool "Perfmon debugging"
+       default n
+       depends on PERFMON
+       help
+       Enables perfmon debugging support
+
+config PERFMON_DEBUG_FS
+       bool "Enable perfmon statistics reporting via debugfs"
+       default y
+       depends on PERFMON && DEBUG_FS
+       help
+       Enable collection and reporting of perfmon timing statistics under
+       debugfs. This is used for debugging and performance analysis of the
+       subsystem. The debugfs filesystem must be mounted.
+
+endmenu
Index: linux-2.6/arch/powerpc/Kconfig
===================================================================
--- linux-2.6.orig/arch/powerpc/Kconfig 2008-06-20 13:41:00.000000000 -0700
+++ linux-2.6/arch/powerpc/Kconfig      2008-06-20 13:41:09.000000000 -0700
@@ -217,6 +217,7 @@
 
 source "arch/powerpc/sysdev/Kconfig"
 source "arch/powerpc/platforms/Kconfig"
+source "arch/powerpc/perfmon/Kconfig"
 
 menu "Kernel options"
 
Index: linux-2.6/include/asm-powerpc/perfmon.h
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/include/asm-powerpc/perfmon.h     2008-06-20 13:41:09.000000000 -0700
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <[EMAIL PROTECTED]>
+ *
+ * This file contains powerpc specific definitions for the perfmon
+ * interface.
+ *
+ * This file MUST never be included directly. Use linux/perfmon.h.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#ifndef _ASM_POWERPC_PERFMON_H_
+#define _ASM_POWERPC_PERFMON_H_
+
+/*
+ * arch-specific user visible interface definitions
+ */
+#define PFM_ARCH_MAX_PMCS      (256+64) /* 256 HW + 64 SW */
+#define PFM_ARCH_MAX_PMDS      (256+64) /* 256 HW + 64 SW */
+
+#endif /* _ASM_POWERPC_PERFMON_H_ */
Index: linux-2.6/include/asm-powerpc/perfmon_kern.h
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/include/asm-powerpc/perfmon_kern.h        2008-06-20 16:51:32.000000000 -0700
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2005-2008 David Gibson, IBM Corporation.
+ *
+ * Based on other versions:
+ * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <[EMAIL PROTECTED]>
+ *
+ * This file contains powerpc specific definitions for the perfmon
+ * interface.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#ifndef _ASM_POWERPC_PERFMON_KERN_H_
+#define _ASM_POWERPC_PERFMON_KERN_H_
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_PERFMON
+
+#include <asm/pmc.h>
+
+enum powerpc_pmu_type {
+       PFM_POWERPC_PMU_NONE,
+       PFM_POWERPC_PMU_POWER6
+};
+
+struct pfm_arch_pmu_info {
+       enum powerpc_pmu_type pmu_style;
+
+       void (*write_pmc)(unsigned int cnum, u64 value);
+
+       void (*write_pmd)(struct pfm_context *ctx,
+                         unsigned int cnum,
+                         u64 value);
+
+       u64 (*read_pmd)(struct pfm_context *ctx, unsigned int cnum);
+
+       void (*enable_counters)(struct pfm_context *ctx,
+                               struct pfm_event_set *set);
+       void (*disable_counters)(struct pfm_context *ctx,
+                                struct pfm_event_set *set);
+
+       void (*irq_handler)(struct pt_regs *regs, struct pfm_context *ctx);
+       void (*get_ovfl_pmds)(struct pfm_context *ctx,
+                             struct pfm_event_set *set);
+
+       /* The following routines are optional. */
+       void (*restore_pmcs)(struct pfm_event_set *set);
+       void (*restore_pmds)(struct pfm_event_set *set);
+
+       int  (*ctxswout_thread)(struct task_struct *task,
+                               struct pfm_context *ctx,
+                               struct pfm_event_set *set);
+       void (*ctxswin_thread)(struct task_struct *task,
+                              struct pfm_context *ctx,
+                              struct pfm_event_set *set);
+       int  (*load_context)(struct pfm_context *ctx);
+       void (*unload_context)(struct pfm_context *ctx);
+       int  (*acquire_pmu)(u64 *unavail_pmcs, u64 *unavail_pmds);
+       void (*release_pmu)(void);
+       void *platform_info;
+       void (*resend_irq)(struct pfm_context *ctx);
+};
+
+#define PFM_ARCH_PMD_STK_ARG   8 /* conservative value */
+#define PFM_ARCH_PMC_STK_ARG   8 /* conservative value */
+
+static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       arch_info = pfm_pmu_info();
+       arch_info->resend_irq(ctx);
+}
+
+static inline void pfm_arch_serialize(void)
+{}
+
+static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
+                                     unsigned int cnum,
+                                     u64 value)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       arch_info = pfm_pmu_info();
+
+       /*
+        * we only write to the actual register when monitoring is
+        * active (pfm_start was issued)
+        */
+       if (ctx && ctx->flags.started == 0)
+               return;
+
+       BUG_ON(!arch_info->write_pmc);
+
+       arch_info->write_pmc(cnum, value);
+}
+
+static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
+                                     unsigned int cnum, u64 value)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       arch_info = pfm_pmu_info();
+
+       value &= pfm_pmu_conf->ovfl_mask;
+
+       BUG_ON(!arch_info->write_pmd);
+
+       arch_info->write_pmd(ctx, cnum, value);
+}
+
+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       arch_info = pfm_pmu_info();
+
+       BUG_ON(!arch_info->read_pmd);
+
+       return arch_info->read_pmd(ctx, cnum);
+}
+
+/*
+ * For some CPUs, the upper bits of a counter must be set in order for the
+ * overflow interrupt to happen. On overflow, the counter has wrapped around,
+ * and the upper bits are cleared. This function may be used to set them back.
+ */
+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
+                                          unsigned int cnum)
+{
+       u64 val = pfm_arch_read_pmd(ctx, cnum);
+
+       /* This masks out overflow bit 31 */
+       pfm_arch_write_pmd(ctx, cnum, val);
+}
+
+/*
+ * At certain points, perfmon needs to know if monitoring has been
+ * explicitly started/stopped by the user via pfm_start/pfm_stop. The
+ * information is tracked in flags.started. However, on certain
+ * architectures it may be possible to start/stop directly from
+ * user level with a single assembly instruction, bypassing
+ * the kernel. This function must be used to determine, by an
+ * arch-specific means, whether monitoring is actually started/stopped.
+ */
+static inline int pfm_arch_is_active(struct pfm_context *ctx)
+{
+       return ctx->flags.started;
+}
+
+static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
+                                        struct pfm_context *ctx)
+{}
+
+static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
+                                       struct pfm_context *ctx)
+{}
+
+void pfm_arch_init_percpu(void);
+int  pfm_arch_is_monitoring_active(struct pfm_context *ctx);
+int  pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx);
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
+int  pfm_arch_get_ovfl_pmds(struct pfm_context *ctx,
+                           struct pfm_event_set *set);
+char *pfm_arch_get_pmu_module_name(void);
+/*
+ * called from __pfm_interrupt_handler(). ctx is not NULL.
+ * ctx is locked. PMU interrupt is masked.
+ *
+ * must stop all monitoring to ensure handler has consistent view.
+ * must collect the overflowed PMDs bitmask into povfl_pmds and
+ * npend_ovfls. If no interrupt detected then npend_ovfls
+ * must be set to zero.
+ */
+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, struct pfm_event_set *set)
+{
+       pfm_arch_stop(current, ctx);
+}
+
+void powerpc_irq_handler(struct pt_regs *regs);
+
+/*
+ * unfreeze PMU from pfm_do_interrupt_handler()
+ * ctx may be NULL for a spurious interrupt
+ */
+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       if (!ctx)
+               return;
+
+       PFM_DBG_ovfl("state=%d", ctx->state);
+
+       ctx->flags.started = 1;
+
+       arch_info = pfm_pmu_info();
+       BUG_ON(!arch_info->enable_counters);
+       arch_info->enable_counters(ctx, ctx->active_set);
+}
+
+static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
+{
+       return 0;
+}
+
+static inline int pfm_arch_context_create(struct pfm_context *ctx,
+                                         u32 ctx_flags)
+{
+       return 0;
+}
+
+static inline void pfm_arch_context_free(struct pfm_context *ctx)
+{}
+
+/* not necessary on PowerPC */
+static inline void pfm_cacheflush(void *addr, unsigned int len)
+{}
+
+/*
+ * function called from pfm_setfl_sane(). Context is locked
+ * and interrupts are masked.
+ * The value of flags is the value of ctx_flags as passed by
+ * user.
+ *
+ * function must check arch-specific set flags.
+ * Return:
+ *     0 when flags are valid
+ *     1 on error
+ */
+static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
+{
+       return 0;
+}
+
+static inline int pfm_arch_init(void)
+{
+       return 0;
+}
+
+static inline int pfm_arch_load_context(struct pfm_context *ctx)
+{
+       struct pfm_arch_pmu_info *arch_info;
+       int rc = 0;
+
+       arch_info = pfm_pmu_info();
+       if (arch_info->load_context)
+               rc = arch_info->load_context(ctx);
+
+       return rc;
+}
+
+static inline void pfm_arch_unload_context(struct pfm_context *ctx)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       arch_info = pfm_pmu_info();
+       if (arch_info->unload_context)
+               arch_info->unload_context(ctx);
+}
+
+static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
+{
+       struct pfm_arch_pmu_info *arch_info;
+       int rc = 0;
+
+       arch_info = pfm_pmu_info();
+       if (arch_info->acquire_pmu) {
+               rc = arch_info->acquire_pmu(unavail_pmcs, unavail_pmds);
+               if (rc)
+                       return rc;
+       }
+
+       return reserve_pmc_hardware(powerpc_irq_handler);
+}
+
+static inline void pfm_arch_pmu_release(void)
+{
+       struct pfm_arch_pmu_info *arch_info;
+
+       arch_info = pfm_pmu_info();
+       if (arch_info->release_pmu)
+               arch_info->release_pmu();
+
+       release_pmc_hardware();
+}
+
+static inline void pfm_arch_arm_handle_work(struct task_struct *task)
+{}
+
+static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
+{}
+
+struct pfm_arch_context {
+        u64 powergs_pmc5, powergs_pmc6;
+        u64 delta_tb, delta_tb_start;
+        u64 delta_purr, delta_purr_start;
+};
+
+#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context)
+/*
+ * PowerPC does not need extra alignment requirements for the sampling buffer
+ */
+#define PFM_ARCH_SMPL_ALIGN_SIZE       0
+
+#endif /* CONFIG_PERFMON */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_PERFMON_KERN_H_ */
Index: linux-2.6/arch/powerpc/Makefile
===================================================================
--- linux-2.6.orig/arch/powerpc/Makefile        2008-06-20 13:41:00.000000000 -0700
+++ linux-2.6/arch/powerpc/Makefile     2008-06-20 13:41:09.000000000 -0700
@@ -147,6 +147,7 @@
                                   arch/powerpc/platforms/
 core-$(CONFIG_MATH_EMULATION)  += arch/powerpc/math-emu/
 core-$(CONFIG_XMON)            += arch/powerpc/xmon/
+core-$(CONFIG_PERFMON)         += arch/powerpc/perfmon/
 core-$(CONFIG_KVM)             += arch/powerpc/kvm/
 
 drivers-$(CONFIG_OPROFILE)     += arch/powerpc/oprofile/
Index: linux-2.6/arch/powerpc/perfmon/Makefile
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/arch/powerpc/perfmon/Makefile     2008-06-20 13:41:09.000000000 -0700
@@ -0,0 +1,2 @@
+obj-$(CONFIG_PERFMON)          += perfmon.o
+obj-$(CONFIG_PERFMON_POWER6)   += perfmon_power6.o
Index: linux-2.6/include/asm-powerpc/unistd.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/unistd.h 2008-06-20 13:41:00.000000000 -0700
+++ linux-2.6/include/asm-powerpc/unistd.h      2008-06-20 14:03:44.000000000 -0700
@@ -335,10 +335,22 @@
 #define __NR_subpage_prot      310
 #define __NR_timerfd_settime   311
 #define __NR_timerfd_gettime   312
+#define __NR_pfm_create_context        313
+#define __NR_pfm_write_pmcs    314
+#define __NR_pfm_write_pmds    315
+#define __NR_pfm_read_pmds     316
+#define __NR_pfm_load_context  317
+#define __NR_pfm_start         318
+#define __NR_pfm_stop          319
+#define __NR_pfm_reserved_1    320
+#define __NR_pfm_reserved_2    321
+#define __NR_pfm_reserved_3    322
+#define __NR_pfm_reserved_4    323
+#define __NR_pfm_unload_context        324
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls          313
+#define __NR_syscalls          325
 
 #define __NR__exit __NR_exit
 #define NR_syscalls    __NR_syscalls
Index: linux-2.6/include/asm-powerpc/systbl.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/systbl.h 2008-06-20 13:41:00.000000000 -0700
+++ linux-2.6/include/asm-powerpc/systbl.h      2008-06-20 14:04:09.000000000 -0700
@@ -316,3 +316,15 @@
 SYSCALL(subpage_prot)
 COMPAT_SYS_SPU(timerfd_settime)
 COMPAT_SYS_SPU(timerfd_gettime)
+SYSCALL(pfm_create_context)
+SYSCALL(pfm_write_pmcs)
+SYSCALL(pfm_write_pmds)
+SYSCALL(pfm_read_pmds)
+SYSCALL(pfm_load_context)
+SYSCALL(pfm_start)
+SYSCALL(pfm_stop)
+SYSCALL(pfm_reserved_1)
+SYSCALL(pfm_reserved_2)
+SYSCALL(pfm_reserved_3)
+SYSCALL(pfm_reserved_4)
+SYSCALL(pfm_unload_context)
This patch adds perfmon2 support for the POWER6 chip.
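
A note for reviewers: on POWER6, PMC5 and PMC6 are free-running and not
writable, so the patch virtualizes them, sampling the hardware counters
from a per-cpu kernel timer. Condensed from delta() below, the
accumulation step leans on 32-bit modular arithmetic so that the
difference comes out right even when the hardware counter has wrapped
since the last sample:

    tmp5 = (u32) mfspr(SPRN_PMC5);
    ctx_arch->powergs_pmc5 += (u64)(tmp5 - pmc5_start_save[cpu_num]);
    pmc5_start_save[cpu_num] = tmp5;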

Index: linux-2.6/arch/powerpc/perfmon/perfmon_power6.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/arch/powerpc/perfmon/perfmon_power6.c     2008-06-23 18:13:05.000000000 -0500
@@ -0,0 +1,467 @@
+/*
+ * This file contains the POWER6 PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (c) 2007, IBM Corporation
+ *
+ * Based on perfmon_power5.c, and written by Carl Love <[EMAIL PROTECTED]>
+ * and Kevin Corry <[EMAIL PROTECTED]>.  Some fixes and refinement by
+ * Corey Ashford <[EMAIL PROTECTED]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/perfmon_kern.h>
+
+MODULE_AUTHOR("Corey Ashford <[EMAIL PROTECTED]>");
+MODULE_DESCRIPTION("POWER6 PMU description table");
+MODULE_LICENSE("GPL");
+
+static struct pfm_regmap_desc pfm_power6_pmc_desc[] = {
+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0),
+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
+/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
+};
+#define PFM_PM_NUM_PMCS        ARRAY_SIZE(pfm_power6_pmc_desc)
+#define PFM_DELTA_TB    10000   /* Not real registers */
+#define PFM_DELTA_PURR  10001
+
+/*
+ * Counters wrap to zero on the transition from 2^32-1 to 0.  Note:
+ * an interrupt is generated on the transition from 2^31-1 to 2^31.
+ */
+#define OVERFLOW_VALUE    0x100000000UL
+
+/* The TB and PURR registers are read-only. Also, note that the TB register
+ * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
+ * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
+ */
+static struct pfm_regmap_desc pfm_power6_pmd_desc[] = {
+       /* On POWER6, PMC5 and PMC6 are not writable, they do not
+        * generate interrupts, and do not qualify their counts
+        * based on problem mode, supervisor mode or hypervisor mode.
+        * These two counters are implemented as virtual counters
+        * to make them appear to work like the other counters.  A
+        * kernel timer is used to sample the real PMC5 and PMC6 and
+        * update the virtual counters.
+        */
+/* tb    */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
+/* pmd1  */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
+/* pmd2  */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
+/* pmd3  */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
+/* pmd4  */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
+/* pmd5  */ PMD_D((PFM_REG_I), "PMC5", SPRN_PMC5),
+/* pmd6  */ PMD_D((PFM_REG_I), "PMC6", SPRN_PMC6),
+/* purr  */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR),
+/* delta tb   */ PMD_D((PFM_REG_I), "DELTA_TB", PFM_DELTA_TB),
+/* delta purr */ PMD_D((PFM_REG_I), "DELTA_PURR", PFM_DELTA_PURR),
+};
+
+#define PFM_PM_NUM_PMDS        ARRAY_SIZE(pfm_power6_pmd_desc)
+
+u32 pmc5_start_save[NR_CPUS];
+u32 pmc6_start_save[NR_CPUS];
+
+static struct timer_list pmc5_6_update[NR_CPUS];
+u64 enable_cntrs_cnt;
+u64 disable_cntrs_cnt;
+u64 call_delta;
+u64 pm5_6_interrupt;
+u64 pm1_4_interrupt;
+/* The kernel timer handler needs the ctx_arch pointer, but it cannot
+ * be derived from within the timer itself, so cache it per cpu.
+ */
+struct pfm_arch_context *pmc5_6_ctx_arch[NR_CPUS];
+long int update_time;
+
+static void delta(int cpu_num, struct pfm_arch_context *ctx_arch)
+{
+       u32 tmp5, tmp6;
+
+       call_delta++;
+
+       tmp5 = (u32) mfspr(SPRN_PMC5);
+       tmp6 = (u32) mfspr(SPRN_PMC6);
+
+       /*
+        * The following difference calculation relies on 32-bit modular
+        * arithmetic for the deltas to come out correct (especially in the
+        * presence of a 32-bit counter wrap).
+        */
+       ctx_arch->powergs_pmc5 += (u64)(tmp5 - pmc5_start_save[cpu_num]);
+       ctx_arch->powergs_pmc6 += (u64)(tmp6 - pmc6_start_save[cpu_num]);
+
+       pmc5_start_save[cpu_num] = tmp5;
+       pmc6_start_save[cpu_num] = tmp6;
+
+       return;
+}
+
+
+static void pmc5_6_updater(unsigned long cpu_num)
+{
+       /* update the virtual pmd 5 and pmd 6 counters */
+
+       delta(cpu_num, pmc5_6_ctx_arch[cpu_num]);
+       mod_timer(&pmc5_6_update[cpu_num], jiffies + update_time);
+}
+
+
+static void pfm_power6_write_pmc(unsigned int cnum, u64 value)
+{
+       switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
+       case SPRN_MMCR0:
+               mtspr(SPRN_MMCR0, value);
+               break;
+       case SPRN_MMCR1:
+               mtspr(SPRN_MMCR1, value);
+               break;
+       case SPRN_MMCRA:
+               mtspr(SPRN_MMCRA, value);
+               break;
+       default:
+               BUG();
+       }
+}
+
+static void pfm_power6_write_pmd(struct pfm_context *ctx,
+                                unsigned int cnum,
+                                u64 value)
+{
+        struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
+        int cpu_num;
+
+       switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
+       case SPRN_PMC1:
+               mtspr(SPRN_PMC1, value);
+               break;
+       case SPRN_PMC2:
+               mtspr(SPRN_PMC2, value);
+               break;
+       case SPRN_PMC3:
+               mtspr(SPRN_PMC3, value);
+               break;
+       case SPRN_PMC4:
+               mtspr(SPRN_PMC4, value);
+               break;
+        case SPRN_PMC5:
+               cpu_num = get_cpu();
+                       pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
+               put_cpu();
+               ctx_arch->powergs_pmc5 = value;
+                break;
+        case SPRN_PMC6:
+               cpu_num = get_cpu();
+                       pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
+               put_cpu();
+                ctx_arch->powergs_pmc6 = value;
+                break;
+        case PFM_DELTA_TB:
+                ctx_arch->delta_tb_start =
+                        (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL));
+                ctx_arch->delta_tb = value;
+                break;
+        case PFM_DELTA_PURR:
+                ctx_arch->delta_purr_start = mfspr(SPRN_PURR);
+                ctx_arch->delta_purr = value;
+                break;
+       case SPRN_TBRL:
+       case SPRN_PURR:
+               /* Ignore writes to read-only registers. */
+               break;
+       default:
+               BUG();
+       }
+}
+
+
+static u64 pfm_power6_read_pmd(struct pfm_context *ctx, unsigned int cnum)
+{
+        struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
+        int cpu_num;
+       u64 temp;
+
+       switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
+       case SPRN_PMC1:
+               return mfspr(SPRN_PMC1);
+       case SPRN_PMC2:
+               return mfspr(SPRN_PMC2);
+       case SPRN_PMC3:
+               return mfspr(SPRN_PMC3);
+       case SPRN_PMC4:
+               return mfspr(SPRN_PMC4);
+        case SPRN_PMC5:
+               cpu_num = get_cpu();
+                       temp = ctx_arch->powergs_pmc5 + (u64)((u32)mfspr(SPRN_PMC5) - pmc5_start_save[cpu_num]);
+               put_cpu();
+               return temp;
+        case SPRN_PMC6:
+               cpu_num = get_cpu();
+                       temp = ctx_arch->powergs_pmc6 + (u64)((u32)mfspr(SPRN_PMC6) - pmc6_start_save[cpu_num]);
+               put_cpu();
+               return temp;
+        case PFM_DELTA_TB:
+                return ctx_arch->delta_tb
+                        + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
+                        - ctx_arch->delta_tb_start;
+        case PFM_DELTA_PURR:
+                return ctx_arch->delta_purr
+                        + mfspr(SPRN_PURR)
+                        - ctx_arch->delta_purr_start;
+       case SPRN_TBRL:
+               return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
+       case SPRN_PURR:
+               if (cpu_has_feature(CPU_FTR_PURR))
+                       return mfspr(SPRN_PURR);
+               else
+                       return 0;
+       default:
+               BUG();
+       }
+}
+
+/* forward decl */
+static void pfm_power6_disable_counters(struct pfm_context *ctx,
+                                       struct pfm_event_set *set);
+
+/**
+ * pfm_power6_enable_counters
+ *
+ **/
+static void pfm_power6_enable_counters(struct pfm_context *ctx,
+                                      struct pfm_event_set *set)
+{
+
+       unsigned int i, max_pmc;
+       int cpu_num;
+       struct pfm_arch_context *ctx_arch;
+
+       enable_cntrs_cnt++;
+
+       /* Make sure the counters are disabled before touching the other
+          control registers */
+       pfm_power6_disable_counters(ctx, set);
+
+       /* need the ctx passed down to the routine */
+       ctx_arch = pfm_ctx_arch(ctx);
+       max_pmc = pfm_pmu_conf->regs.max_pmc;
+
+       /* Write MMCR0 last, and a fairly easy way to do this is to write
+          the registers in the reverse order */
+       for (i = max_pmc; i != 0; i--)
+               if (test_bit(i - 1, set->used_pmcs))
+                       pfm_power6_write_pmc(i - 1, set->pmcs[i - 1]);
+
+       /* save current free running HW event count */
+       cpu_num = get_cpu();
+               pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
+               pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
+
+               ctx_arch->delta_purr_start = mfspr(SPRN_PURR);
+
+               if (cpu_has_feature(CPU_FTR_PURR))
+                       ctx_arch->delta_tb_start =
+                               ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
+               else
+                       ctx_arch->delta_tb_start = 0;
+
+               /* Start kernel timer for this cpu to periodically update
+                * the virtual counters.
+                */
+               init_timer(&pmc5_6_update[cpu_num]);
+               pmc5_6_update[cpu_num].function = pmc5_6_updater;
+               pmc5_6_update[cpu_num].data = (unsigned long) cpu_num;
+               pmc5_6_update[cpu_num].expires = jiffies + update_time;
+               pmc5_6_ctx_arch[cpu_num] = ctx_arch;
+               add_timer(&pmc5_6_update[cpu_num]);
+       put_cpu();
+}
+
+/**
+ * pfm_power6_disable_counters
+ *
+ **/
+static void pfm_power6_disable_counters(struct pfm_context *ctx,
+                                       struct pfm_event_set *set)
+{
+       struct pfm_arch_context *ctx_arch;
+       int cpu_num;
+
+       disable_cntrs_cnt++;
+
+       /* Set the Freeze Counters bit */
+       mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+       asm volatile ("sync");
+
+       cpu_num = get_cpu();
+               /* delete kernel update timer */
+               del_timer_sync(&pmc5_6_update[cpu_num]);
+
+               /* Update the virtual pmd 5 and 6 counters from the free running
+                * HW counters
+                */
+               ctx_arch = pfm_ctx_arch(ctx);
+               delta(cpu_num, ctx_arch);
+
+               ctx_arch->delta_tb +=
+                       (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
+                       - ctx_arch->delta_tb_start;
+
+               ctx_arch->delta_purr += mfspr(SPRN_PURR)
+                       - ctx_arch->delta_purr_start;
+       put_cpu();
+}
+
+/**
+ * pfm_power6_get_ovfl_pmds
+ *
+ * Determine which counters in this set have overflowed and fill in the
+ * set->povfl_pmds mask and set->npend_ovfls count.
+ **/
+static void pfm_power6_get_ovfl_pmds(struct pfm_context *ctx,
+                                    struct pfm_event_set *set)
+{
+       unsigned int i;
+       unsigned int max_intr_pmd = pfm_pmu_conf->regs.max_intr_pmd;
+       u64 *used_pmds = set->used_pmds;
+       u64 *cntr_pmds = pfm_pmu_conf->regs.cnt_pmds;
+       u64 width_mask = 1ULL << pfm_pmu_conf->counter_width;
+       u64 new_val, mask[PFM_PMD_BV];
+
+       bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), cast_ulp(used_pmds), max_intr_pmd);
+
+       /* max_intr_pmd is actually the last interrupting pmd register + 1 */
+       for (i = 0; i < max_intr_pmd; i++) {
+               if (test_bit(i, mask)) {
+                       new_val = pfm_power6_read_pmd(ctx, i);
+                       if (new_val & width_mask) {
+                               set_bit(i, set->povfl_pmds);
+                               set->npend_ovfls++;
+                       }
+               }
+       }
+}
+
+static void pfm_power6_irq_handler(struct pt_regs *regs,
+                                  struct pfm_context *ctx)
+{
+       u32 mmcr0;
+       u64 mmcra;
+
+       /* Disable the counters (set the freeze bit) so as not to pollute
+        * the counts.
+        */
+       mmcr0 = mfspr(SPRN_MMCR0);
+       mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
+       mmcra = mfspr(SPRN_MMCRA);
+
+       /* Set the PMM bit (see comment below). */
+       mtmsrd(mfmsr() | MSR_PMM);
+
+       pm1_4_interrupt++;
+
+       pfm_interrupt_handler(instruction_pointer(regs), regs);
+
+       mmcr0 = mfspr(SPRN_MMCR0);
+       /* Reset the perfmon trigger. */
+       mmcr0 |= MMCR0_PMXE;
+
+       /*
+        * We must clear the PMAO bit on some (GQ) chips. Just do it
+        * all the time.
+        */
+       mmcr0 &= ~MMCR0_PMAO;
+
+       /* Clear the appropriate bits in the MMCRA. */
+       mmcra &= ~(POWER6_MMCRA_THRM | POWER6_MMCRA_OTHER);
+       mtspr(SPRN_MMCRA, mmcra);
+
+       /*
+        * Now clear the freeze bit, counting will not start until we
+        * rfid from this exception, because only at that point will
+        * the PMM bit be cleared.
+        */
+       mmcr0 &= ~MMCR0_FC;
+       mtspr(SPRN_MMCR0, mmcr0);
+}
+
+static void pfm_power6_resend_irq(struct pfm_context *ctx)
+{
+       /*
+        * Assert the PMAO bit to cause a PMU interrupt.  Make sure we
+        * trigger the edge detection circuitry for PMAO
+        */
+       mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
+       mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
+}
+
+struct pfm_arch_pmu_info pfm_power6_pmu_info = {
+       .pmu_style        = PFM_POWERPC_PMU_POWER6,
+       .write_pmc        = pfm_power6_write_pmc,
+       .write_pmd        = pfm_power6_write_pmd,
+       .read_pmd         = pfm_power6_read_pmd,
+       .irq_handler      = pfm_power6_irq_handler,
+       .get_ovfl_pmds    = pfm_power6_get_ovfl_pmds,
+       .enable_counters  = pfm_power6_enable_counters,
+       .disable_counters = pfm_power6_disable_counters,
+       .resend_irq       = pfm_power6_resend_irq
+};
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static struct pfm_pmu_config pfm_power6_pmu_conf = {
+       .pmu_name = "POWER6",
+       .counter_width = 31,
+       .pmd_desc = pfm_power6_pmd_desc,
+       .pmc_desc = pfm_power6_pmc_desc,
+       .num_pmc_entries = PFM_PM_NUM_PMCS,
+       .num_pmd_entries = PFM_PM_NUM_PMDS,
+       .version = "1.0",
+       .pmu_info = &pfm_power6_pmu_info,
+};
+
+static int __init pfm_power6_pmu_init_module(void)
+{
+       int ret;
+       disable_cntrs_cnt = 0;
+       enable_cntrs_cnt = 0;
+       call_delta = 0;
+       pm5_6_interrupt = 0;
+       pm1_4_interrupt = 0;
+
+       /* calculate the time for updating counters 5 and 6 */
+
+       /*
+        * MAX_EVENT_RATE assumes a max instruction issue rate of 2
+        * instructions per clock cycle.  Experience shows that this factor
+        * of 2 is more than adequate.
+        */
+
+# define MAX_EVENT_RATE (ppc_proc_freq * 2)
+
+       /*
+        * Calculate the time, in jiffies, it takes for event counter 5 or
+        * 6 to completely wrap when counting at the max event rate, and
+        * then figure on sampling at twice that rate.
+        */
+       update_time = (((unsigned long)HZ * OVERFLOW_VALUE)
+                      / ((unsigned long)MAX_EVENT_RATE)) / 2;
+
+       ret =  pfm_pmu_register(&pfm_power6_pmu_conf);
+       return ret;
+}
+
+device_initcall(pfm_power6_pmu_init_module);
Index: linux-2.6/include/asm-powerpc/reg.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/reg.h    2008-06-23 16:45:18.000000000 -0500
+++ linux-2.6/include/asm-powerpc/reg.h 2008-06-23 16:58:09.000000000 -0500
@@ -692,6 +692,7 @@
 #define PV_POWER5      0x003A
 #define PV_POWER5p     0x003B
 #define PV_970FX       0x003C
+#define PV_POWER6      0x003E
 #define PV_630         0x0040
 #define PV_630p        0x0041
 #define PV_970MP       0x0044
Index: linux-2.6/arch/powerpc/perfmon/Kconfig
===================================================================
--- linux-2.6.orig/arch/powerpc/perfmon/Kconfig 2008-06-23 16:58:02.000000000 -0500
+++ linux-2.6/arch/powerpc/perfmon/Kconfig      2008-06-23 16:58:09.000000000 -0500
@@ -23,4 +23,11 @@
        debugfs. This is used for debugging and performance analysis of the
        subsystem. The debugfs filesystem must be mounted.
 
+config PERFMON_POWER6
+       bool "Support for POWER6 hardware performance counters"
+       depends on PERFMON && PPC64
+       default n
+       help
+       Enables support for the POWER6 hardware performance counters.
+
 endmenu