This patch adds spe ctxsw functions to switch the perfmon context by
the spe scheduling instead of the ppe scheduling.

Updating the subunit field of pmX_event by spe id is the key
to follow the target SPE context(thread).

Signed-off-by: Takashi Yamamoto <TakashiA.Yamamoto@jp.sony.com>
---
 arch/powerpc/perfmon/perfmon.c      |   25 +++++
 arch/powerpc/perfmon/perfmon_cell.c |  157 ++++++++++++++++++++++++++++++++++--
 include/asm-powerpc/perfmon.h       |    8 +
 include/asm-powerpc/perfmon_kern.h  |    3 
 4 files changed, 187 insertions(+), 6 deletions(-)

--- a/arch/powerpc/perfmon/perfmon.c
+++ b/arch/powerpc/perfmon/perfmon.c
@@ -22,6 +22,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  * 02111-1307 USA
  */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/perfmon_kern.h>
 
@@ -222,6 +225,28 @@ void pfm_arch_restore_pmcs(struct pfm_co
 	}
 }
 
+struct task_struct *pfm_get_task_by_pid(int pid)
+{
+	struct task_struct *p;
+
+	read_lock(&tasklist_lock);
+
+	p = find_task_by_pid(pid);
+	if (p)
+		get_task_struct(p);
+
+	read_unlock(&tasklist_lock);
+
+	return p;
+}
+EXPORT_SYMBOL_GPL(pfm_get_task_by_pid);
+
+void pfm_put_task(struct task_struct *p)
+{
+	put_task_struct(p);
+}
+EXPORT_SYMBOL_GPL(pfm_put_task);
+
 char *pfm_arch_get_pmu_module_name(void)
 {
 	unsigned int pvr = mfspr(SPRN_PVR);
--- a/arch/powerpc/perfmon/perfmon_cell.c
+++ b/arch/powerpc/perfmon/perfmon_cell.c
@@ -33,6 +33,7 @@
 #include <asm/rtas.h>
 #include <asm/ps3.h>
 #include <asm/spu.h>
+#include "../platforms/cell/spufs/spufs.h"
 
 MODULE_AUTHOR("Kevin Corry <kevcorry@us.ibm.com>, "
 	      "Carl Love <carll@us.ibm.com>");
@@ -709,6 +710,9 @@ static void pfm_cell_enable_counters(str
 		((struct pfm_arch_pmu_info *)
 		 (pfm_pmu_conf->pmu_info))->platform_info;
 
+	if (set->priv_flags & PFM_SETFL_PRIV_WAIT_SUB_UNIT_UPDATE)
+		return ;
+
 	info->enable_pm(smp_processor_id());
 }
 
@@ -716,6 +720,10 @@ static void pfm_cell_enable_counters(str
  * pfm_cell_disable_counters
  *
  * Just need to turn off the global disable bit in pm_control.
+ *
+ * When a event set is switched out (multi event set),
+ * we need to reset the debug-bus signals so the next event set that
+ * gets switched in can start from a clean set of signals.
  **/
 static void pfm_cell_disable_counters(struct pfm_context *ctx,
 				      struct pfm_event_set *set)
@@ -725,8 +733,7 @@ static void pfm_cell_disable_counters(st
 		 (pfm_pmu_conf->pmu_info))->platform_info;
 
 	info->disable_pm(smp_processor_id());
-	if (machine_is(ps3))
-		reset_signals(smp_processor_id());
+	reset_signals(smp_processor_id());
 }
 
 /*
@@ -819,6 +826,9 @@ void pfm_cell_restore_pmcs(struct pfm_ev
 		((struct pfm_arch_pmu_info *)
 		 (pfm_pmu_conf->pmu_info))->platform_info;
 
+	if (set->priv_flags & PFM_SETFL_PRIV_WAIT_SUB_UNIT_UPDATE)
+		return ;
+
 	for (i = 0; i < NR_CTRS; i++) {
 		ctr_ctrl = set->pmcs[i];
 
@@ -964,7 +974,6 @@ static int pfm_cell_check_cntr_ovfl(stru
 	return 0;
 }
 
-#ifdef CONFIG_PPC_PS3
 /**
  * update_sub_unit_field
  *
@@ -983,14 +992,18 @@ static u64 pfm_get_spe_id(void *arg)
 	struct spu *spu = arg;
 	u64 spe_id;
 
+#ifdef CONFIG_PPC_PS3
 	if (machine_is(ps3))
 		spe_id = ps3_get_spe_id(arg);
 	else
 		spe_id = spu->spe_id;
-
+#else
+	spe_id = spu->spe_id;
+#endif
 	return spe_id;
 }
 
+#ifdef CONFIG_PPC_PS3
 /**
  * pfm_spu_number_to_id
  *
@@ -1124,6 +1137,13 @@ static int pfm_cell_load_context(struct 
 					s->id, i, target_th_id);
 			}
 		}
+
+		if (ctx->flags.not_dflt_ctxsw)
+			s->priv_flags |=
+				PFM_SETFL_PRIV_WAIT_SUB_UNIT_UPDATE;
+		else
+			s->priv_flags &=
+				~PFM_SETFL_PRIV_WAIT_SUB_UNIT_UPDATE;
 	}
 
 	return ret;
@@ -1140,9 +1160,22 @@ static int pfm_cell_load_context(struct 
  **/
 static void pfm_cell_unload_context(struct pfm_context *ctx)
 {
+	struct pfm_cell_platform_pmu_info *info =
+		((struct pfm_arch_pmu_info *)
+		 (pfm_pmu_conf->pmu_info))->platform_info;
+
 	if (ctx->task == current || ctx->flags.system) {
 		reset_signals(smp_processor_id());
 	}
+
+	/*
+	 * At the end of following spe context,
+	 * detaching pfm_ctx from the target task by Ctrl-c
+	 * may not be done correctly.
+	 * So, Clear all signals and disable PM.
+	 */
+	info->disable_pm(smp_processor_id());
+	reset_signals(smp_processor_id());
 }
 
 /**
@@ -1211,14 +1244,113 @@ static void pfm_cell_get_ovfl_pmds(struc
 }
 
 /**
+ * pfm_prepare_ctxswin_thread
+ *
+ **/
+static void pfm_prepare_ctxswin_thread(struct pfm_context *ctx, u64 spe_id)
+{
+	struct pfm_event_set *set;
+	int i, last_pmc;
+	u64 signal_group;
+	u64 *used_pmcs;
+
+	spin_lock(&ctx->lock);
+
+	last_pmc = NR_CTRS + 8;
+	list_for_each_entry(set, &ctx->set_list, list) {
+
+		used_pmcs = set->used_pmcs;
+		for (i = NR_CTRS; i < last_pmc; i++) {
+			if (!test_bit(i, used_pmcs))
+				continue;
+
+			signal_group = PFM_EVENT_PMC_SIGNAL_GROUP(set->pmcs[i]);
+
+			/*
+			 * If the target event is a SPE signal group event,
+			 * The sub_unit field in pmX_event pmc is changed to the
+			 * specified spe_id.
+			 */
+			if (SIG_GROUP_SPU_BASE < signal_group &&
+			    signal_group < SIG_GROUP_EIB_BASE) {
+				set->pmcs[i] = update_sub_unit_field(
+					set->pmcs[i], spe_id);
+				set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
+				set->priv_flags &=
+					~PFM_SETFL_PRIV_WAIT_SUB_UNIT_UPDATE;
+				PFM_DBG("pmcs[%d] : 0x%lx", i, set->pmcs[i]);
+			}
+		}
+	}
+
+	spin_unlock(&ctx->lock);
+}
+
+/**
+ * pfm_spe_ctxsw_thread
+ *
+ **/
+static int pfm_spe_ctxsw_thread(struct notifier_block *block,
+				unsigned long object_id, void *arg)
+{
+	struct task_struct *p;
+	struct pfm_arch_context *ctx_arch;
+	u64 spe_id;
+
+	p = pfm_get_task_by_pid(((struct spu *)arg)->ctx->tid);
+	if (p) {
+		if (!p->pfm_context) {
+			pfm_put_task(p);
+			return 0;
+		}
+
+		ctx_arch = pfm_ctx_arch(p->pfm_context);
+		spe_id = pfm_get_spe_id(arg);
+		if (object_id) {
+			PFM_DBG("=== PFM SPE CTXSWIN === 0x%lx", spe_id);
+			pfm_prepare_ctxswin_thread(p->pfm_context, spe_id);
+			ctx_arch->arg = arg;
+			pfm_ctxsw(NULL, p);
+
+		} else {
+			PFM_DBG("=== PFM SPE CTXSWOUT === 0x%lx", spe_id);
+			pfm_ctxsw(p, NULL);
+			ctx_arch->arg = NULL;
+		}
+
+		pfm_put_task(p);
+	}
+	return 0;
+}
+
+/**
  * pfm_cell_create_context
  *
  **/
 static int pfm_cell_create_context(struct pfm_context *ctx, u32 ctx_flags)
 {
+	struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
+	int ret = 0;
+
 	ctx->flags.not_dflt_ctxsw =
 		(ctx_flags & PFM_FL_CELL_SPE_FOLLOW) ? 1:0;
-	return 0;
+  	if (ctx->flags.not_dflt_ctxsw) {
+
+		if (ctx->flags.system)
+			return -EINVAL;
+
+		ctx_arch->arg = NULL;
+		ctx_arch->ctxsw_notifier.notifier_call = pfm_spe_ctxsw_thread;
+		ctx_arch->ctxsw_notifier.next = NULL;
+		ctx_arch->ctxsw_notifier.priority = 0;
+		ret = spu_switch_event_register(&ctx_arch->ctxsw_notifier);
+		if (ret) {
+			ctx_arch->ctxsw_notifier.notifier_call = NULL;
+			PFM_ERR("Can't register spe_notifier\n");
+		}
+	}
+
+	return ret;
 }
 
 /**
@@ -1227,7 +1359,20 @@ static int pfm_cell_create_context(struc
  **/
 static void pfm_cell_free_context(struct pfm_context *ctx)
 {
-	return ;
+	struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
+	int ret;
+
+	if (ctx->flags.not_dflt_ctxsw) {
+		if (ctx->flags.system)
+			return;
+
+		if (ctx_arch->ctxsw_notifier.notifier_call) {
+			ret = spu_switch_event_unregister(&ctx_arch->ctxsw_notifier);
+			if (ret)
+				PFM_ERR("Can't unregister spe_notifier\n");
+		}
+	}
+	return;
 }
 
 /**
--- a/include/asm-powerpc/perfmon.h
+++ b/include/asm-powerpc/perfmon.h
@@ -38,4 +38,12 @@
  */
 #define PFM_FL_CELL_SPE_FOLLOW 0x10000
 
+/*
+ * set private flags
+ *
+ * bits[00-15]: generic flags
+ * bits[16-31]: arch-specific flags
+ */
+#define PFM_SETFL_PRIV_WAIT_SUB_UNIT_UPDATE 0x10000
+
 #endif /* _ASM_POWERPC_PERFMON_H_ */
--- a/include/asm-powerpc/perfmon_kern.h
+++ b/include/asm-powerpc/perfmon_kern.h
@@ -404,6 +404,9 @@ static inline void pfm_arch_arm_handle_w
 static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
 {}
 
+struct task_struct *pfm_get_task_by_pid(int pid);
+void pfm_put_task(struct task_struct *p);
+
 struct pfm_arch_context {
 	/* Cell: Most recent value of the pm_status
 	 * register read by the interrupt handler.
