The patch titled
OProfile: add support to OProfile for profiling CELL BE (update)
has been added to the -mm tree. Its filename is
oprofile-add-support-to-oprofile-for-profiling-cell-be-update.patch
*** Remember to use Documentation/SubmitChecklist when testing your code ***
See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this
------------------------------------------------------
Subject: OProfile: add support to OProfile for profiling CELL BE (update)
From: Maynard Johnson <[EMAIL PROTECTED]>
Moved number_of_online_nodes() from pr_util.h to spu_task_sync.c
and dropped the inline as it is too big.
Fixed a number of style issues.
Added some comments for structs and memory barriers.
Changed set_profiling_frequency() to set_spu_profiling_frequency().
Acquire mmap_sem around vma walk.
Cc: Carl Love <[EMAIL PROTECTED]>
Cc: Maynard Johnson <[EMAIL PROTECTED]>
Cc: Bob Nelson <[EMAIL PROTECTED]>
Cc: Arnd Bergmann <[EMAIL PROTECTED]>
Cc: Paul Mackerras <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
---
arch/powerpc/oprofile/cell/pr_util.h | 55 ++++++++++---------
arch/powerpc/oprofile/cell/spu_profiler.c | 12 +---
arch/powerpc/oprofile/cell/spu_task_sync.c | 21 ++++++-
arch/powerpc/oprofile/op_model_cell.c | 14 ++--
4 files changed, 62 insertions(+), 40 deletions(-)
diff -puN
arch/powerpc/oprofile/cell/pr_util.h~oprofile-add-support-to-oprofile-for-profiling-cell-be-update
arch/powerpc/oprofile/cell/pr_util.h
---
a/arch/powerpc/oprofile/cell/pr_util.h~oprofile-add-support-to-oprofile-for-profiling-cell-be-update
+++ a/arch/powerpc/oprofile/cell/pr_util.h
@@ -21,39 +21,46 @@
#include "../../platforms/cell/cbe_regs.h"
-static inline int number_of_online_nodes(void)
-{
- u32 cpu; u32 tmp;
- int nodes = 0;
- for_each_online_cpu(cpu) {
- tmp = cbe_cpu_to_node(cpu) + 1;
- if (tmp > nodes)
- nodes++;
- }
- return nodes;
-}
-
/* Defines used for sync_start */
#define SKIP_GENERIC_SYNC 0
#define SYNC_START_ERROR -1
#define DO_GENERIC_SYNC 1
-struct spu_overlay_info
-{
- unsigned int vma;
- unsigned int size;
- unsigned int offset;
+struct spu_overlay_info { /* map of sections within an SPU overlay */
+ unsigned int vma; /* SPU virtual memory address from elf */
+ unsigned int size; /* size of section from elf */
+ unsigned int offset; /* offset of section into elf file */
unsigned int buf;
};
-struct vma_to_fileoffset_map
-{
- struct vma_to_fileoffset_map *next;
- unsigned int vma;
- unsigned int size;
- unsigned int offset;
+struct vma_to_fileoffset_map { /* map of sections within an SPU program */
+ struct vma_to_fileoffset_map *next; /* list pointer */
+ unsigned int vma; /* SPU virtual memory address from elf */
+ unsigned int size; /* size of section from elf */
+ unsigned int offset; /* offset of section into elf file */
unsigned int guard_ptr;
unsigned int guard_val;
+ /*
+ * The guard pointer is an entry in the _ovly_buf_table,
+ * computed using ovly.buf as the index into the table. Since
+ * ovly.buf values begin at '1' to reference the first (or 0th)
+ * entry in the _ovly_buf_table, the computation subtracts 1
+ * from ovly.buf.
+ * The guard value is stored in the _ovly_buf_table entry and
+ * is an index (starting at 1) back to the _ovly_table entry
+ * that is pointing at this _ovly_buf_table entry. So, for
+ * example, for an overlay scenario with one overlay segment
+ * and two overlay sections:
+ * - Section 1 points to the first entry of the
+ * _ovly_buf_table, which contains a guard value
+ * of '1', referencing the first (index=0) entry of
+ * _ovly_table.
+ * - Section 2 points to the second entry of the
+ * _ovly_buf_table, which contains a guard value
+ * of '2', referencing the second (index=1) entry of
+ * _ovly_table.
+ */
+
};
/* The three functions below are for maintaining and accessing
@@ -85,6 +92,6 @@ int spu_sync_stop(void);
void spu_sync_buffer(int spu_num, unsigned int *samples,
int num_samples);
-void set_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
+void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
#endif /* PR_UTIL_H */
diff -puN
arch/powerpc/oprofile/cell/spu_profiler.c~oprofile-add-support-to-oprofile-for-profiling-cell-be-update
arch/powerpc/oprofile/cell/spu_profiler.c
---
a/arch/powerpc/oprofile/cell/spu_profiler.c~oprofile-add-support-to-oprofile-for-profiling-cell-be-update
+++ a/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -16,7 +16,6 @@
#include <linux/smp.h>
#include <linux/slab.h>
#include <asm/cell-pmu.h>
-/*#include <linux/time.h>*/
#include "pr_util.h"
#define TRACE_ARRAY_SIZE 1024
@@ -27,9 +26,6 @@ static u32 *samples;
static int spu_prof_running;
static unsigned int profiling_interval;
-extern int spu_prof_num_nodes;
-
-
#define NUM_SPU_BITS_TRBUF 16
#define SPUS_PER_TB_ENTRY 4
#define SPUS_PER_NODE 8
@@ -39,9 +35,10 @@ extern int spu_prof_num_nodes;
static DEFINE_SPINLOCK(sample_array_lock);
unsigned long sample_array_lock_flags;
-void set_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
+void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
{
unsigned long ns_per_cyc;
+
if (!freq_khz)
freq_khz = ppc_proc_freq/1000;
@@ -123,7 +120,7 @@ static int cell_spu_pc_collection(int cp
trace_addr = cbe_read_pm(cpu, trace_address);
}
- return(entry);
+ return entry;
}
@@ -170,7 +167,8 @@ static enum hrtimer_restart profile_spus
sample_array_lock_flags);
}
- smp_wmb();
+ smp_wmb(); /* insure spu event buffer updates are written */
+ /* don't want events intermingled... */
kt = ktime_set(0, profiling_interval);
if (!spu_prof_running)
diff -puN
arch/powerpc/oprofile/cell/spu_task_sync.c~oprofile-add-support-to-oprofile-for-profiling-cell-be-update
arch/powerpc/oprofile/cell/spu_task_sync.c
---
a/arch/powerpc/oprofile/cell/spu_task_sync.c~oprofile-add-support-to-oprofile-for-profiling-cell-be-update
+++ a/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -232,6 +232,8 @@ get_exec_dcookie_and_offset(struct spu *
if (!mm)
goto out;
+ down_read(&mm->mmap_sem);
+
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (!vma->vm_file)
continue;
@@ -263,10 +265,14 @@ get_exec_dcookie_and_offset(struct spu *
vma->vm_file->f_vfsmnt);
pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
+ up_read(&mm->mmap_sem);
+
out:
return app_cookie;
fail_no_image_cookie:
+ up_read(&mm->mmap_sem);
+
printk(KERN_ERR "SPU_PROF: "
"%s, line %d: Cannot find dcookie for SPU binary\n",
__FUNCTION__, __LINE__);
@@ -310,7 +316,8 @@ static int process_context_switch(struct
add_event_entry(spu_cookie);
add_event_entry(offset);
spin_unlock_irqrestore(&buffer_lock, flags);
- smp_wmb();
+ smp_wmb(); /* insure spu event buffer updates are written */
+ /* don't want entries intermingled... */
out:
return retval;
}
@@ -343,6 +350,18 @@ static struct notifier_block spu_active
.notifier_call = spu_active_notify,
};
+static int number_of_online_nodes(void)
+{
+ u32 cpu; u32 tmp;
+ int nodes = 0;
+ for_each_online_cpu(cpu) {
+ tmp = cbe_cpu_to_node(cpu) + 1;
+ if (tmp > nodes)
+ nodes++;
+ }
+ return nodes;
+}
+
/* The main purpose of this function is to synchronize
* OProfile with SPUFS by registering to be notified of
* SPU task switches.
diff -puN
arch/powerpc/oprofile/op_model_cell.c~oprofile-add-support-to-oprofile-for-profiling-cell-be-update
arch/powerpc/oprofile/op_model_cell.c
---
a/arch/powerpc/oprofile/op_model_cell.c~oprofile-add-support-to-oprofile-for-profiling-cell-be-update
+++ a/arch/powerpc/oprofile/op_model_cell.c
@@ -867,7 +867,7 @@ oprof_cpufreq_notify(struct notifier_blo
if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
(val == CPUFREQ_POSTCHANGE && frq->old > frq->new) ||
(val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE))
- set_profiling_frequency(frq->new, spu_cycle_reset);
+ set_spu_profiling_frequency(frq->new, spu_cycle_reset);
return ret;
}
@@ -902,7 +902,7 @@ static int cell_global_start_spu(struct
cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
#endif
- set_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
+ set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
for_each_online_cpu(cpu) {
if (cbe_get_hw_thread_id(cpu))
@@ -1015,11 +1015,10 @@ static int cell_global_start_ppu(struct
static int cell_global_start(struct op_counter_config *ctr)
{
- if (spu_cycle_reset) {
+ if (spu_cycle_reset)
return cell_global_start_spu(ctr);
- } else {
+ else
return cell_global_start_ppu(ctr);
- }
}
/*
@@ -1101,11 +1100,10 @@ static void cell_global_stop_ppu(void)
static void cell_global_stop(void)
{
- if (spu_cycle_reset) {
+ if (spu_cycle_reset)
cell_global_stop_spu();
- } else {
+ else
cell_global_stop_ppu();
- }
}
static void cell_handle_interrupt(struct pt_regs *regs,
_
Patches currently in -mm which might be from [EMAIL PROTECTED] are
oprofile-enable-spu-switch-notification-to-detect-currently-active-spu-tasks.patch
oprofile-enable-spu-switch-notification-to-detect-currently-active-spu-tasks-update.patch
oprofile-add-support-to-oprofile-for-profiling-cell-be-spus.patch
oprofile-add-support-to-oprofile-for-profiling-cell-be-update.patch
-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html