Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch

Arnd Bergmann Mon, 26 Feb 2007 15:55:16 -0800

On Thursday 22 February 2007, Carl Love wrote:
> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c
> to add in the SPU profiling capabilities.  In addition, a 'cell' subdirectory
> was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling
> code.


There was a significant amount of whitespace breakage in this patch,
which I cleaned up. The patch below consists of the other things
I changed as a further cleanup. Note that I changed the format
of the context switch record, which I found too complicated, as
I described on IRC last week.

        Arnd <><

--
Subject: cleanup spu oprofile code

From: Arnd Bergmann <[EMAIL PROTECTED]>
This cleans up some of the new oprofile code. It's mostly
cosmetic changes, like the way multi-line comments are formatted.
The most significant change is a simplification of the
context-switch record format.

It does mean the oprofile report tool needs to be adapted,
but I'm sure that it pays off in the end.

Signed-off-by: Arnd Bergmann <[EMAIL PROTECTED]>
Index: linux-2.6/arch/powerpc/oprofile/cell/spu_task_sync.c
===================================================================
--- linux-2.6.orig/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ linux-2.6/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -61,11 +61,12 @@ static void destroy_cached_info(struct k
 static struct cached_info * get_cached_info(struct spu * the_spu, int spu_num)
 {
        struct kref * ref;
-       struct cached_info * ret_info = NULL;
+       struct cached_info * ret_info;
        if (spu_num >= num_spu_nodes) {
                printk(KERN_ERR "SPU_PROF: "
                       "%s, line %d: Invalid index %d into spu info cache\n",
                       __FUNCTION__, __LINE__, spu_num);
+               ret_info = NULL;
                goto out;
        }
        if (!spu_info[spu_num] && the_spu) {
@@ -89,9 +90,9 @@ static struct cached_info * get_cached_i
 static int
 prepare_cached_spu_info(struct spu * spu, unsigned int objectId)
 {
-       unsigned long flags = 0;
+       unsigned long flags;
        struct vma_to_fileoffset_map * new_map;
-       int retval = 0;
+       int retval;
        struct cached_info * info;
 
        /* We won't bother getting cache_lock here since
@@ -112,6 +113,7 @@ prepare_cached_spu_info(struct spu * spu
                printk(KERN_ERR "SPU_PROF: "
                       "%s, line %d: create vma_map failed\n",
                       __FUNCTION__, __LINE__);
+               retval = -ENOMEM;
                goto err_alloc;
        }
        new_map = create_vma_map(spu, objectId);
@@ -119,6 +121,7 @@ prepare_cached_spu_info(struct spu * spu
                printk(KERN_ERR "SPU_PROF: "
                       "%s, line %d: create vma_map failed\n",
                       __FUNCTION__, __LINE__);
+               retval = -ENOMEM;
                goto err_alloc;
        }
 
@@ -144,7 +147,7 @@ prepare_cached_spu_info(struct spu * spu
        goto out;
 
 err_alloc:
-       retval = -1;
+       kfree(info);
 out:
        return retval;
 }
@@ -215,11 +218,9 @@ static inline unsigned long fast_get_dco
 static unsigned long
 get_exec_dcookie_and_offset(struct spu * spu, unsigned int * offsetp,
                            unsigned long * spu_bin_dcookie,
-                           unsigned long * shlib_dcookie,
                            unsigned int spu_ref)
 {
        unsigned long app_cookie = 0;
-       unsigned long * image_cookie = NULL;
        unsigned int my_offset = 0;
        struct file * app = NULL;
        struct vm_area_struct * vma;
@@ -252,24 +253,17 @@ get_exec_dcookie_and_offset(struct spu *
                         my_offset, spu_ref,
                         vma->vm_file->f_dentry->d_name.name);
                *offsetp = my_offset;
-               if (my_offset == 0)
-                       image_cookie = spu_bin_dcookie;
-               else if (vma->vm_file != app)
-                       image_cookie = shlib_dcookie;
                break;
        }
 
-       if (image_cookie) {
-               *image_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
+       *spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
                                                 vma->vm_file->f_vfsmnt);
-               pr_debug("got dcookie for %s\n",
-                        vma->vm_file->f_dentry->d_name.name);
-       }
+       pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
 
- out:
+out:
        return app_cookie;
 
- fail_no_image_cookie:
+fail_no_image_cookie:
        printk(KERN_ERR "SPU_PROF: "
                "%s, line %d: Cannot find dcookie for SPU binary\n",
                __FUNCTION__, __LINE__);
@@ -285,18 +279,18 @@ get_exec_dcookie_and_offset(struct spu *
 static int process_context_switch(struct spu * spu, unsigned int objectId)
 {
        unsigned long flags;
-       int retval = 0;
-       unsigned int offset = 0;
-       unsigned long spu_cookie = 0, app_dcookie = 0, shlib_cookie = 0;
+       int retval;
+       unsigned int offset;
+       unsigned long spu_cookie, app_dcookie;
+
        retval = prepare_cached_spu_info(spu, objectId);
-       if (retval == -1) {
+       if (retval)
                goto out;
-       }
+
        /* Get dcookie first because a mutex_lock is taken in that
         * code path, so interrupts must not be disabled.
         */
-       app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie,
-                                                 &shlib_cookie, objectId);
+       app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
 
        /* Record context info in event buffer */
        spin_lock_irqsave(&buffer_lock, flags);
@@ -306,27 +300,8 @@ static int process_context_switch(struct
        add_event_entry(spu->pid);
        add_event_entry(spu->tgid);
        add_event_entry(app_dcookie);
-
-       if (offset) {
-               /* When offset is non-zero, the SPU ELF was embedded;
-                * otherwise, it was loaded from a separate binary file. For
-                * embedded case, we record the offset into the embedding file
-                * where the SPU ELF was placed.  The embedding file may be
-                * either the executable application binary or shared library.
-                * For the non-embedded case, we record a dcookie that
-                * points to the location of the separate SPU binary that was
-                * loaded.
-                */
-               if (shlib_cookie) {
-                       add_event_entry(SPU_SHLIB_COOKIE_CODE);
-                       add_event_entry(shlib_cookie);
-               }
-               add_event_entry(SPU_OFFSET_CODE);
-               add_event_entry(offset);
-       } else {
-               add_event_entry(SPU_COOKIE_CODE);
-               add_event_entry(spu_cookie);
-       }
+       add_event_entry(spu_cookie);
+       add_event_entry(offset);
        spin_unlock_irqrestore(&buffer_lock, flags);
        smp_wmb();
 out:
@@ -343,8 +318,8 @@ static int spu_active_notify(struct noti
                                void * data)
 {
        int retval;
-       unsigned long flags = 0;
-       struct spu * the_spu = data;
+       unsigned long flags;
+       struct spu *the_spu = data;
        pr_debug("SPU event notification arrived\n");
        if (!val){
                spin_lock_irqsave(&cache_lock, flags);
@@ -403,8 +378,7 @@ void spu_sync_buffer(int spu_num, unsign
                     int num_samples)
 {
        unsigned long long file_offset;
-       unsigned long cache_lock_flags = 0;
-       unsigned long buffer_lock_flags = 0;
+       unsigned long flags;
        int i;
        struct vma_to_fileoffset_map * map;
        struct spu * the_spu;
@@ -417,29 +391,27 @@ void spu_sync_buffer(int spu_num, unsign
         * corresponding to this cached_info may end, thus resulting
         * in the destruction of the cached_info.
         */
-       spin_lock_irqsave(&cache_lock, cache_lock_flags);
+       spin_lock_irqsave(&cache_lock, flags);
        c_info = get_cached_info(NULL, spu_num);
-       if (c_info == NULL) {
+       if (!c_info) {
        /* This legitimately happens when the SPU task ends before all
         * samples are recorded.  No big deal -- so we just drop a few samples.
         */
                pr_debug("SPU_PROF: No cached SPU contex "
                          "for SPU #%d. Dropping samples.\n", spu_num);
-               spin_unlock_irqrestore(&cache_lock, cache_lock_flags);
-               return ;
+               goto out;
        }
 
        map = c_info->map;
        the_spu = c_info->the_spu;
-       spin_lock_irqsave(&buffer_lock, buffer_lock_flags);
+       spin_lock(&buffer_lock);
        for (i = 0; i < num_samples; i++) {
                unsigned int sample = *(samples+i);
                int grd_val = 0;
                file_offset = 0;
                if (sample == 0)
                        continue;
-               file_offset = vma_map_lookup(
-                       map, sample, the_spu, &grd_val);
+               file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);
 
                /* If overlays are used by this SPU application, the guard
                 * value is non-zero, indicating which overlay section is in
@@ -460,8 +432,9 @@ void spu_sync_buffer(int spu_num, unsign
                        continue;
                add_event_entry(file_offset | spu_num_shifted);
        }
-       spin_unlock_irqrestore(&buffer_lock, buffer_lock_flags);
-       spin_unlock_irqrestore(&cache_lock, cache_lock_flags);
+       spin_unlock(&buffer_lock);
+out:
+       spin_unlock_irqrestore(&cache_lock, flags);
 }
 
 
Index: linux-2.6/arch/powerpc/oprofile/op_model_cell.c
===================================================================
--- linux-2.6.orig/arch/powerpc/oprofile/op_model_cell.c
+++ linux-2.6/arch/powerpc/oprofile/op_model_cell.c
@@ -40,7 +40,8 @@
 #include "../platforms/cell/cbe_regs.h"
 #include "cell/pr_util.h"
 
-/* spu_cycle_reset is the number of cycles between samples.
+/*
+ * spu_cycle_reset is the number of cycles between samples.
  * This variable is used for SPU profiling and should ONLY be set
  * at the beginning of cell_reg_setup; otherwise, it's read-only.
  */
@@ -73,7 +74,6 @@ struct pmc_cntrl_data {
 /*
  * ibm,cbe-perftools rtas parameters
  */
-
 struct pm_signal {
        u16 cpu;                /* Processor to modify */
        u16 sub_unit;           /* hw subunit this applies to (if applicable)*/
@@ -123,7 +123,8 @@ static DEFINE_PER_CPU(unsigned long[NR_P
 
 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
 
-/* The CELL profiling code makes rtas calls to setup the debug bus to
+/*
+ * The CELL profiling code makes rtas calls to setup the debug bus to
  * route the performance signals.  Additionally, SPU profiling requires
  * a second rtas call to setup the hardware to capture the SPU PCs.
  * The EIO error value is returned if the token lookups or the rtas
@@ -137,16 +138,21 @@ static struct pmc_cntrl_data pmc_cntrl[N
  * either.
  */
 
-/* Interpetation of hdw_thread:
+/*
+ * Interpetation of hdw_thread:
  * 0 - even virtual cpus 0, 2, 4,...
  * 1 - odd virtual cpus 1, 3, 5, ...
+ *
+ * FIXME: this is strictly wrong, we need to clean this up in a number
+ * of places. It works for now. -arnd
  */
 static u32 hdw_thread;
 
 static u32 virt_cntr_inter_mask;
 static struct timer_list timer_virt_cntr;
 
-/* pm_signal needs to be global since it is initialized in
+/*
+ * pm_signal needs to be global since it is initialized in
  * cell_reg_setup at the time when the necessary information
  * is available.
  */
@@ -167,7 +173,6 @@ static unsigned char input_bus[NUM_INPUT
 /*
  * Firmware interface functions
  */
-
 static int
 rtas_ibm_cbe_perftools(int subfunc, int passthru,
                       void *address, unsigned long length)
@@ -183,12 +188,13 @@ static void pm_rtas_reset_signals(u32 no
        int ret;
        struct pm_signal pm_signal_local;
 
-       /*  The debug bus is being set to the passthru disable state.
-        *  However, the FW still expects atleast one legal signal routing
-        *  entry or it will return an error on the arguments.  If we don't
-        *  supply a valid entry, we must ignore all return values.  Ignoring
-        *  all return values means we might miss an error we should be
-        *  concerned about.
+       /*
+        * The debug bus is being set to the passthru disable state.
+        * However, the FW still expects atleast one legal signal routing
+        * entry or it will return an error on the arguments.   If we don't
+        * supply a valid entry, we must ignore all return values.  Ignoring
+        * all return values means we might miss an error we should be
+        * concerned about.
         */
 
        /*  fw expects physical cpu #. */
@@ -203,7 +209,8 @@ static void pm_rtas_reset_signals(u32 no
                                     sizeof(struct pm_signal));
 
        if (unlikely(ret))
-               /* Not a fatal error. For Oprofile stop, the oprofile
+               /*
+                * Not a fatal error. For Oprofile stop, the oprofile
                 * functions do not support returning an error for
                 * failure to stop OProfile.
                 */
@@ -217,7 +224,8 @@ static int pm_rtas_activate_signals(u32 
        int i, j;
        struct pm_signal pm_signal_local[NR_PHYS_CTRS];
 
-       /* There is no debug setup required for the cycles event.
+       /*
+        * There is no debug setup required for the cycles event.
         * Note that only events in the same group can be used.
         * Otherwise, there will be conflicts in correctly routing
         * the signals on the debug bus.  It is the responsiblity
@@ -295,7 +303,8 @@ static void set_pm_event(u32 ctr, int ev
        pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
        pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
 
-       /* Some of the islands signal selection is based on 64 bit words.
+       /*
+        * Some of the islands signal selection is based on 64 bit words.
         * The debug bus words are 32 bits, the input words to the performance
         * counters are defined as 32 bits.  Need to convert the 64 bit island
         * specification to the appropriate 32 input bit and bus word for the
@@ -345,7 +354,8 @@ out:
 
 static void write_pm_cntrl(int cpu)
 {
-       /* Oprofile will use 32 bit counters, set bits 7:10 to 0
+       /*
+        * Oprofile will use 32 bit counters, set bits 7:10 to 0
         * pmregs.pm_cntrl is a global
         */
 
@@ -362,7 +372,8 @@ static void write_pm_cntrl(int cpu)
        if (pm_regs.pm_cntrl.freeze == 1)
                val |= CBE_PM_FREEZE_ALL_CTRS;
 
-       /* Routine set_count_mode must be called previously to set
+       /*
+        * Routine set_count_mode must be called previously to set
         * the count mode based on the user selection of user and kernel.
         */
        val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
@@ -372,7 +383,8 @@ static void write_pm_cntrl(int cpu)
 static inline void
 set_count_mode(u32 kernel, u32 user)
 {
-       /* The user must specify user and kernel if they want them. If
+       /*
+        * The user must specify user and kernel if they want them. If
         *  neither is specified, OProfile will count in hypervisor mode.
         *  pm_regs.pm_cntrl is a global
         */
@@ -413,17 +425,18 @@ static inline void enable_ctr(u32 cpu, u
  * pair of per-cpu arrays is used for storing the previous and next
  * pmc values for a given node.
  * NOTE: We use the per-cpu variable to improve cache performance.
+ *
+ * This routine will alternate loading the virtual counters for
+ * virtual CPUs
  */
 static void cell_virtual_cntr(unsigned long data)
 {
-       /* This routine will alternate loading the virtual counters for
-        * virtual CPUs
-        */
        int i, prev_hdw_thread, next_hdw_thread;
        u32 cpu;
        unsigned long flags;
 
-       /* Make sure that the interrupt_hander and the virt counter are
+       /*
+        * Make sure that the interrupt_hander and the virt counter are
         * not both playing with the counters on the same node.
         */
 
@@ -435,22 +448,25 @@ static void cell_virtual_cntr(unsigned l
        hdw_thread = 1 ^ hdw_thread;
        next_hdw_thread = hdw_thread;
 
-       for (i = 0; i < num_counters; i++)
-       /* There are some per thread events.  Must do the
+       /*
+        * There are some per thread events.  Must do the
         * set event, for the thread that is being started
         */
+       for (i = 0; i < num_counters; i++)
                set_pm_event(i,
                        pmc_cntrl[next_hdw_thread][i].evnts,
                        pmc_cntrl[next_hdw_thread][i].masks);
 
-       /* The following is done only once per each node, but
+       /*
+        * The following is done only once per each node, but
         * we need cpu #, not node #, to pass to the cbe_xxx functions.
         */
        for_each_online_cpu(cpu) {
                if (cbe_get_hw_thread_id(cpu))
                        continue;
 
-               /* stop counters, save counter values, restore counts
+               /*
+                * stop counters, save counter values, restore counts
                 * for previous thread
                 */
                cbe_disable_pm(cpu);
@@ -479,13 +495,15 @@ static void cell_virtual_cntr(unsigned l
                                                      next_hdw_thread)[i]);
                }
 
-               /* Switch to the other thread. Change the interrupt
+               /*
+                * Switch to the other thread. Change the interrupt
                 * and control regs to be scheduled on the CPU
                 * corresponding to the thread to execute.
                 */
                for (i = 0; i < num_counters; i++) {
                        if (pmc_cntrl[next_hdw_thread][i].enabled) {
-                               /* There are some per thread events.
+                               /*
+                                * There are some per thread events.
                                 * Must do the set event, enable_cntr
                                 * for each cpu.
                                 */
@@ -517,9 +535,8 @@ static void start_virt_cntrs(void)
 }
 
 /* This function is called once for all cpus combined */
-static int
-cell_reg_setup(struct op_counter_config *ctr,
-              struct op_system_config *sys, int num_ctrs)
+static int cell_reg_setup(struct op_counter_config *ctr,
+                       struct op_system_config *sys, int num_ctrs)
 {
        int i, j, cpu;
        spu_cycle_reset = 0;
@@ -527,7 +544,8 @@ cell_reg_setup(struct op_counter_config 
        if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
                spu_cycle_reset = ctr[0].count;
 
-               /* Each node will need to make the rtas call to start
+               /*
+                * Each node will need to make the rtas call to start
                 * and stop SPU profiling.  Get the token once and store it.
                 */
                spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
@@ -542,7 +560,8 @@ cell_reg_setup(struct op_counter_config 
 
        pm_rtas_token = rtas_token("ibm,cbe-perftools");
 
-       /* For all events excetp PPU CYCLEs, each node will need to make
+       /*
+        * For all events excetp PPU CYCLEs, each node will need to make
         * the rtas cbe-perftools call to setup and reset the debug bus.
         * Make the token lookup call once and store it in the global
         * variable pm_rtas_token.
@@ -579,7 +598,8 @@ cell_reg_setup(struct op_counter_config 
                        per_cpu(pmc_values, j)[i] = 0;
        }
 
-       /* Setup the thread 1 events, map the thread 0 event to the
+       /*
+        * Setup the thread 1 events, map the thread 0 event to the
         * equivalent thread 1 event.
         */
        for (i = 0; i < num_ctrs; ++i) {
@@ -603,7 +623,8 @@ cell_reg_setup(struct op_counter_config 
        for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
                input_bus[i] = 0xff;
 
-       /* Our counters count up, and "count" refers to
+       /*
+        * Our counters count up, and "count" refers to
         * how much before the next interrupt, and we interrupt
         * on overflow.  So we calculate the starting value
         * which will give us "count" until overflow.
@@ -667,19 +688,19 @@ static int cell_cpu_setup(struct op_coun
                }
        }
 
-       /* the pm_rtas_activate_signals will return -EIO if the FW
+       /*
+        * The pm_rtas_activate_signals will return -EIO if the FW
         * call failed.
         */
-       return (pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled));
-
+       return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
 }
 
 #define ENTRIES         303
 #define MAXLFSR         0xFFFFFF
 
 /* precomputed table of 24 bit LFSR values */
-int initial_lfsr[] =
-{8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
+static int initial_lfsr[] = {
+ 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
  15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
  4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
  3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
@@ -716,7 +737,8 @@ int initial_lfsr[] =
  3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
  6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
  7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
- 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607};
+ 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
+};
 
 /*
  * The hardware uses an LFSR counting sequence to determine when to capture
@@ -777,28 +799,25 @@ int initial_lfsr[] =
 
 static int calculate_lfsr(int n)
 {
-       /* The ranges and steps are in powers of 2 so the calculations
+       /*
+        * The ranges and steps are in powers of 2 so the calculations
         * can be done using shifts rather then divide.
         */
        int index;
 
-       if ((n >> 16) == 0) {
+       if ((n >> 16) == 0)
                index = 0;
-
-       } else if (((n - V2_16) >> 19) == 0) {
+       else if (((n - V2_16) >> 19) == 0)
                index = ((n - V2_16) >> 12) + 1;
-
-       } else if (((n - V2_16 - V2_19) >> 22) == 0) {
+       else if (((n - V2_16 - V2_19) >> 22) == 0)
                index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128;
+       else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
+               index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256;
+       else
+               index = ENTRIES-1;
 
-       } else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0) {
-               index = ((n - V2_16 - V2_19 - V2_22) >> 18 )
-                       + 1 + 256;
-       }
-
-       if ((index > ENTRIES) || (index < 0))   /* make sure index is
-                                                * valid
-                                                */
+       /* make sure index is valid */
+       if ((index > ENTRIES) || (index < 0))
                index = ENTRIES-1;
 
        return initial_lfsr[index];
@@ -809,15 +828,17 @@ static int pm_rtas_activate_spu_profilin
        int ret, i;
        struct pm_signal pm_signal_local[NR_PHYS_CTRS];
 
-       /* Set up the rtas call to configure the debug bus to
-        * route the SPU PCs.  Setup the pm_signal for each SPU */
+       /*
+        * Set up the rtas call to configure the debug bus to
+        * route the SPU PCs.  Setup the pm_signal for each SPU
+        */
        for (i = 0; i < NUM_SPUS_PER_NODE; i++) {
                pm_signal_local[i].cpu = node;
                pm_signal_local[i].signal_group = 41;
-               pm_signal_local[i].bus_word = 1 << i / 2; /* spu i on
-                                                          * word (i/2)
-                                                          */
-               pm_signal_local[i].sub_unit = i;        /* spu i */
+               /* spu i on word (i/2) */
+               pm_signal_local[i].bus_word = 1 << i / 2;
+               /* spu i */
+               pm_signal_local[i].sub_unit = i;
                pm_signal_local[i].bit = 63;
        }
 
@@ -858,8 +879,8 @@ static int cell_global_start_spu(struct 
        int subfunc, rtn_value;
        unsigned int lfsr_value;
        int cpu;
-       int ret = 0;
-       int rtas_error = 0;
+       int ret;
+       int rtas_error;
        unsigned int cpu_khzfreq = 0;
 
        /* The SPU profiling uses time-based profiling based on
@@ -884,24 +905,23 @@ static int cell_global_start_spu(struct 
        for_each_online_cpu(cpu) {
                if (cbe_get_hw_thread_id(cpu))
                        continue;
-               /* Setup SPU cycle-based profiling.
+
+               /*
+                * Setup SPU cycle-based profiling.
                 * Set perf_mon_control bit 0 to a zero before
                 * enabling spu collection hardware.
                 */
                cbe_write_pm(cpu, pm_control, 0);
 
                if (spu_cycle_reset > MAX_SPU_COUNT)
-                       /* use largest possible value
-                        */
+                       /* use largest possible value */
                        lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
                else
-                   lfsr_value = calculate_lfsr(spu_cycle_reset);
+                       lfsr_value = calculate_lfsr(spu_cycle_reset);
 
-               if (lfsr_value == 0) {  /* must use a non zero value.  Zero
-                                        * disables data collection.
-                                        */
-                               lfsr_value = calculate_lfsr(1);
-               }
+               /* must use a non zero value. Zero disables data collection. */
+               if (lfsr_value == 0)
+                       lfsr_value = calculate_lfsr(1);
 
                lfsr_value = lfsr_value << 8; /* shift lfsr to correct
                                                * register location
@@ -916,7 +936,7 @@ static int cell_global_start_spu(struct 
                }
 
 
-               subfunc = 2;    // 2 - activate SPU tracing, 3 - deactivate
+               subfunc = 2;    /* 2 - activate SPU tracing, 3 - deactivate */
 
                /* start profiling */
                rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
@@ -976,7 +996,8 @@ static int cell_global_start_ppu(struct 
        oprofile_running = 1;
        smp_wmb();
 
-       /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
+       /*
+        * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
         * executed which manipulates the PMU.  We start the "virtual counter"
         * here so that we do not need to synchronize access to the PMU in
         * the above for-loop.
@@ -986,7 +1007,6 @@ static int cell_global_start_ppu(struct 
        return 0;
 }
 
-
 static int cell_global_start(struct op_counter_config *ctr)
 {
        if (spu_cycle_reset) {
@@ -996,14 +1016,15 @@ static int cell_global_start(struct op_c
        }
 }
 
-static void cell_global_stop_spu(void)
-/* Note the generic OProfile stop calls do not support returning
+/*
+ * Note the generic OProfile stop calls do not support returning
  * an error on stop.  Hence, will not return an error if the FW
  * calls fail on stop. Failure to reset the debug bus is not an issue.
  * Failure to disable the SPU profiling is not an issue.  The FW calls
  * to enable the performance counters and debug bus will work even if
  * the hardware was not cleanly reset.
  */
+static void cell_global_stop_spu(void)
 {
        int subfunc, rtn_value;
        unsigned int lfsr_value;
@@ -1020,7 +1041,8 @@ static void cell_global_stop_spu(void)
                if (cbe_get_hw_thread_id(cpu))
                        continue;
 
-               subfunc = 3;    /* 2 - activate SPU tracing,
+               subfunc = 3;    /*
+                                * 2 - activate SPU tracing,
                                 * 3 - deactivate
                                 */
                lfsr_value = 0x8f100000;
@@ -1046,7 +1068,8 @@ static void cell_global_stop_ppu(void)
 {
        int cpu;
 
-       /* This routine will be called once for the system.
+       /*
+        * This routine will be called once for the system.
         * There is one performance monitor per node, so we
         * only need to perform this function once per node.
         */
@@ -1079,8 +1102,8 @@ static void cell_global_stop(void)
        }
 }
 
-static void
-cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
+static void cell_handle_interrupt(struct pt_regs *regs,
+                               struct op_counter_config *ctr)
 {
        u32 cpu;
        u64 pc;
@@ -1091,13 +1114,15 @@ cell_handle_interrupt(struct pt_regs *re
 
        cpu = smp_processor_id();
 
-       /* Need to make sure the interrupt handler and the virt counter
+       /*
+        * Need to make sure the interrupt handler and the virt counter
         * routine are not running at the same time. See the
         * cell_virtual_cntr() routine for additional comments.
         */
        spin_lock_irqsave(&virt_cntr_lock, flags);
 
-       /* Need to disable and reenable the performance counters
+       /*
+        * Need to disable and reenable the performance counters
         * to get the desired behavior from the hardware.  This
         * is hardware specific.
         */
@@ -1106,7 +1131,8 @@ cell_handle_interrupt(struct pt_regs *re
 
        interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
 
-       /* If the interrupt mask has been cleared, then the virt cntr
+       /*
+        * If the interrupt mask has been cleared, then the virt cntr
         * has cleared the interrupt.  When the thread that generated
         * the interrupt is restored, the data count will be restored to
         * 0xffffff0 to cause the interrupt to be regenerated.
@@ -1124,7 +1150,8 @@ cell_handle_interrupt(struct pt_regs *re
                        }
                }
 
-               /* The counters were frozen by the interrupt.
+               /*
+                * The counters were frozen by the interrupt.
                 * Reenable the interrupt and restart the counters.
                 * If there was a race between the interrupt handler and
                 * the virtual counter routine.  The virutal counter
@@ -1134,7 +1161,8 @@ cell_handle_interrupt(struct pt_regs *re
                cbe_enable_pm_interrupts(cpu, hdw_thread,
                                         virt_cntr_inter_mask);
 
-               /* The writes to the various performance counters only writes
+               /*
+                * The writes to the various performance counters only writes
                 * to a latch.  The new values (interrupt setting bits, reset
                 * counter value etc.) are not copied to the actual registers
                 * until the performance monitor is enabled.  In order to get
@@ -1147,7 +1175,8 @@ cell_handle_interrupt(struct pt_regs *re
        spin_unlock_irqrestore(&virt_cntr_lock, flags);
 }
 
-/* This function is called from the generic OProfile
+/*
+ * This function is called from the generic OProfile
  * driver.  When profiling PPUs, we need to do the
  * generic sync start; otherwise, do spu_sync_start.
  */
@@ -1167,7 +1196,6 @@ static int cell_sync_stop(void)
                return 1;
 }
 
-
 struct op_powerpc_model op_model_cell = {
        .reg_setup = cell_reg_setup,
        .cpu_setup = cell_cpu_setup,
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch

Reply via email to