On Tue, Jul 15, 2014 at 02:31:40AM +0200, Stephane Eranian wrote:
> @@ -595,7 +595,8 @@ struct perf_sample_data {
>       struct perf_callchain_entry     *callchain;
>       struct perf_raw_record          *raw;
>       struct perf_branch_stack        *br_stack;
> -     struct perf_regs_user           regs_user;
> +     struct perf_regs                regs_user;
> +     struct perf_regs                regs_intr;
>       u64                             stack_user_size;
>       u64                             weight;
>       /*
> @@ -618,6 +619,8 @@ static inline void perf_sample_data_init(struct 
> perf_sample_data *data,
>       data->weight = 0;
>       data->data_src.val = 0;
>       data->txn = 0;
> +     data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
> +     data->regs_intr.regs = NULL;
>  }

I don't think we've been very careful here; does the below make sense?

AFAICT we don't need to set stack_user_size at all,
perf_prepare_sample() will set it when required, and with the change to
perf_sample_regs_user() the same is true for the regs_user thing.

This again reduces the cost of perf_sample_data_init() to touching a
single cacheline.

I'm not entirely sure the ____cacheline_aligned makes sense though, the
previous stack line is probably touched already so any next cacheline is
the one, and one avg we'd gain 0.5 cachelines worth of data.



---
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 707617a8c0f6..d27fec8118b1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -575,34 +575,40 @@ extern u64 perf_event_read_value(struct perf_event *event,
 
 
 struct perf_sample_data {
-       u64                             type;
+       /*
+        * Fields set by perf_sample_data_init(), group so as to
+        * minimize the cachelines touched.
+        */
+       u64                             addr;
+       struct perf_raw_record          *raw;
+       struct perf_branch_stack        *br_stack;
+       u64                             period;
+       u64                             weight;
+       u64                             txn;
+       union  perf_mem_data_src        data_src;
+
 
+       /*
+        * The other fields, optionally {set,used} by
+        * perf_{prepare,output}_sample().
+        */
+       u64                             type;
        u64                             ip;
        struct {
                u32     pid;
                u32     tid;
        }                               tid_entry;
        u64                             time;
-       u64                             addr;
        u64                             id;
        u64                             stream_id;
        struct {
                u32     cpu;
                u32     reserved;
        }                               cpu_entry;
-       u64                             period;
-       union  perf_mem_data_src        data_src;
        struct perf_callchain_entry     *callchain;
-       struct perf_raw_record          *raw;
-       struct perf_branch_stack        *br_stack;
        struct perf_regs_user           regs_user;
        u64                             stack_user_size;
-       u64                             weight;
-       /*
-        * Transaction flags for abort events:
-        */
-       u64                             txn;
-};
+} ____cacheline_aligned;
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
                                         u64 addr, u64 period)
@@ -612,9 +618,6 @@ static inline void perf_sample_data_init(struct 
perf_sample_data *data,
        data->raw  = NULL;
        data->br_stack = NULL;
        data->period = period;
-       data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
-       data->regs_user.regs = NULL;
-       data->stack_user_size = 0;
        data->weight = 0;
        data->data_src.val = 0;
        data->txn = 0;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c8b53c94d41d..926cd7aafc14 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4299,6 +4299,8 @@ perf_output_sample_regs(struct perf_output_handle *handle,
 static void perf_sample_regs_user(struct perf_regs_user *regs_user,
                                  struct pt_regs *regs)
 {
+       regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+
        if (!user_mode(regs)) {
                if (current->mm)
                        regs = task_pt_regs(current);

Attachment: pgpHpRiGD2ysu.pgp
Description: PGP signature

Reply via email to