I used the pahole tool ("Pack A Hole") to take a look at the sizes of the
perfmon2-related data structures on x86_64. pahole reads the ELF
information, determines the size of each field in a data structure,
and finds holes. It can also suggest reorderings of fields to reduce
the space wasted due to alignment. pahole is available as part of the
dwarves tools:
http://oops.ghostprotocols.net:81/acme/dwarves/
RPMs for i386 are available from:
http://oops.ghostprotocols.net:81/acme/dwarves/rpm
Some of the data structures are large; one is over 100KB in
size. Below is the information on the perfmon data structures that are
over 1K in size.
Why are pmc_desc[] and pmd_desc[] in _pfm_pmu_config sized
PFM_MAX_PMCS+1 and PFM_MAX_PMDS+1? Each pmc_desc and pmd_desc element
has a struct kobject, which is 120 bytes. Is that needed? That is over 74K
for the kobjects alone (2 x 321 x 120 = 77040 bytes).
/* <4f3d4> include/linux/perfmon_pmu.h:102 */
struct _pfm_pmu_config {
u64 impl_pmcs[5]; /* 0 40 */
u64 impl_pmds[5]; /* 40 40 */
/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
u64 impl_rw_pmds[5]; /* 80 40 */
u64 cnt_pmds[5]; /* 120 40 */
/* --- cacheline 2 boundary (128 bytes) was 32 bytes ago --- */
u64 ovfl_mask; /* 160 8 */
u16 max_pmc; /* 168 2 */
u16 max_pmd; /* 170 2 */
u16 max_rw_pmd; /* 172 2 */
u16 first_cnt_pmd; /* 174 2 */
u16 max_cnt_pmd; /* 176 2 */
u16 num_pmcs; /* 178 2 */
u16 num_pmds; /* 180 2 */
u16 num_counters; /* 182 2 */
char * pmu_name; /* 184 8 */
/* --- cacheline 3 boundary (192 bytes) --- */
char * version; /* 192 8 */
int counter_width; /* 200 4 */
/* XXX 4 bytes hole, try to pack */
struct pfm_reg_desc pmc_desc[321]; /* 208 53928 */
/* --- cacheline 845 boundary (54080 bytes) was 56 bytes ago --- */
struct pfm_reg_desc pmd_desc[321]; /* 54136 53928 */
/* --- cacheline 1688 boundary (108032 bytes) was 32 bytes ago --- */
pfm_pmc_check_t pmc_write_check; /* 108064 8 */
pfm_pmd_check_t pmd_write_check; /* 108072 8 */
pfm_pmd_sread_t pmd_sread; /* 108080 8 */
pfm_pmd_swrite_t pmd_swrite; /* 108088 8 */
/* --- cacheline 1689 boundary (108096 bytes) --- */
void * arch_info; /* 108096 8 */
u32 flags; /* 108104 4 */
/* XXX 4 bytes hole, try to pack */
struct module * owner; /* 108112 8 */
struct kobject kobj; /* 108120 120 */
/* --- cacheline 1691 boundary (108224 bytes) was 16 bytes ago --- */
}; /* size: 108240, cachelines: 1692 */
/* sum members: 108232, holes: 2, sum holes: 8 */
/* last cacheline: 16 bytes */
/* definitions: 20 */
/* <4efd8> include/linux/perfmon.h:364 */
struct pfm_event_set {
u16 id; /* 0 2 */
u16 id_next; /* 2 2 */
u32 flags; /* 4 4 */
struct list_head list; /* 8 16 */
struct pfm_event_set * sw_next; /* 24 8 */
u32 priv_flags; /* 32 4 */
u32 npend_ovfls; /* 36 4 */
u64 used_pmds[5]; /* 40 40 */
/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
u64 povfl_pmds[5]; /* 80 40 */
u64 ovfl_pmds[5]; /* 120 40 */
/* --- cacheline 2 boundary (128 bytes) was 32 bytes ago --- */
u64 reset_pmds[5]; /* 160 40 */
/* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
u64 ovfl_notify[5]; /* 200 40 */
u64 pmcs[320]; /* 240 2560 */
/* --- cacheline 43 boundary (2752 bytes) was 48 bytes ago --- */
u16 nused_pmds; /* 2800 2 */
u16 nused_pmcs; /* 2802 2 */
/* XXX 4 bytes hole, try to pack */
struct pfm_pmd pmds[320]; /* 2808 46080 */
/* --- cacheline 763 boundary (48832 bytes) was 56 bytes ago --- */
struct pfm_set_view * view; /* 48888 8 */
/* --- cacheline 764 boundary (48896 bytes) --- */
u64 timeout_sw_ref; /* 48896 8 */
u64 timeout_sw_left; /* 48904 8 */
u64 timeout_sw_exp; /* 48912 8 */
u64 duration_start; /* 48920 8 */
u64 duration; /* 48928 8 */
off_t mmap_offset; /* 48936 8 */
u64 used_pmcs[5]; /* 48944 40 */
/* --- cacheline 765 boundary (48960 bytes) was 24 bytes ago --- */
long unsigned int last_iip; /* 48984 8 */
u64 last_ovfl_pmd_reset; /* 48992 8 */
unsigned int last_ovfl_pmd; /* 49000 4 */
}; /* size: 49008, cachelines: 766 */
/* sum members: 49000, holes: 1, sum holes: 4 */
/* padding: 4 */
/* last cacheline: 48 bytes */
/* definitions: 20 */
/* <cffe5b> include/asm-i386/perfmon.h:85 */
struct pfm_arch_pmu_info {
struct pfm_arch_ext_reg pmc_addrs[256]; /* 0 6144 */
/* --- cacheline 96 boundary (6144 bytes) --- */
struct pfm_arch_ext_reg pmd_addrs[256]; /* 6144 6144 */
/* --- cacheline 192 boundary (12288 bytes) --- */
u64 enable_mask[5]; /* 12288 40 */
u64 ovfl_reg_mask; /* 12328 8 */
u16 pebs_ctr_idx; /* 12336 2 */
u16 flags; /* 12338 2 */
u8 pmu_style; /* 12340 1 */
}; /* size: 12344, cachelines: 193 */
/* padding: 3 */
/* last cacheline: 56 bytes */
/* definitions: 5 */
/* <4f580> include/linux/perfmon_fmt.h:26 */
struct pfm_ovfl_arg {
u16 ovfl_pmd; /* 0 2 */
u16 active_set; /* 2 2 */
u32 ovfl_ctrl; /* 4 4 */
u64 pmd_last_reset; /* 8 8 */
u64 smpl_pmds_values[320]; /* 16 2560 */
/* --- cacheline 40 boundary (2560 bytes) was 16 bytes ago --- */
u64 pmd_eventid; /* 2576 8 */
u16 num_smpl_pmds; /* 2584 2 */
}; /* size: 2592, cachelines: 41 */
/* padding: 6 */
/* last cacheline: 32 bytes */
/* definitions: 20 */
/* <4ecb4> include/linux/perfmon.h:208 */
struct pfm_set_view {
__u32 set_status; /* 0 4 */
__u32 set_reserved1; /* 4 4 */
__u64 set_runs; /* 8 8 */
__u64 set_pmds[320]; /* 16 2560 */
/* --- cacheline 40 boundary (2560 bytes) was 16 bytes ago --- */
volatile long unsigned int set_seq; /* 2576 8 */
}; /* size: 2584, cachelines: 41 */
/* last cacheline: 24 bytes */
/* definitions: 20 */
_______________________________________________
perfmon mailing list
[email protected]
http://www.hpl.hp.com/hosted/linux/mail-archives/perfmon/