Since 'struct clocksource' is ____cacheline_aligned, gcc must insert a lot of padding between reg and clksrc in 'struct clocksource_mmio' (for example, 60 bytes after the 4-byte reg pointer on ARMv7, where L1_CACHE_BYTES = 64).
Storing reg within 'struct clocksource' removes unnecessary padding, and reg can then be grouped with other hot data. A nice side-effect of this patch is making container_of() unnecessary, which makes the code a bit simpler.

On 32-bit platforms, reg fits in the padding between read and mask, meaning no downside from storing it there.

 0                4                8
 +----------------+----------------+
 |      read      |     pad/reg    |
 +----------------+----------------+
 |               mask              |
 +----------------+----------------+
 |      mult      |      shift     |
 +----------------+----------------+
 |           max_idle_ns           |
 +----------------+----------------+

On 64-bit platforms, placing reg between read and mask changes the layout, moving max_idle_ns to offset +32 instead of +24.

 0                4                8
 +----------------+----------------+
 |               read              |
 +----------------+----------------+
 |               reg               |
 +----------------+----------------+
 |               mask              |
 +----------------+----------------+
 |      mult      |      shift     |
 +----------------+----------------+
 |           max_idle_ns           |
 +----------------+----------------+

Signed-off-by: Marc Gonzalez <marc_gonza...@sigmadesigns.com> --- drivers/clocksource/mmio.c | 36 +++++++++++++----------------------- include/linux/clocksource.h | 3 +++ 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/drivers/clocksource/mmio.c b/drivers/clocksource/mmio.c index 1593ade2a815..c28fc6ef63ef 100644 --- a/drivers/clocksource/mmio.c +++ b/drivers/clocksource/mmio.c @@ -10,34 +10,24 @@ #include <linux/init.h> #include <linux/slab.h> -struct clocksource_mmio { - void __iomem *reg; - struct clocksource clksrc; -}; - -static inline struct clocksource_mmio *to_mmio_clksrc(struct clocksource *c) -{ - return container_of(c, struct clocksource_mmio, clksrc); -} - cycle_t clocksource_mmio_readl_up(struct clocksource *c) { - return (cycle_t)readl_relaxed(to_mmio_clksrc(c)->reg); + return (cycle_t)readl_relaxed(c->reg); } cycle_t clocksource_mmio_readl_down(struct clocksource *c) { - return ~(cycle_t)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask; + return 
~(cycle_t)readl_relaxed(c->reg) & c->mask; } cycle_t clocksource_mmio_readw_up(struct clocksource *c) { - return (cycle_t)readw_relaxed(to_mmio_clksrc(c)->reg); + return (cycle_t)readw_relaxed(c->reg); } cycle_t clocksource_mmio_readw_down(struct clocksource *c) { - return ~(cycle_t)readw_relaxed(to_mmio_clksrc(c)->reg) & c->mask; + return ~(cycle_t)readw_relaxed(c->reg) & c->mask; } /** @@ -53,21 +43,21 @@ int __init clocksource_mmio_init(void __iomem *base, const char *name, unsigned long hz, int rating, unsigned bits, cycle_t (*read)(struct clocksource *)) { - struct clocksource_mmio *cs; + struct clocksource *cs; if (bits > 32 || bits < 16) return -EINVAL; - cs = kzalloc(sizeof(struct clocksource_mmio), GFP_KERNEL); + cs = kzalloc(sizeof *cs, GFP_KERNEL); if (!cs) return -ENOMEM; - cs->reg = base; - cs->clksrc.name = name; - cs->clksrc.rating = rating; - cs->clksrc.read = read; - cs->clksrc.mask = CLOCKSOURCE_MASK(bits); - cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + cs->read = read; + cs->reg = base; + cs->name = name; + cs->rating = rating; + cs->mask = CLOCKSOURCE_MASK(bits); + cs->flags = CLOCK_SOURCE_IS_CONTINUOUS; - return clocksource_register_hz(&cs->clksrc, hz); + return clocksource_register_hz(cs, hz); } diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 278dd279a7a8..50725fd23ab0 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -69,6 +69,9 @@ struct clocksource { * clocksource itself is cacheline aligned. */ cycle_t (*read)(struct clocksource *cs); +#ifdef CONFIG_CLKSRC_MMIO + void __iomem *reg; +#endif cycle_t mask; u32 mult; u32 shift; -- 2.4.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/