From: Peter Keilty <[EMAIL PROTECTED]>

Update ia64 conversion to the generic timekeeping/clocksource code.
Modified the fast syscall path for gettimeofday to get performance equal
to the original code and to handle a clocksource change at clock hz time.

Performance measurements for single calls (ITC cycles):

A. 32 way Intel IA64 SMP system 8640 (montecito)
A.1. Current code itc cmpxchg
gettimeofday cycles: 39 37 37 37 37 37 37 37 37 37 
clock_gettime(REAL) cycles: 49 35 35 35 35 35 35 35 35 35
clock_gettime(MONO) cycles: 42 36 36 36 36 36 36 36 36 36

A.2 New code itc cmpxchg
gettimeofday cycles: 39 38 37 37 38 37 38 37 37 37
clock_gettime(REAL) cycles: 53 35 35 36 35 35 35 35 35 35
clock_gettime(MONO) cycles: 44 38 36 36 36 36 36 36 36 36

A.3 New code itc cmpxchg switched off (nojitter kernel option)
gettimeofday cycles: 35 35 36 35 36 35 36 35 35 35
clock_gettime(REAL) cycles: 49 33 33 34 33 33 33 33 33 33
clock_gettime(MONO) cycles: 38 33 33 33 33 33 33 33 33 33

A.4 New code with hpet as clocksource, mmio space read
gettimeofday cycles: 183 183 181 180 182 185 182 183 184 182
clock_gettime(REAL) cycles: 196 180 179 179 183 181 182 183 183 181
clock_gettime(MONO) cycles: 185 180 182 180 182 182 179 179 179 181

Signed-off-by: Peter Keilty <[EMAIL PROTECTED]>

---

 arch/ia64/kernel/asm-offsets.c        |   15 ++++---
 arch/ia64/kernel/cyclone.c            |    2 -
 arch/ia64/kernel/fsys.S               |   64 ++++++++++++++++++----------------
 arch/ia64/kernel/fsyscall_gtod_data.h |   18 +++++++++
 arch/ia64/kernel/time.c               |   37 +++++++++++++------
 arch/ia64/sn/kernel/sn2/timer.c       |    2 -
 drivers/char/hpet.c                   |    2 -
 kernel/time/ntp.c                     |   10 -----
 8 files changed, 90 insertions(+), 60 deletions(-)

Index: Linux/arch/ia64/kernel/asm-offsets.c
===================================================================
--- Linux.orig/arch/ia64/kernel/asm-offsets.c   2007-04-24 11:40:44.000000000 
-0400
+++ Linux/arch/ia64/kernel/asm-offsets.c        2007-04-24 12:58:49.000000000 
-0400
@@ -16,6 +16,7 @@
 #include <asm-ia64/mca.h>
 
 #include "../kernel/sigframe.h"
+#include "../kernel/fsyscall_gtod_data.h"
 
 #define DEFINE(sym, val) \
         asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -256,10 +257,12 @@ void foo(void)
        BLANK();
 
        /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
-       DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, 
tv_nsec));
-       DEFINE(IA64_CLOCKSOURCE_MASK_OFFSET, offsetof (struct clocksource, 
mask));
-       DEFINE(IA64_CLOCKSOURCE_MULT_OFFSET, offsetof (struct clocksource, 
mult));
-       DEFINE(IA64_CLOCKSOURCE_SHIFT_OFFSET, offsetof (struct clocksource, 
shift));
-       DEFINE(IA64_CLOCKSOURCE_MMIO_PTR_OFFSET, offsetof (struct clocksource, 
fsys_mmio_ptr));
-       DEFINE(IA64_CLOCKSOURCE_CYCLE_LAST_OFFSET, offsetof (struct 
clocksource, cycle_last));
+       DEFINE(IA64_GTOD_LOCK_OFFSET, offsetof (struct fsyscall_gtod_data_t, 
lock));
+       DEFINE(IA64_CLKSRC_MASK_OFFSET, offsetof (struct fsyscall_gtod_data_t, 
clk_mask));
+       DEFINE(IA64_CLKSRC_MULT_OFFSET, offsetof (struct fsyscall_gtod_data_t, 
clk_mult));
+       DEFINE(IA64_CLKSRC_SHIFT_OFFSET, offsetof (struct fsyscall_gtod_data_t, 
clk_shift));
+       DEFINE(IA64_CLKSRC_MMIO_PTR_OFFSET, offsetof (struct 
fsyscall_gtod_data_t, clk_fsys_mmio_ptr));
+       DEFINE(IA64_CLKSRC_CYCLE_LAST_OFFSET, offsetof (struct 
fsyscall_gtod_data_t, clk_cycle_last));
+       DEFINE(IA64_ITC_LASTCYCLE_OFFSET, offsetof (struct 
fsyscall_gtod_data_t, itc_lastcycle));
+       DEFINE(IA64_ITC_JITTER_OFFSET, offsetof (struct fsyscall_gtod_data_t, 
itc_jitter));
 }
Index: Linux/arch/ia64/kernel/cyclone.c
===================================================================
--- Linux.orig/arch/ia64/kernel/cyclone.c       2007-04-24 11:40:44.000000000 
-0400
+++ Linux/arch/ia64/kernel/cyclone.c    2007-04-24 12:31:20.000000000 -0400
@@ -33,7 +33,7 @@ static struct clocksource clocksource_cy
         .mask           = (1LL << 40) - 1,
         .mult           = 0, /*to be caluclated*/
         .shift          = 16,
-        .is_continuous  = 1,
+        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 int __init init_cyclone_clock(void)
Index: Linux/arch/ia64/kernel/fsys.S
===================================================================
--- Linux.orig/arch/ia64/kernel/fsys.S  2007-04-24 11:40:44.000000000 -0400
+++ Linux/arch/ia64/kernel/fsys.S       2007-04-25 16:39:25.000000000 -0400
@@ -145,6 +145,9 @@ ENTRY(fsys_set_tid_address)
        FSYS_RETURN
 END(fsys_set_tid_address)
 
+#if IA64_GTOD_LOCK_OFFSET !=0
+#error fsys_gettimeofday incompatible with changes to struct 
fsyscall_gtod_data_t
+#endif
 #define CLOCK_REALTIME 0
 #define CLOCK_MONOTONIC 1
 #define CLOCK_DIVIDE_BY_1000 0x4000
@@ -175,10 +178,10 @@ ENTRY(fsys_gettimeofday)
        // r16 = preserved: current task pointer
        // r17 = wall to monotonic use
        // r19 = address of itc_lastcycle
-       // r20 = struct clocksource / address of first element
-       // r21 = shift value
-       // r22 = address of itc_jitter/ wall_to_monotonic
-       // r23 = address of shift
+       // r20 = struct fsyscall_gtod_data  / address of first element
+       // r21 = address of mmio_ptr
+       // r22 = address of  wall_to_monotonic
+       // r23 = address of shift/ value
        // r24 = address mult factor / cycle_last value
        // r25 = itc_lastcycle value
        // r26 = address clocksource cycle_last
@@ -204,46 +207,47 @@ ENTRY(fsys_gettimeofday)
        tnat.nz p6,p0 = r31     // branch deferred since it does not fit into 
bundle structure
        mov pr = r30,0xc000     // Set predicates according to function
        add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
-       movl r20 = fsyscall_clock // load fsyscall clocksource address
+       movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
        ;;
-       add r10 = IA64_CLOCKSOURCE_MMIO_PTR_OFFSET,r20
-       movl r29 = xtime_lock
-       ld4 r2 = [r2]           // process work pending flags
+       add r29 = IA64_ITC_JITTER_OFFSET,r20
        movl r27 = xtime
-       ;;      // only one bundle here
-       add r14 = IA64_CLOCKSOURCE_MASK_OFFSET,r20
-       movl r22 = itc_jitter
-       add r24 = IA64_CLOCKSOURCE_MULT_OFFSET,r20
+       ld4 r2 = [r2]           // process work pending flags
+(p15)  movl r22 = wall_to_monotonic
+       ;;
+       add r21 = IA64_CLKSRC_MMIO_PTR_OFFSET,r20
+       add r19 = IA64_ITC_LASTCYCLE_OFFSET,r20
        and r2 = TIF_ALLWORK_MASK,r2
 (p6)    br.cond.spnt.few .fail_einval  // deferred branch
        ;;
-       ld8 r30 = [r10]         // clocksource->mmio_ptr
-       movl r19 = itc_lastcycle
-       add r23 = IA64_CLOCKSOURCE_SHIFT_OFFSET,r20
+       add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
        cmp.ne p6, p0 = 0, r2   // Fallback if work is scheduled
 (p6)    br.cond.spnt.many fsys_fallback_syscall
+       ;; // get lock.seq here new code, outer loop2!
+.time_redo:
+       ld4.acq r28 = [r20]     // gtod_lock.sequence, Must be first in struct
+       ld8 r30 = [r21]         // clocksource->mmio_ptr
+       add r24 = IA64_CLKSRC_MULT_OFFSET,r20
+       ld4 r2 = [r29]          // itc_jitter value
+       add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
+       add r14 = IA64_CLKSRC_MASK_OFFSET,r20
        ;;
+       ld4 r3 = [r24]          // clocksource mult value
        ld8 r14 = [r14]         // clocksource mask value
-       ld4 r2 = [r22]          // itc_jitter value
-       add r26 = IA64_CLOCKSOURCE_CYCLE_LAST_OFFSET,r20 // clock 
fsyscall_cycle_last
-       ld4 r3 = [r24]          // clocksource->mult value
        cmp.eq p8,p9 = 0,r30    // Check for cpu timer, no mmio_ptr, set p8, 
clear p9
        ;;
        setf.sig f7 = r3        // Setup for mult scaling of counter
-(p15)  movl r22 = wall_to_monotonic
-       ld4 r21 = [r23]         // shift value
-(p8)   cmp.ne p13,p0 = r2,r0   // need jitter compensation, set p13
+(p8)   cmp.ne p13,p0 = r2,r0   // need itc_jitter compensation, set p13
+       ld4 r23 = [r23]         // clocksource shift value
+       ld8 r24 = [r26]         // get clksrc_cycle_last value
 (p9)   cmp.eq p13,p0 = 0,r30   // if mmio_ptr, clear p13 jitter control
-       ;;
-.time_redo:
-       .pred.rel.mutex p8,p9,p10
-       ld4.acq r28 = [r29]     // xtime_lock.sequence. Must come first for 
locking purposes
+       ;; // old position for lock seq, new inner loop1!
+.cmpxchg_redo:
+       .pred.rel.mutex p8,p9
 (p8)   mov r2 = ar.itc         // CPU_TIMER. 36 clocks latency!!!
 (p9)   ld8 r2 = [r30]          // readq(ti->address). Could also have latency 
issues..
 (p13)  ld8 r25 = [r19]         // get itc_lastcycle value
        ;;                      // could be removed by moving the last add 
upward
        ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
-       ld8 r24 = [r26]         // get fsyscall_cycle_last value
 (p15)  ld8 r17 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET
        ;;
        ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET    // xtime.tv_nsec
@@ -265,21 +269,23 @@ EX(.fail_efault, probe.w.fault r31, 3)    /
        xmpy.l f8 = f8,f7       // nsec_per_cyc*(counter-last_counter)
 (p15)  add r9 = r9,r17         // Add wall to monotonic.secs to result secs
        ;;
+       // End cmpxchg critical section loop1
 (p15)  ld8 r17 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET
 (p7)   cmp.ne p7,p0 = r25,r3   // if cmpxchg not successful redo
+(p7)   br.cond.dpnt.few .cmpxchg_redo  // inner loop1
        // simulate tbit.nz.or p7,p0 = r28,0
        and r28 = ~1,r28        // Make sequence even to force retry if odd
        getf.sig r2 = f8
        mf
        ;;
-       ld4 r10 = [r29]         // xtime_lock.sequence
+       ld4 r10 = [r20]         // gtod_lock.sequence, old xtime_lock.sequence
 (p15)  add r8 = r8, r17        // Add monotonic.nsecs to nsecs
-       shr.u r2 = r2,r21       // shift by factor
+       shr.u r2 = r2,r23       // shift by factor
        ;;              // overloaded 3 bundles!
        // End critical section.
        add r8 = r8,r2          // Add xtime.nsecs
        cmp4.ne.or p7,p0 = r28,r10
-(p7)   br.cond.dpnt.few .time_redo     // sequence number changed ?
+(p7)   br.cond.dpnt.few .time_redo     // sequence number changed, outer loop2
        // Now r8=tv->tv_nsec and r9=tv->tv_sec
        mov r10 = r0
        movl r2 = 1000000000
Index: Linux/arch/ia64/kernel/fsyscall_gtod_data.h
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ Linux/arch/ia64/kernel/fsyscall_gtod_data.h 2007-04-24 12:54:52.000000000 
-0400
@@ -0,0 +1,18 @@
+/*
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ *        Contributed by Peter Keilty <[EMAIL PROTECTED]>
+ *
+ * fsyscall gettimeofday data
+ */
+
+struct fsyscall_gtod_data_t {
+        seqlock_t lock;
+        cycle_t   clk_mask;
+        u32       clk_mult;
+        u32       clk_shift;
+        void     *clk_fsys_mmio_ptr;
+        cycle_t   clk_cycle_last;
+        cycle_t   itc_lastcycle;
+        int       itc_jitter;
+} __attribute__ ((aligned (L1_CACHE_BYTES)));
+
Index: Linux/arch/ia64/kernel/time.c
===================================================================
--- Linux.orig/arch/ia64/kernel/time.c  2007-04-24 11:40:44.000000000 -0400
+++ Linux/arch/ia64/kernel/time.c       2007-04-24 12:41:28.000000000 -0400
@@ -30,9 +30,13 @@
 #include <asm/sections.h>
 #include <asm/system.h>
 
+#include "fsyscall_gtod_data.h"
+
 static cycle_t itc_get_cycles(void);
-cycle_t        itc_lastcycle __attribute__((aligned(L1_CACHE_BYTES)));
-int    itc_jitter __attribute__((aligned(L1_CACHE_BYTES)));
+
+struct fsyscall_gtod_data_t fsyscall_gtod_data = {
+       .lock = SEQLOCK_UNLOCKED,
+};
 
 volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
 
@@ -50,7 +54,7 @@ static struct clocksource clocksource_it
         .mask           = 0xffffffffffffffffLL,
         .mult           = 0, /*to be caluclated*/
         .shift          = 16,
-        .is_continuous  = 1,
+        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 static struct clocksource *clocksource_itc_p;
 
@@ -232,7 +236,7 @@ ia64_init_itm (void)
                 * even going backward) if the ITC offsets between the 
individual CPUs
                 * are too large.
                 */
-               if (!nojitter) itc_jitter = 1;
+               if (!nojitter) fsyscall_gtod_data.itc_jitter = 1;
 #endif
        }
 
@@ -248,15 +252,14 @@ ia64_init_itm (void)
        }
 }
 
-
 static cycle_t itc_get_cycles()
 {
-       if (itc_jitter) {
+       if (fsyscall_gtod_data.itc_jitter) {
                u64 lcycle;
                u64 now;
 
                do {
-                       lcycle = itc_lastcycle;
+                       lcycle = fsyscall_gtod_data.itc_lastcycle;
                        now = get_cycles();
                        if (lcycle && time_after(lcycle, now))
                                return lcycle;
@@ -266,14 +269,14 @@ static cycle_t itc_get_cycles()
                         * force to retry until the write lock is released.
                         */
                        if (spin_is_locked(&xtime_lock.lock)) {
-                               itc_lastcycle = now;
+                               fsyscall_gtod_data.itc_lastcycle = now;
                                return now;
                        }
                        /* Keep track of the last timer value returned.
                         * The use of cmpxchg here will cause contention in
                         * an SMP environment.
                         */
-               } while (unlikely(cmpxchg(&itc_lastcycle, lcycle, now) != 
lcycle));
+               } while (likely(cmpxchg(&fsyscall_gtod_data.itc_lastcycle, 
lcycle, now) != lcycle));
                return now;
        } else
                return get_cycles();
@@ -356,9 +359,19 @@ ia64_setup_printk_clock(void)
                ia64_printk_clock = ia64_itc_printk_clock;
 }
 
-struct clocksource fsyscall_clock __attribute__((aligned(L1_CACHE_BYTES)));
-
 void update_vsyscall(struct timespec *wall, struct clocksource *c)
 {
-       fsyscall_clock = *c;
+        unsigned long flags;
+
+        write_seqlock_irqsave(&fsyscall_gtod_data.lock, flags);
+
+        /* copy fsyscall clock data */
+        fsyscall_gtod_data.clk_mask = c->mask;
+        fsyscall_gtod_data.clk_mult = c->mult;
+        fsyscall_gtod_data.clk_shift = c->shift;
+        fsyscall_gtod_data.clk_fsys_mmio_ptr = c->fsys_mmio_ptr;
+        fsyscall_gtod_data.clk_cycle_last = c->cycle_last;
+
+        write_sequnlock_irqrestore(&fsyscall_gtod_data.lock, flags);
 }
+
Index: Linux/arch/ia64/sn/kernel/sn2/timer.c
===================================================================
--- Linux.orig/arch/ia64/sn/kernel/sn2/timer.c  2007-04-24 11:40:44.000000000 
-0400
+++ Linux/arch/ia64/sn/kernel/sn2/timer.c       2007-04-24 12:32:33.000000000 
-0400
@@ -37,7 +37,7 @@ static struct clocksource clocksource_sn
         .mask           = (1LL << 55) - 1,
         .mult           = 0,
         .shift          = 10,
-        .is_continuous  = 1,
+        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 /*
Index: Linux/drivers/char/hpet.c
===================================================================
--- Linux.orig/drivers/char/hpet.c      2007-04-24 11:40:44.000000000 -0400
+++ Linux/drivers/char/hpet.c   2007-04-26 09:36:31.000000000 -0400
@@ -76,7 +76,7 @@ static struct clocksource clocksource_hp
         .mask           = 0xffffffffffffffffLL,
         .mult           = 0, /*to be caluclated*/
         .shift          = 10,
-        .is_continuous  = 1,
+        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 static struct clocksource *hpet_clocksource_p;
 
Index: Linux/kernel/time/ntp.c
===================================================================
--- Linux.orig/kernel/time/ntp.c        2007-04-24 11:40:44.000000000 -0400
+++ Linux/kernel/time/ntp.c     2007-04-24 12:33:24.000000000 -0400
@@ -114,11 +114,6 @@ void second_overflow(void)
                if (xtime.tv_sec % 86400 == 0) {
                        xtime.tv_sec--;
                        wall_to_monotonic.tv_sec++;
-                       /*
-                        * The timer interpolator will make time change
-                        * gradually instead of an immediate jump by one second
-                        */
-                       time_interpolator_update(-NSEC_PER_SEC);
                        time_state = TIME_OOP;
                        clock_was_set();
                        printk(KERN_NOTICE "Clock: inserting leap second "
@@ -129,11 +124,6 @@ void second_overflow(void)
                if ((xtime.tv_sec + 1) % 86400 == 0) {
                        xtime.tv_sec++;
                        wall_to_monotonic.tv_sec--;
-                       /*
-                        * Use of time interpolator for a gradual change of
-                        * time
-                        */
-                       time_interpolator_update(NSEC_PER_SEC);
                        time_state = TIME_WAIT;
                        clock_was_set();
                        printk(KERN_NOTICE "Clock: deleting leap second "
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to