This is 2 of 3 patches for ia64 clocksource.
My former patch (1 of 3) revealed that "default" has problem.
It was assumed that something in combination of "fsys + jitter"
is wrong since both of "nojitter (= fsys)" and "nolwsys (= non-fsys
+ jitter)" provides better result.
Especially it is assumed as serious bug that "non-fsys + jitter",
written in C is faster than "fsys + jitter", written in assembler.
I picked up some points:
- It seems that logics are not so different.
- both uses jitter-related data in fsys_gtod_data
- only fsys uses rests of contents of fsys_gtod_data,
while non-fsys still uses xtime.
- xtime_lock is larger lock than gtod_lock.
So summary was:
"reading one structure with small lock"
is slower than
"reading two structure with large lock"
I came near to give up, but for some reason, an inspiration hit me,
and at last I caught a line.
> } __attribute__ ((aligned (L1_CACHE_BYTES)));
The imagined situation is that a read from fsys_gtod_data
was disturbed by concurrent cmpxchg of itc_lastcycle,
which located in same cacheline.
If so, it can be explained that why read from xtime is
faster than read from fsys_gtod_data.
So I made this patch to:
- separate itc_lastcycle from fsys_gtod_data and put
it on other cacheline.
- separate itc_jitter from fsys_gtod_data too.
in fact, itc_lastcycle and itc_jitter are used from
both of fsys and non-fsys.
Fortunately this patch works well, then the results:
> # savextime : default
> CPU 0: 5.49 (usecs) (0 errors / 1822461 iterations)
> CPU 1: 5.46 (usecs) (0 errors / 1830527 iterations)
> CPU 2: 5.45 (usecs) (0 errors / 1834697 iterations)
> CPU 3: 5.46 (usecs) (0 errors / 1831464 iterations)
> # savextime : nojitter
> CPU 0: 0.14 (usecs) (0 errors / 70918167 iterations)
> CPU 1: 0.14 (usecs) (0 errors / 71620242 iterations)
> CPU 2: 0.14 (usecs) (0 errors / 70946394 iterations)
> CPU 3: 0.14 (usecs) (0 errors / 70933043 iterations)
> # savextime : nolwsys
> CPU 0: 2.89 (usecs) (0 errors / 3459498 iterations)
> CPU 1: 2.89 (usecs) (0 errors / 3458876 iterations)
> CPU 2: 2.96 (usecs) (0 errors / 3377799 iterations)
> CPU 3: 2.97 (usecs) (0 errors / 3372353 iterations)
become more reasonable, expected level:
> # separatejitter : default
> CPU 0: 1.50 (usecs) (0 errors / 6677159 iterations)
> CPU 1: 1.49 (usecs) (0 errors / 6697159 iterations)
> CPU 2: 1.50 (usecs) (0 errors / 6664672 iterations)
> CPU 3: 1.50 (usecs) (0 errors / 6668999 iterations)
> # separatejitter : nojitter
> CPU 0: 0.14 (usecs) (0 errors / 70580221 iterations)
> CPU 1: 0.14 (usecs) (0 errors / 71275618 iterations)
> CPU 2: 0.14 (usecs) (0 errors / 70626121 iterations)
> CPU 3: 0.14 (usecs) (0 errors / 70603364 iterations)
> # separatejitter : nolwsys
> CPU 0: 2.26 (usecs) (0 errors / 4417197 iterations)
> CPU 1: 2.26 (usecs) (0 errors / 4415829 iterations)
> CPU 2: 2.27 (usecs) (0 errors / 4402768 iterations)
> CPU 3: 2.27 (usecs) (0 errors / 4406101 iterations)
(Continue to next patch.)
Thanks,
H.Seto
Signed-off-by: Hidetoshi Seto <[EMAIL PROTECTED]>
-----
arch/ia64/kernel/asm-offsets.c | 6 +++---
arch/ia64/kernel/fsys.S | 7 +++++--
arch/ia64/kernel/fsyscall_gtod_data.h | 5 ++++-
arch/ia64/kernel/time.c | 12 +++++++-----
4 files changed, 19 insertions(+), 11 deletions(-)
Index: linux-2.6.22/arch/ia64/kernel/asm-offsets.c
===================================================================
--- linux-2.6.22.orig/arch/ia64/kernel/asm-offsets.c
+++ linux-2.6.22/arch/ia64/kernel/asm-offsets.c
@@ -274,8 +274,8 @@
offsetof (struct fsyscall_gtod_data_t, clk_fsys_mmio));
DEFINE(IA64_CLKSRC_CYCLE_LAST_OFFSET,
offsetof (struct fsyscall_gtod_data_t, clk_cycle_last));
- DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
- offsetof (struct fsyscall_gtod_data_t, itc_lastcycle));
DEFINE(IA64_ITC_JITTER_OFFSET,
- offsetof (struct fsyscall_gtod_data_t, itc_jitter));
+ offsetof (struct itc_jitter_data_t, itc_jitter));
+ DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
+ offsetof (struct itc_jitter_data_t, itc_lastcycle));
}
Index: linux-2.6.22/arch/ia64/kernel/fsys.S
===================================================================
--- linux-2.6.22.orig/arch/ia64/kernel/fsys.S
+++ linux-2.6.22/arch/ia64/kernel/fsys.S
@@ -150,6 +150,9 @@
#if IA64_GTOD_LOCK_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct
fsyscall_gtod_data_t
#endif
+#if IA64_ITC_JITTER_OFFSET !=0
+#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
+#endif
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
#define CLOCK_DIVIDE_BY_1000 0x4000
@@ -215,13 +218,13 @@
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
;;
- add r29 = IA64_ITC_JITTER_OFFSET,r20
+ movl r29 = itc_jitter_data // itc_jitter
add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
ld4 r2 = [r2] // process work pending flags
;;
(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
- add r19 = IA64_ITC_LASTCYCLE_OFFSET,r20
+ add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
and r2 = TIF_ALLWORK_MASK,r2
(p6) br.cond.spnt.few .fail_einval // ? deferred branch
;;
Index: linux-2.6.22/arch/ia64/kernel/fsyscall_gtod_data.h
===================================================================
--- linux-2.6.22.orig/arch/ia64/kernel/fsyscall_gtod_data.h
+++ linux-2.6.22/arch/ia64/kernel/fsyscall_gtod_data.h
@@ -14,7 +14,10 @@
u32 clk_shift;
void *clk_fsys_mmio;
cycle_t clk_cycle_last;
- cycle_t itc_lastcycle;
+} __attribute__ ((aligned (L1_CACHE_BYTES)));
+
+struct itc_jitter_data_t {
int itc_jitter;
+ cycle_t itc_lastcycle;
} __attribute__ ((aligned (L1_CACHE_BYTES)));
Index: linux-2.6.22/arch/ia64/kernel/time.c
===================================================================
--- linux-2.6.22.orig/arch/ia64/kernel/time.c
+++ linux-2.6.22/arch/ia64/kernel/time.c
@@ -37,6 +37,8 @@
.lock = SEQLOCK_UNLOCKED,
};
+struct itc_jitter_data_t itc_jitter_data;
+
volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
#ifdef CONFIG_IA64_DEBUG_IRQ
@@ -236,7 +238,7 @@
* are too large.
*/
if (!nojitter)
- fsyscall_gtod_data.itc_jitter = 1;
+ itc_jitter_data.itc_jitter = 1;
#endif
}
@@ -258,10 +260,10 @@
u64 lcycle;
u64 now;
- if (!fsyscall_gtod_data.itc_jitter)
+ if (!itc_jitter_data.itc_jitter)
return get_cycles();
do {
- lcycle = fsyscall_gtod_data.itc_lastcycle;
+ lcycle = itc_jitter_data.itc_lastcycle;
now = get_cycles();
if (lcycle && time_after(lcycle, now))
return lcycle;
@@ -271,14 +273,14 @@
* force to retry until the write lock is released.
*/
if (spin_is_locked(&xtime_lock.lock)) {
- fsyscall_gtod_data.itc_lastcycle = now;
+ itc_jitter_data.itc_lastcycle = now;
return now;
}
/* Keep track of the last timer value returned.
* The use of cmpxchg here will cause contention in
* an SMP environment.
*/
- } while (likely(cmpxchg(&fsyscall_gtod_data.itc_lastcycle,
+ } while (likely(cmpxchg(&itc_jitter_data.itc_lastcycle,
lcycle, now) != lcycle));
return now;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html