On Sat, 13 Jan 2007, dean gaudet wrote:
> ok here is the latest rev of this patch (against 2.6.20-rc4).
>
> timings in cycles:
>
>                 baseline   patched    baseline   patched
>                 no cache   no cache   cache      cache
> k8 pre-revF     21         16         14         17
> k8 revF         31         17         14         17
> core2           38         16         12         14
> p4              49         41         24         24
>
> the degradation in cached timings appears to be due to the 16 byte stack
> frame set up for the sidt instruction. apparently due to -mno-red-zone...
> would you accept a patch which re-enables the red-zone for vsyscalls?
here is a first stab at a patch (applied on top of my vgetcpu sidt patch)
which enables red-zone for vsyscall. it fixes the cache degradation
problem above by getting rid of the stack frame setup in vgetcpu (and
improves the no cache cases as well but i haven't run it everywhere yet).
to do this i split the user-mode-only portion of vsyscall.c into
vsyscall_user.c. this required a couple externs in vsyscall.c and two
extra ".globl" in the asm in vsyscall_user.c.
i'm not sure if we need the CFLAGS_vsyscall.o still or not.
let me know what you think... thanks.
-dean
Index: linux/arch/x86_64/kernel/Makefile
===
--- linux.orig/arch/x86_64/kernel/Makefile 2006-11-29 13:57:37.0 -0800
+++ linux/arch/x86_64/kernel/Makefile 2007-01-13 23:34:22.0 -0800
@@ -6,7 +6,7 @@
EXTRA_AFLAGS := -traditional
obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
- x8664_ksyms.o i387.o syscall.o vsyscall.o \
+ x8664_ksyms.o i387.o syscall.o vsyscall.o vsyscall_user.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
pci-dma.o pci-nommu.o alternative.o
@@ -45,6 +45,7 @@
obj-y += intel_cacheinfo.o
CFLAGS_vsyscall.o := $(PROFILING) -g0
+CFLAGS_vsyscall_user.o := $(PROFILING) -g0 -mred-zone
therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o
bootflag-y += ../../i386/kernel/bootflag.o
Index: linux/arch/x86_64/kernel/vsyscall.c
===
--- linux.orig/arch/x86_64/kernel/vsyscall.c 2007-01-13 22:21:01.0 -0800
+++ linux/arch/x86_64/kernel/vsyscall.c 2007-01-13 23:41:08.0 -0800
@@ -40,161 +40,12 @@
#include
#include
#include
-
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
-#define __syscall_clobber "r11","rcx","memory"
-
-int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
-seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
-
-/* is this necessary? */
-#ifndef CONFIG_NODES_SHIFT
-#define CONFIG_NODES_SHIFT 0
-#endif
-
#include
-static __always_inline void timeval_normalize(struct timeval * tv)
-{
- time_t __sec;
-
- __sec = tv->tv_usec / 100;
- if (__sec) {
- tv->tv_usec %= 100;
- tv->tv_sec += __sec;
- }
-}
-
-static __always_inline void do_vgettimeofday(struct timeval * tv)
-{
- long sequence, t;
- unsigned long sec, usec;
-
- do {
- sequence = read_seqbegin(&__xtime_lock);
-
- sec = __xtime.tv_sec;
- usec = __xtime.tv_nsec / 1000;
-
- if (__vxtime.mode != VXTIME_HPET) {
- t = get_cycles_sync();
- if (t < __vxtime.last_tsc)
- t = __vxtime.last_tsc;
- usec += ((t - __vxtime.last_tsc) *
-__vxtime.tsc_quot) >> 32;
- /* See comment in x86_64 do_gettimeofday. */
- } else {
- usec += ((readl((void __iomem *)
- fix_to_virt(VSYSCALL_HPET) + 0xf0) -
- __vxtime.last) * __vxtime.quot) >> 32;
- }
- } while (read_seqretry(&__xtime_lock, sequence));
-
- tv->tv_sec = sec + usec / 100;
- tv->tv_usec = usec % 100;
-}
-
-/* RED-PEN may want to readd seq locking, but then the variable should be
write-once. */
-static __always_inline void do_get_tz(struct timezone * tz)
-{
- *tz = __sys_tz;
-}
-
-static __always_inline int gettimeofday(struct timeval *tv, struct timezone
*tz)
-{
- int ret;
- asm volatile("vsysc2: syscall"
- : "=a" (ret)
- : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber
);
- return ret;
-}
-
-static __always_inline long time_syscall(long *t)
-{
- long secs;
- asm volatile("vsysc1: syscall"
- : "=a" (secs)
- : "0" (__NR_time),"D" (t) :