Re: [patch] faster vgetcpu using sidt (take 2)

2007-01-22 Thread dean gaudet
On Thu, 18 Jan 2007, Andi Kleen wrote:

> > let me know what you think... thanks.
> 
> It's ok, although I would like to have the file in a separate directory.

cool -- do you have a directory in mind?

and would you like this change as two separate patches or one combined 
patch?

thanks
-dean
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] faster vgetcpu using sidt (take 2)

2007-01-22 Thread dean gaudet
On Thu, 18 Jan 2007, Andi Kleen wrote:

> > let me know what you think... thanks.
> 
> It's ok, although I would like to have the file in a separate directory.

cool -- do you have a directory in mind?

and would you like this change as two separate patches or one combined 
patch?

thanks
-dean
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] faster vgetcpu using sidt (take 2)

2007-01-18 Thread Andi Kleen
> let me know what you think... thanks.

It's ok, although I would like to have the file in a separate directory.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] faster vgetcpu using sidt (take 2)

2007-01-18 Thread Andi Kleen
> let me know what you think... thanks.

It's ok, although I would like to have the file in a separate directory.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] faster vgetcpu using sidt (take 2)

2007-01-14 Thread dean gaudet
On Sat, 13 Jan 2007, dean gaudet wrote:

> ok here is the latest rev of this patch (against 2.6.20-rc4).
> 
> timings in cycles:
> 
>                 baseline   patched    baseline   patched
>                 no cache   no cache   cache      cache
> k8 pre-revF     21         16         14         17
> k8 revF         31         17         14         17
> core2           38         16         12         14
> p4              49         41         24         24
> 
> the degradation in cached timings appears to be due to the 16 byte stack
> frame set up for the sidt instruction.  apparently due to -mno-red-zone...
> would you accept a patch which re-enables the red-zone for vsyscalls?

here is a first stab at a patch (applied on top of my vgetcpu sidt patch) 
which enables red-zone for vsyscall.  it fixes the cache degradation 
problem above by getting rid of the stack frame setup in vgetcpu (and 
improves the no cache cases as well but i haven't run it everywhere yet).

to do this i split the user-mode-only portion of vsyscall.c into 
vsyscall_user.c.  this required a couple externs in vsyscall.c and two 
extra ".globl" in the asm in vsyscall_user.c.

i'm not sure if we need the CFLAGS_vsyscall.o still or not.

let me know what you think... thanks.

-dean

Index: linux/arch/x86_64/kernel/Makefile
===
--- linux.orig/arch/x86_64/kernel/Makefile  2006-11-29 13:57:37.0 
-0800
+++ linux/arch/x86_64/kernel/Makefile   2007-01-13 23:34:22.0 -0800
@@ -6,7 +6,7 @@
 EXTRA_AFLAGS   := -traditional
 obj-y  := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
-   x8664_ksyms.o i387.o syscall.o vsyscall.o \
+   x8664_ksyms.o i387.o syscall.o vsyscall.o vsyscall_user.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
pci-dma.o pci-nommu.o alternative.o
 
@@ -45,6 +45,7 @@
 obj-y  += intel_cacheinfo.o
 
 CFLAGS_vsyscall.o  := $(PROFILING) -g0
+CFLAGS_vsyscall_user.o := $(PROFILING) -g0 -mred-zone
 
 therm_throt-y   += ../../i386/kernel/cpu/mcheck/therm_throt.o
 bootflag-y += ../../i386/kernel/bootflag.o
Index: linux/arch/x86_64/kernel/vsyscall.c
===
--- linux.orig/arch/x86_64/kernel/vsyscall.c2007-01-13 22:21:01.0 
-0800
+++ linux/arch/x86_64/kernel/vsyscall.c 2007-01-13 23:41:08.0 -0800
@@ -40,161 +40,12 @@
 #include 
 #include 
 #include 
-
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
-#define __syscall_clobber "r11","rcx","memory"
-
-int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
-seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
-
-/* is this necessary? */
-#ifndef CONFIG_NODES_SHIFT
-#define CONFIG_NODES_SHIFT 0
-#endif
-
 #include 
 
-static __always_inline void timeval_normalize(struct timeval * tv)
-{
-   time_t __sec;
-
-   __sec = tv->tv_usec / 100;
-   if (__sec) {
-   tv->tv_usec %= 100;
-   tv->tv_sec += __sec;
-   }
-}
-
-static __always_inline void do_vgettimeofday(struct timeval * tv)
-{
-   long sequence, t;
-   unsigned long sec, usec;
-
-   do {
-   sequence = read_seqbegin(&__xtime_lock);
-   
-   sec = __xtime.tv_sec;
-   usec = __xtime.tv_nsec / 1000;
-
-   if (__vxtime.mode != VXTIME_HPET) {
-   t = get_cycles_sync();
-   if (t < __vxtime.last_tsc)
-   t = __vxtime.last_tsc;
-   usec += ((t - __vxtime.last_tsc) *
-__vxtime.tsc_quot) >> 32;
-   /* See comment in x86_64 do_gettimeofday. */
-   } else {
-   usec += ((readl((void __iomem *)
-  fix_to_virt(VSYSCALL_HPET) + 0xf0) -
- __vxtime.last) * __vxtime.quot) >> 32;
-   }
-   } while (read_seqretry(&__xtime_lock, sequence));
-
-   tv->tv_sec = sec + usec / 100;
-   tv->tv_usec = usec % 100;
-}
-
-/* RED-PEN may want to readd seq locking, but then the variable should be 
write-once. */
-static __always_inline void do_get_tz(struct timezone * tz)
-{
-   *tz = __sys_tz;
-}
-
-static __always_inline int gettimeofday(struct timeval *tv, struct timezone 
*tz)
-{
-   int ret;
-   asm volatile("vsysc2: syscall"
-   : "=a" (ret)
-   : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber 
);
-   return ret;
-}
-
-static __always_inline long time_syscall(long *t)
-{
-   long secs;
-   asm volatile("vsysc1: syscall"
-   : "=a" (secs)
-   : "0" (__NR_time),"D" (t) : 

Re: [patch] faster vgetcpu using sidt (take 2)

2007-01-14 Thread dean gaudet
On Sat, 13 Jan 2007, dean gaudet wrote:

> ok here is the latest rev of this patch (against 2.6.20-rc4).
> 
> timings in cycles:
> 
>                 baseline   patched    baseline   patched
>                 no cache   no cache   cache      cache
> k8 pre-revF     21         16         14         17
> k8 revF         31         17         14         17
> core2           38         16         12         14
> p4              49         41         24         24
> 
> the degradation in cached timings appears to be due to the 16 byte stack
> frame set up for the sidt instruction.  apparently due to -mno-red-zone...
> would you accept a patch which re-enables the red-zone for vsyscalls?

here is a first stab at a patch (applied on top of my vgetcpu sidt patch) 
which enables red-zone for vsyscall.  it fixes the cache degradation 
problem above by getting rid of the stack frame setup in vgetcpu (and 
improves the no cache cases as well but i haven't run it everywhere yet).

to do this i split the user-mode-only portion of vsyscall.c into 
vsyscall_user.c.  this required a couple externs in vsyscall.c and two 
extra ".globl" in the asm in vsyscall_user.c.

i'm not sure if we need the CFLAGS_vsyscall.o still or not.

let me know what you think... thanks.

-dean

Index: linux/arch/x86_64/kernel/Makefile
===
--- linux.orig/arch/x86_64/kernel/Makefile  2006-11-29 13:57:37.0 
-0800
+++ linux/arch/x86_64/kernel/Makefile   2007-01-13 23:34:22.0 -0800
@@ -6,7 +6,7 @@
 EXTRA_AFLAGS   := -traditional
 obj-y  := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
-   x8664_ksyms.o i387.o syscall.o vsyscall.o \
+   x8664_ksyms.o i387.o syscall.o vsyscall.o vsyscall_user.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
pci-dma.o pci-nommu.o alternative.o
 
@@ -45,6 +45,7 @@
 obj-y  += intel_cacheinfo.o
 
 CFLAGS_vsyscall.o  := $(PROFILING) -g0
+CFLAGS_vsyscall_user.o := $(PROFILING) -g0 -mred-zone
 
 therm_throt-y   += ../../i386/kernel/cpu/mcheck/therm_throt.o
 bootflag-y += ../../i386/kernel/bootflag.o
Index: linux/arch/x86_64/kernel/vsyscall.c
===
--- linux.orig/arch/x86_64/kernel/vsyscall.c2007-01-13 22:21:01.0 
-0800
+++ linux/arch/x86_64/kernel/vsyscall.c 2007-01-13 23:41:08.0 -0800
@@ -40,161 +40,12 @@
 #include asm/segment.h
 #include asm/desc.h
 #include asm/topology.h
-
-#define __vsyscall(nr) __attribute__ ((unused,__section__(.vsyscall_ #nr)))
-#define __syscall_clobber r11,rcx,memory
-
-int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
-seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
-
-/* is this necessary? */
-#ifndef CONFIG_NODES_SHIFT
-#define CONFIG_NODES_SHIFT 0
-#endif
-
 #include asm/unistd.h
 
-static __always_inline void timeval_normalize(struct timeval * tv)
-{
-   time_t __sec;
-
-   __sec = tv-tv_usec / 100;
-   if (__sec) {
-   tv-tv_usec %= 100;
-   tv-tv_sec += __sec;
-   }
-}
-
-static __always_inline void do_vgettimeofday(struct timeval * tv)
-{
-   long sequence, t;
-   unsigned long sec, usec;
-
-   do {
-   sequence = read_seqbegin(__xtime_lock);
-   
-   sec = __xtime.tv_sec;
-   usec = __xtime.tv_nsec / 1000;
-
-   if (__vxtime.mode != VXTIME_HPET) {
-   t = get_cycles_sync();
-   if (t  __vxtime.last_tsc)
-   t = __vxtime.last_tsc;
-   usec += ((t - __vxtime.last_tsc) *
-__vxtime.tsc_quot)  32;
-   /* See comment in x86_64 do_gettimeofday. */
-   } else {
-   usec += ((readl((void __iomem *)
-  fix_to_virt(VSYSCALL_HPET) + 0xf0) -
- __vxtime.last) * __vxtime.quot)  32;
-   }
-   } while (read_seqretry(__xtime_lock, sequence));
-
-   tv-tv_sec = sec + usec / 100;
-   tv-tv_usec = usec % 100;
-}
-
-/* RED-PEN may want to readd seq locking, but then the variable should be 
write-once. */
-static __always_inline void do_get_tz(struct timezone * tz)
-{
-   *tz = __sys_tz;
-}
-
-static __always_inline int gettimeofday(struct timeval *tv, struct timezone 
*tz)
-{
-   int ret;
-   asm volatile(vsysc2: syscall
-   : =a (ret)
-   : 0 (__NR_gettimeofday),D (tv),S (tz) : __syscall_clobber 
);
-   return ret;
-}
-
-static __always_inline long time_syscall(long *t)
-{
-   long secs;
-   asm volatile(vsysc1: syscall
-   : =a (secs)
-   : 0 (__NR_time),D (t) :