Re: [patch] faster vgetcpu using sidt (take 2)

2007-01-22 Thread dean gaudet
On Thu, 18 Jan 2007, Andi Kleen wrote:

> > let me know what you think... thanks.
> 
> It's ok, although I would like to have the file in a separate directory.

cool -- do you have a directory in mind?

and would you like this change as two separate patches or one combined 
patch?

thanks
-dean
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] faster vgetcpu using sidt (take 2)

2007-01-18 Thread Andi Kleen
> let me know what you think... thanks.

It's ok, although I would like to have the file in a separate directory.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] faster vgetcpu using sidt (take 2)

2007-01-14 Thread dean gaudet
On Sat, 13 Jan 2007, dean gaudet wrote:

> ok here is the latest rev of this patch (against 2.6.20-rc4).
> 
> timings in cycles:
> 
>              baseline   patched    baseline   patched
>              no cache   no cache   cache      cache
> k8 pre-revF  21         16         14         17
> k8 revF      31         17         14         17
> core2        38         16         12         14
> p4           49         41         24         24
> 
> the degradation in cached timings appears to be due to the 16 byte stack
> frame set up for the sidt instruction.  apparently due to -mno-red-zone...
> would you accept a patch which re-enables the red-zone for vsyscalls?

here is a first stab at a patch (applied on top of my vgetcpu sidt patch) 
which enables red-zone for vsyscall.  it fixes the cache degradation 
problem above by getting rid of the stack frame setup in vgetcpu (and 
improves the no cache cases as well but i haven't run it everywhere yet).

to do this i split the user-mode-only portion of vsyscall.c into 
vsyscall_user.c.  this required a couple externs in vsyscall.c and two 
extra ".globl" in the asm in vsyscall_user.c.

i'm not sure if we need the CFLAGS_vsyscall.o still or not.

let me know what you think... thanks.

-dean

Index: linux/arch/x86_64/kernel/Makefile
===
--- linux.orig/arch/x86_64/kernel/Makefile  2006-11-29 13:57:37.0 -0800
+++ linux/arch/x86_64/kernel/Makefile   2007-01-13 23:34:22.0 -0800
@@ -6,7 +6,7 @@
 EXTRA_AFLAGS   := -traditional
 obj-y  := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
-   x8664_ksyms.o i387.o syscall.o vsyscall.o \
+   x8664_ksyms.o i387.o syscall.o vsyscall.o vsyscall_user.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
pci-dma.o pci-nommu.o alternative.o
 
@@ -45,6 +45,7 @@
 obj-y  += intel_cacheinfo.o
 
 CFLAGS_vsyscall.o  := $(PROFILING) -g0
+CFLAGS_vsyscall_user.o := $(PROFILING) -g0 -mred-zone
 
 therm_throt-y   += ../../i386/kernel/cpu/mcheck/therm_throt.o
 bootflag-y += ../../i386/kernel/bootflag.o
Index: linux/arch/x86_64/kernel/vsyscall.c
===
--- linux.orig/arch/x86_64/kernel/vsyscall.c 2007-01-13 22:21:01.0 -0800
+++ linux/arch/x86_64/kernel/vsyscall.c 2007-01-13 23:41:08.0 -0800
@@ -40,161 +40,12 @@
 #include 
 #include 
 #include 
-
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
-#define __syscall_clobber "r11","rcx","memory"
-
-int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
-seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
-
-/* is this necessary? */
-#ifndef CONFIG_NODES_SHIFT
-#define CONFIG_NODES_SHIFT 0
-#endif
-
 #include 
 
-static __always_inline void timeval_normalize(struct timeval * tv)
-{
-   time_t __sec;
-
-   __sec = tv->tv_usec / 1000000;
-   if (__sec) {
-   tv->tv_usec %= 1000000;
-   tv->tv_sec += __sec;
-   }
-}
-
-static __always_inline void do_vgettimeofday(struct timeval * tv)
-{
-   long sequence, t;
-   unsigned long sec, usec;
-
-   do {
-   sequence = read_seqbegin(&__xtime_lock);
-   
-   sec = __xtime.tv_sec;
-   usec = __xtime.tv_nsec / 1000;
-
-   if (__vxtime.mode != VXTIME_HPET) {
-   t = get_cycles_sync();
-   if (t < __vxtime.last_tsc)
-   t = __vxtime.last_tsc;
-   usec += ((t - __vxtime.last_tsc) *
-__vxtime.tsc_quot) >> 32;
-   /* See comment in x86_64 do_gettimeofday. */
-   } else {
-   usec += ((readl((void __iomem *)
-  fix_to_virt(VSYSCALL_HPET) + 0xf0) -
- __vxtime.last) * __vxtime.quot) >> 32;
-   }
-   } while (read_seqretry(&__xtime_lock, sequence));
-
-   tv->tv_sec = sec + usec / 1000000;
-   tv->tv_usec = usec % 1000000;
-}
-
-/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
-static __always_inline void do_get_tz(struct timezone * tz)
-{
-   *tz = __sys_tz;
-}
-
-static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-   int ret;
-   asm volatile("vsysc2: syscall"
-   : "=a" (ret)
-   : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
-   return ret;
-}
-
-static __always_inline long time_syscall(long *t)
-{
-   long secs;
-   asm volatile("vsysc1: syscall"
-   : "=a" (secs)
-   : "0" (__NR_time),"D" (t) :