Hi Mathieu,
On Wed, 2009-03-18 at 11:29 +0530, Subrata Modak wrote:
> Hi Mathieu,
>
> On Tue, 2009-03-17 at 11:41 -0400, Mathieu Desnoyers wrote:
> > * Subrata Modak ([email protected]) wrote:
> > > Hi Mathieu,
> > >
> > > On Tue, Mar 17, 2009 at 7:02 AM, Mathieu Desnoyers <
> > > [email protected]> wrote:
> > >
> > > > Hi,
> > > >
> > > > I am trying to get access to some non-x86 hardware to run some atomic
> > > > primitive benchmarks for a paper on LTTng I am preparing. That should be
> > > > useful to argue about performance benefit of per-cpu atomic operations
> > > > vs interrupt disabling. I would like to run the following benchmark
> > > > module on CONFIG_SMP :
> > > >
> > > > - PowerPC
> > > > - MIPS
> > > > - ia64
> > > > - alpha
> > > >
> > > > usage :
> > > > make
> > > > insmod test-cmpxchg-nolock.ko
> > > > insmod: error inserting 'test-cmpxchg-nolock.ko': -1 Resource
> > > > temporarily
> > > > unavailable
> > > > dmesg (see dmesg output)
> > > >
> > >
> > > With your permission, can we include this test in LTP (
> > > http://ltp.sourceforge.net/), in some appropriate place as a small
> > > benchmark
> > > test ?
> > >
> >
> > Hi Subrata,
> >
> > Sure, maybe you'll want to use a better interface than a module init
> > that fails though. :)
>
> Please Cc me when you come up with a better interface. Meanwhile, i will
> find out a better way to integrate this with LTP and will notify you
> when i do that. Thanks.
How about the following simple patch? It integrates the test into LTP.
Nemeth,
Any comments?
> >
> > Mathieu
> >
> > > Regards--
> > > Subrata
> > >
> > >
> > > > If some of you would be kind enough to run my test module provided below
> > > > and provide the results of these tests on a recent kernel (2.6.26~2.6.29
> > > > should be good) along with their cpuinfo, I would greatly appreciate.
> > > >
> > > > Here are the CAS results for various Intel-based architectures :
> > > >
> > > > Architecture | Speedup | CAS |
> > > > Interrupts |
> > > > | (cli + sti) / local cmpxchg | local | sync |
> > > > Enable
> > > > (sti) | Disable (cli)
> > > >
> > > > -------------------------------------------------------------------------------------------------
> > > > Intel Pentium 4 | 5.24 | 25 | 81 | 70
> > > > | 61 |
> > > > AMD Athlon(tm)64 X2 | 4.57 | 7 | 17 | 17
> > > > | 15 |
> > > > Intel Core2 | 6.33 | 6 | 30 | 20
> > > > | 18 |
> > > > Intel Xeon E5405 | 5.25 | 8 | 24 | 20
> > > > | 22 |
> > > >
> > > > The benefit expected on PowerPC, ia64 and alpha should principally come
> > > > from removed memory barriers in the local primitives.
> > > >
> > > > Thanks,
> > > >
> > > > Mathieu
> > > >
> > > > P.S. please forgive the coding style and hackish interface. :)
> > > >
---
--- ltp-full-20090331.orig/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/Makefile	1970-01-01 05:30:00.000000000 +0530
+++ ltp-full-20090331/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/Makefile	2009-03-31 20:33:16.000000000 +0530
@@ -0,0 +1,20 @@
+# Kbuild / out-of-tree build file for the test-cmpxchg-nolock module.
+#
+# When invoked by the kernel build system (KERNELRELEASE is set) only the
+# object list is declared; otherwise the rules below re-invoke make inside
+# the kernel build directory.
+ifneq ($(KERNELRELEASE),)
+  obj-m += test-cmpxchg-nolock.o
+else
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+# Kernel release string, extracted from the UTS_RELEASE definition.
+KERNELRELEASE = $(shell cat $(KERNELDIR)/$(KBUILD_OUTPUT)/include/linux/version.h | sed -n 's/.*UTS_RELEASE.*\"\(.*\)\".*/\1/p')
+ifneq ($(INSTALL_MOD_PATH),)
+  DEPMOD_OPT := -b $(INSTALL_MOD_PATH)
+endif
+
+.PHONY: default modules_install clean
+
+default:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules
+
+# Install the module, then refresh module dependencies if a System.map exists.
+modules_install:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install
+	if [ -f $(KERNELDIR)/$(KBUILD_OUTPUT)/System.map ] ; then /sbin/depmod -ae -F $(KERNELDIR)/$(KBUILD_OUTPUT)/System.map $(DEPMOD_OPT) $(KERNELRELEASE) ; fi
+
+clean:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
+endif
--- ltp-full-20090331.orig/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/test-cmpxchg-nolock.c	1970-01-01 05:30:00.000000000 +0530
+++ ltp-full-20090331/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/test-cmpxchg-nolock.c	2009-03-31 20:34:04.000000000 +0530
@@ -0,0 +1,301 @@
+/******************************************************************************/
+/*
*/
+/* Copyright (c) Mathieu Desnoyers <[email protected]>, 2009
*/
+/*
*/
+/* This program is free software; you can redistribute it and/or modify
*/
+/* it under the terms of the GNU General Public License as published by
*/
+/* the Free Software Foundation; either version 2 of the License, or
*/
+/* (at your option) any later version.
*/
+/*
*/
+/* This program is distributed in the hope that it will be useful,
*/
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of
*/
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
*/
+/* the GNU General Public License for more details.
*/
+/*
*/
+/* You should have received a copy of the GNU General Public License
*/
+/* along with this program; if not, write to the Free Software
*/
+/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+/*
*/
+/* usage :
+ make
+ insmod test-cmpxchg-nolock.ko
+ insmod: error inserting 'test-cmpxchg-nolock.ko':
+ -1 Resource temporarily unavailable
+ dmesg (see dmesg output)
*/
+/******************************************************************************/
+
+
+
+/* test-cmpxchg-nolock.c
+*
+* Compare local cmpxchg with irq disable / enable.
+*/
+
+
+#include <linux/jiffies.h>
+#include <linux/compiler.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/math64.h>
+#include <asm/atomic.h>
+#include <asm/local.h>
+#include <asm/timex.h>
+#include <asm/system.h>
+
+/* Number of iterations executed by every benchmark loop in this file. */
+#define NR_LOOPS 20000
+
+/*
+ * Word operated on by the cmpxchg benchmarks. It is only referenced from
+ * this file, so keep it out of the kernel's global namespace.
+ */
+static int test_val;
+
+/*
+ * do_testbaseline - time NR_LOOPS iterations of an empty loop.
+ *
+ * The loop runs with local interrupts and preemption disabled. The total
+ * get_cycles() delta and the per-iteration average are reported through
+ * printk at KERN_ALERT level.
+ */
+static void do_testbaseline(void)
+{
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+	unsigned long long total;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		/* Empty asm; presumably here so the loop is not elided. */
+		asm volatile ("");
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+	/*
+	 * cycles_t is not unsigned long long on every architecture, so widen
+	 * it explicitly before passing it to a %llu conversion.
+	 */
+	total = (unsigned long long)(time2 - time1);
+
+	printk(KERN_ALERT "test results: time for baseline\n");
+	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+	printk(KERN_ALERT "total time: %llu\n", total);
+	printk(KERN_ALERT "-> baseline takes %llu cycles\n",
+	       (unsigned long long)div_u64(total, NR_LOOPS));
+	printk(KERN_ALERT "test end\n");
+}
+
+/*
+ * do_test_sync_cmpxchg - time NR_LOOPS "locked" compare-and-exchange calls.
+ *
+ * Each iteration compares test_val against 0 and stores 0, using
+ * sync_cmpxchg() when CONFIG_X86_32 is defined and cmpxchg() otherwise.
+ * The loop runs with local interrupts and preemption disabled; the total
+ * and per-iteration cycle counts are reported through printk.
+ */
+static void do_test_sync_cmpxchg(void)
+{
+	int ret;
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+	unsigned long long total;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+#ifdef CONFIG_X86_32
+		ret = sync_cmpxchg(&test_val, 0, 0);
+#else
+		ret = cmpxchg(&test_val, 0, 0);
+#endif
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+	/*
+	 * cycles_t is not unsigned long long on every architecture, so widen
+	 * it explicitly before passing it to a %llu conversion.
+	 */
+	total = (unsigned long long)(time2 - time1);
+
+	printk(KERN_ALERT "test results: time for locked cmpxchg\n");
+	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+	printk(KERN_ALERT "total time: %llu\n", total);
+	printk(KERN_ALERT "-> locked cmpxchg takes %llu cycles\n",
+	       (unsigned long long)div_u64(total, NR_LOOPS));
+	printk(KERN_ALERT "test end\n");
+}
+
+/*
+ * do_test_cmpxchg - time NR_LOOPS cmpxchg_local() calls.
+ *
+ * Each iteration compares test_val against 0 and stores 0 through
+ * cmpxchg_local(). The loop runs with local interrupts and preemption
+ * disabled; the total and per-iteration cycle counts are reported through
+ * printk.
+ */
+static void do_test_cmpxchg(void)
+{
+	int ret;
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+	unsigned long long total;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		ret = cmpxchg_local(&test_val, 0, 0);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+	/*
+	 * cycles_t is not unsigned long long on every architecture, so widen
+	 * it explicitly before passing it to a %llu conversion.
+	 */
+	total = (unsigned long long)(time2 - time1);
+
+	printk(KERN_ALERT "test results: time for non locked cmpxchg\n");
+	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+	printk(KERN_ALERT "total time: %llu\n", total);
+	printk(KERN_ALERT "-> non locked cmpxchg takes %llu cycles\n",
+	       (unsigned long long)div_u64(total, NR_LOOPS));
+	printk(KERN_ALERT "test end\n");
+}
+
+/*
+ * do_test_sync_inc - time NR_LOOPS atomic_add_return() calls.
+ *
+ * Each iteration adds 10 to a local atomic_t. The loop runs with local
+ * interrupts and preemption disabled; the total and per-iteration cycle
+ * counts are reported through printk.
+ */
+static void do_test_sync_inc(void)
+{
+	int ret;
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+	unsigned long long total;
+	/* Start from a defined value; the counter is read-modify-written. */
+	atomic_t val = ATOMIC_INIT(0);
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		ret = atomic_add_return(10, &val);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+	/*
+	 * cycles_t is not unsigned long long on every architecture, so widen
+	 * it explicitly before passing it to a %llu conversion.
+	 */
+	total = (unsigned long long)(time2 - time1);
+
+	printk(KERN_ALERT "test results: time for locked add return\n");
+	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+	printk(KERN_ALERT "total time: %llu\n", total);
+	printk(KERN_ALERT "-> locked add return takes %llu cycles\n",
+	       (unsigned long long)div_u64(total, NR_LOOPS));
+	printk(KERN_ALERT "test end\n");
+}
+
+
+/*
+ * do_test_inc - time NR_LOOPS local_add_return() calls.
+ *
+ * Each iteration adds 10 to a local local_t. The loop runs with local
+ * interrupts and preemption disabled; the total and per-iteration cycle
+ * counts are reported through printk.
+ */
+static void do_test_inc(void)
+{
+	int ret;
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+	unsigned long long total;
+	/* Start from a defined value; the counter is read-modify-written. */
+	local_t loc_val = LOCAL_INIT(0);
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		ret = local_add_return(10, &loc_val);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+	/*
+	 * cycles_t is not unsigned long long on every architecture, so widen
+	 * it explicitly before passing it to a %llu conversion.
+	 */
+	total = (unsigned long long)(time2 - time1);
+
+	printk(KERN_ALERT "test results: time for non locked add return\n");
+	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+	printk(KERN_ALERT "total time: %llu\n", total);
+	printk(KERN_ALERT "-> non locked add return takes %llu cycles\n",
+	       (unsigned long long)div_u64(total, NR_LOOPS));
+	printk(KERN_ALERT "test end\n");
+}
+
+
+
+/*
+ * do_test_enable_int - time NR_LOOPS local_irq_restore() calls.
+ *
+ * Interrupt state is saved once on entry and then restored from that same
+ * saved value on every iteration, so the loop itself may run with
+ * interrupts enabled. The total and per-iteration cycle counts are
+ * reported through printk.
+ *
+ * This test will have a higher standard deviation due to incoming interrupts.
+ */
+static void do_test_enable_int(void)
+{
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+	unsigned long long total;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		local_irq_restore(flags);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+	/*
+	 * cycles_t is not unsigned long long on every architecture, so widen
+	 * it explicitly before passing it to a %llu conversion.
+	 */
+	total = (unsigned long long)(time2 - time1);
+
+	printk(KERN_ALERT "test results: time for enabling interrupts (STI)\n");
+	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+	printk(KERN_ALERT "total time: %llu\n", total);
+	printk(KERN_ALERT "-> enabling interrupts (STI) takes %llu cycles\n",
+	       (unsigned long long)div_u64(total, NR_LOOPS));
+	printk(KERN_ALERT "test end\n");
+}
+
+/*
+ * do_test_disable_int - time NR_LOOPS local_irq_save() calls.
+ *
+ * Interrupts are disabled once on entry; every iteration then calls
+ * local_irq_save() again into a scratch variable that is never restored.
+ * The state saved on entry is restored after the loop. The total and
+ * per-iteration cycle counts are reported through printk.
+ */
+static void do_test_disable_int(void)
+{
+	unsigned long flags, flags2;
+	unsigned int i;
+	cycles_t time1, time2;
+	unsigned long long total;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		local_irq_save(flags2);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+	/*
+	 * cycles_t is not unsigned long long on every architecture, so widen
+	 * it explicitly before passing it to a %llu conversion.
+	 */
+	total = (unsigned long long)(time2 - time1);
+
+	printk(KERN_ALERT "test results: time for disabling interrupts (CLI)\n");
+	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+	printk(KERN_ALERT "total time: %llu\n", total);
+	printk(KERN_ALERT "-> disabling interrupts (CLI) takes %llu cycles\n",
+	       (unsigned long long)div_u64(total, NR_LOOPS));
+	printk(KERN_ALERT "test end\n");
+}
+
+/*
+ * do_test_int - time NR_LOOPS local_irq_restore()/local_irq_save() pairs.
+ *
+ * Every iteration restores the saved interrupt state and immediately saves
+ * and disables it again. Preemption stays disabled throughout. The total
+ * and per-iteration cycle counts are reported through printk.
+ */
+static void do_test_int(void)
+{
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+	unsigned long long total;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		local_irq_restore(flags);
+		local_irq_save(flags);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+	/*
+	 * cycles_t is not unsigned long long on every architecture, so widen
+	 * it explicitly before passing it to a %llu conversion.
+	 */
+	total = (unsigned long long)(time2 - time1);
+
+	printk(KERN_ALERT "test results: time for disabling/enabling interrupts (STI/CLI)\n");
+	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+	printk(KERN_ALERT "total time: %llu\n", total);
+	printk(KERN_ALERT "-> enabling/disabling interrupts (STI/CLI) takes %llu cycles\n",
+	       (unsigned long long)div_u64(total, NR_LOOPS));
+	printk(KERN_ALERT "test end\n");
+}
+
+
+
+/*
+ * Module entry point: logs "test init", runs every benchmark once in a
+ * fixed order, and then returns -EAGAIN so that the module load fails.
+ */
+static int ltt_test_init(void)
+{
+	/* Benchmarks in the order they are executed. */
+	static void (* const benchmarks[])(void) = {
+		do_testbaseline,
+		do_test_sync_cmpxchg,
+		do_test_cmpxchg,
+		do_test_sync_inc,
+		do_test_inc,
+		do_test_enable_int,
+		do_test_disable_int,
+		do_test_int,
+	};
+	unsigned int idx;
+
+	printk(KERN_ALERT "test init\n");
+
+	for (idx = 0; idx < sizeof(benchmarks) / sizeof(benchmarks[0]); idx++)
+		benchmarks[idx]();
+
+	/* Fail will directly unload the module */
+	return -EAGAIN;
+}
+
+/*
+ * Module exit handler: only logs "test exit".
+ * NOTE(review): ltt_test_init() always returns -EAGAIN, so the module
+ * presumably never stays loaded long enough for this to run - confirm.
+ */
+static void ltt_test_exit(void)
+{
+ printk(KERN_ALERT "test exit\n");
+}
+
+/* Register the load and unload handlers defined above. */
+module_init(ltt_test_init)
+module_exit(ltt_test_exit)
+
+/* Module metadata. */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Cmpxchg vs int Test");
---
Regards--
Subrata
>
> Regards--
> Subrata
>
------------------------------------------------------------------------------
_______________________________________________
Ltp-list mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/ltp-list