On Sun, Jun 13, 2010 at 06:01:29PM -0400, Mathieu Desnoyers wrote: > * Paul E. McKenney ([email protected]) wrote: > > The machine I am using is cortex-a9. It may be necessary to create a > > more intelligent mapping to the many variants of ARM. Interestingly > > enough, __sync_synchronize() -still- fails, so use an explicit asm > > for the dmb instruction. > > > > Leaving off the '-O' gives freaking slow results. ;-) > > Is this -O problem something we should fix at an higher level ? (e.g. for all > arch ?)
Might be. In this case, the definition of "freaking slow" is >40ns for an empty loop. Though I suspect that getting call_rcu() working would be higher priority, and I believe that I have a reasonable approach. And we are going to need a few RCU-based data structures as well. > More comments below, > > > > > Signed-off-by: Paul E. McKenney <[email protected]> > > --- > > > > configure.ac | 4 +++ > > urcu/arch_armv7l.h | 59 > > +++++++++++++++++++++++++++++++++++++++++++++ > > urcu/uatomic_arch_armv7l.h | 48 ++++++++++++++++++++++++++++++++++++ > > 3 files changed, 111 insertions(+) > > > > diff --git a/configure.ac b/configure.ac > > index 9274337..9e2615f 100644 > > --- a/configure.ac > > +++ b/configure.ac > > @@ -51,6 +51,7 @@ case $host_cpu in > > s390x) ARCHTYPE="s390" ;; > > sparc64) ARCHTYPE="sparc64" ;; > > alpha*) ARCHTYPE="alpha" ;; > > + armv7l) ARCHTYPE="armv7l" ;; > > *) ARCHTYPE="unknown";; > > esac > > > > @@ -61,6 +62,9 @@ if test "x$ARCHTYPE" != xx86 -a "x$ARCHTYPE" != xppc; then > > else > > APISRC=tests/api_$ARCHTYPE.h > > fi > > +if test "x$ARCHTYPE" == xarmv7l; then > > + CFLAGS="-mcpu=cortex-a9 -mtune=cortex-a9 -O2" > > +fi > > > > AC_SUBST(ARCHTYPE) > > AC_SUBST(SUBARCHTYPE) > > diff --git a/urcu/arch_armv7l.h b/urcu/arch_armv7l.h > > new file mode 100644 > > index 0000000..5920a4d > > --- /dev/null > > +++ b/urcu/arch_armv7l.h > > @@ -0,0 +1,59 @@ > > +#ifndef _URCU_ARCH_ARMV7L_H > > +#define _URCU_ARCH_ARMV7L_H > > + > > +/* > > + * arch_unknown.h: trivial definitions for the "unknown" architecture. > > + * > > + * Copyright (c) 2010 Paul E. McKenney, IBM Corporation. > > + * Copyright (c) 2009 Mathieu Desnoyers <[email protected]> > > + * > > + * This library is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * This library is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. > > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with this library; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > > 02110-1301 USA > > + */ > > + > > +#include <urcu/compiler.h> > > +#include <urcu/config.h> > > + > > +#ifdef __cplusplus > > +extern "C" { > > +#endif > > + > > +/* We don't know, so guess!!! */ > > +#define CACHE_LINE_SIZE 128 > > + > > +#define mb() asm volatile("dmb":::"memory") > > + > > +#include <stdlib.h> > > +#include <sys/time.h> > > + > > +typedef unsigned long long cycles_t; > > + > > +static inline cycles_t get_cycles (void) > > +{ > > + cycles_t thetime; > > + struct timeval tv; > > + > > + if (gettimeofday(&tv, NULL) != 0) > > + return 0; > > + thetime = ((cycles_t)tv.tv_sec) * 1000000ULL + ((cycles_t)tv.tv_usec); > > + return (cycles_t)thetime; > > +} > > + > > +#ifdef __cplusplus > > +} > > +#endif > > + > > +#include <urcu/arch_generic.h> > > + > > +#endif /* _URCU_ARCH_ARMV7L_H */ > > diff --git a/urcu/uatomic_arch_armv7l.h b/urcu/uatomic_arch_armv7l.h > > new file mode 100644 > > index 0000000..5eece49 > > --- /dev/null > > +++ b/urcu/uatomic_arch_armv7l.h > > @@ -0,0 +1,48 @@ > > +#ifndef _URCU_ARCH_UATOMIC_ARMV7L_H > > +#define _URCU_ARCH_UATOMIC_ARMV7L_H > > + > > +/* > > + * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. > > + * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. > > + * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P. > > + * Copyright (c) 2009 Mathieu Desnoyers > > + * Copyright (c) 2010 Paul E. McKenney, IBM Corporation > > + * (Adapted from uatomic_arch_unknown.h) > > + * > > + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED > > + * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. > > + * > > + * Permission is hereby granted to use or copy this program > > + * for any purpose, provided the above notices are retained on all copies. > > + * Permission to modify the code and to distribute modified code is > > granted, > > + * provided the above notices are retained, and a notice that the code was > > + * modified is included with the above copyright notice. > > + * > > + * Code inspired from libuatomic_ops-1.2, inherited in part from the > > + * Boehm-Demers-Weiser conservative garbage collector. > > + */ > > + > > +#include <urcu/compiler.h> > > +#include <urcu/system.h> > > + > > +#ifdef __cplusplus > > +extern "C" { > > +#endif > > + > > +/* xchg */ > > +#define uatomic_xchg(addr, v) __sync_lock_test_and_set(addr, v); > > I expect that these __sync_*() operations have missing memory barriers ? It > might be safer to do something like: > > #define uatomic_cmpxchg(addr, old _new) \ > {( \ > __typeof__(_new) __ret; \ > \ > smp_mb(); \ > __ret = __sync_val_compare_and_swap(addr, old, _new); \ > smp_mb(); \ > __ret; > )} > > (similar for other primitives) > > The idea is that I don't trust gcc's __sync_*() implementation if they got > the > __sync_synchronize() wrong. Then there is the question as to whether they used atomics instructions. Sigh! I will run some tests and see what I learn. (Busted disassemblers, but I might yet need to hand-decode the instructions...) Thanx, Paul > Thanks, > > Mathieu > > > + > > +/* cmpxchg */ > > +#define uatomic_cmpxchg(addr, old, _new) \ > > + __sync_val_compare_and_swap(addr, old, _new) > > + > > +/* uatomic_add_return */ > > +#define uatomic_add_return(addr, v) __sync_add_and_fetch(addr, v) > > + > > +#ifdef __cplusplus > > +} > > +#endif > > + > > +#include <urcu/uatomic_generic.h> > > + > > +#endif /* _URCU_ARCH_UATOMIC_ARMV7L_H */ > > -- > Mathieu Desnoyers > Operating System Efficiency R&D Consultant > EfficiOS Inc. > http://www.efficios.com _______________________________________________ ltt-dev mailing list [email protected] http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev
