Many thanks for this contribution! A few points:
1. This is a lengthy contribution; it's a bit more than a "trivial" patch that we could include in the mainline without worrying about intellectual property. :-( Can you officially release this code under the BSD license, and/or sign the Open MPI 3rd party contribution agreement? (I know that this is a major hassle, but we have to do it :-( ) 2. Where did you generate this patch from? Based on your patch filenames, I tried to apply it to the SVN trunk@24210 but failed: [11:07] svbu-mpi:~/svn/ompi4 % svnversion . 24210 [11:07] svbu-mpi:~/svn/ompi4 % patch -p1 < ~/arm.patch patching file opal/asm/generate-asm.pl Hunk #1 FAILED at 103. 1 out of 1 hunk FAILED -- saving rejects to file opal/asm/generate-asm.pl.rej patching file opal/asm/asm-data.txt Hunk #1 FAILED at 48. 1 out of 1 hunk FAILED -- saving rejects to file opal/asm/asm-data.txt.rej patching file opal/asm/base/ARM.asm patching file opal/include/opal/sys/arm/atomic.h patching file opal/include/opal/sys/arm/timer.h patching file opal/include/opal/sys/arm/Makefile.am patching file opal/include/opal/sys/arm/update.sh patching file opal/include/opal/sys/atomic.h Hunk #1 FAILED at 146. 1 out of 1 hunk FAILED -- saving rejects to file opal/include/opal/sys/atomic.h.rej patching file opal/include/opal/sys/timer.h Hunk #1 FAILED at 79. 1 out of 1 hunk FAILED -- saving rejects to file opal/include/opal/sys/timer.h.rej patching file opal/include/opal/sys/architecture.h Hunk #1 FAILED at 36. 1 out of 1 hunk FAILED -- saving rejects to file opal/include/opal/sys/architecture.h.rej patching file opal/config/opal_config_asm.m4 Hunk #1 FAILED at 900. 1 out of 1 hunk FAILED -- saving rejects to file opal/config/opal_config_asm.m4.rej [11:07] svbu-mpi:~/svn/ompi4 % On Dec 24, 2010, at 9:22 AM, Leif Lindholm wrote: > Hi, > > The following patch adds support for the ARMv7-A architecture to opal. > This includes current processors such as Cortex-A8 and Cortex-A9, as > well as upcoming Cortex-A5 and Cortex-A15. > > It has been validated on Ubuntu Lucid (10.04) and Maverick (10.10), > although the former might require some package updates to build from > checkout. > > The opal/include/opal/sys/arm directory was cloned from powerpc. > > I apologise for what I had to do to generate-asm.pl to get it to build. > > Signed-off-by: leif.lindh...@arm.com > > Index: ompi-trunk/opal/asm/generate-asm.pl > =================================================================== > --- ompi-trunk/opal/asm/generate-asm.pl (revision 24191) > +++ ompi-trunk/opal/asm/generate-asm.pl (working copy) > @@ -103,7 +103,11 @@ > } > > if ($GNU_STACK == 1) { > - print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\@progbits\n"; > + if ($asmarch eq "ARM") { > + print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\%progbits\n"; > + } else { > + print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\@progbits\n"; > + } > } > > close(INPUT); > Index: ompi-trunk/opal/asm/asm-data.txt > =================================================================== > --- ompi-trunk/opal/asm/asm-data.txt (revision 24191) > +++ ompi-trunk/opal/asm/asm-data.txt (working copy) > @@ -48,6 +48,15 @@ > > ###################################################################### > # > +# ARM (ARMv7 and later) > +# > +###################################################################### > + > +ARM default-.text-.globl-:--.L-#-1-1-1-1-1 arm-linux > + > + > +###################################################################### > +# > # Intel Pentium Class > # > ###################################################################### > Index: ompi-trunk/opal/asm/base/ARM.asm > =================================================================== > --- ompi-trunk/opal/asm/base/ARM.asm (revision 0) > +++ ompi-trunk/opal/asm/base/ARM.asm (revision 0) > @@ -0,0 +1,150 @@ > +START_FILE > + TEXT > + > + ALIGN(4) > +START_FUNC(opal_atomic_mb) > + dmb > + bx lr > +END_FUNC(opal_atomic_mb) > + > + > +START_FUNC(opal_atomic_rmb) > + dmb > + bx lr > +END_FUNC(opal_atomic_rmb) > + > + > +START_FUNC(opal_atomic_wmb) > + dmb > + bx lr > +END_FUNC(opal_atomic_wmb) > + > + > +START_FUNC(opal_atomic_cmpset_32) > + LSYM(1) > + ldrex r3, [r0] > + cmp r1, r3 > + bne REFLSYM(2) > + strex r12, r2, [r0] > + cmp r12, #0 > + bne REFLSYM(1) > + mov r0, #1 > + LSYM(2) > + movne r0, #0 > + bx lr > +END_FUNC(opal_atomic_cmpset_32) > + > + > +START_FUNC(opal_atomic_cmpset_acq_32) > + LSYM(3) > + ldrex r3, [r0] > + cmp r1, r3 > + bne REFLSYM(4) > + strex r12, r2, [r0] > + cmp r12, #0 > + bne REFLSYM(3) > + dmb > + mov r0, #1 > + LSYM(4) > + movne r0, #0 > + bx lr > +END_FUNC(opal_atomic_cmpset_acq_32) > + > + > +START_FUNC(opal_atomic_cmpset_rel_32) > + LSYM(5) > + ldrex r3, [r0] > + cmp r1, r3 > + bne REFLSYM(6) > + dmb > + strex r12, r2, [r0] > + cmp r12, #0 > + bne REFLSYM(4) > + mov r0, #1 > + LSYM(6) > + movne r0, #0 > + bx lr > +END_FUNC(opal_atomic_cmpset_rel_32) > + > +#START_64BIT > +START_FUNC(opal_atomic_cmpset_64) > + push {r4-r7} > + ldrd r6, r7, [sp, #16] > + LSYM(7) > + ldrexd r4, r5, [r0] > + cmp r4, r2 > + cmpeq r5, r3 > + bne REFLSYM(8) > + strexd r1, r6, r7, [r0] > + cmp r1, #0 > + bne REFLSYM(7) > + mov r0, #1 > + LSYM(8) > + movne r0, #0 > + pop {r4-r7} > + bx lr > +END_FUNC(opal_atomic_cmpset_64) > + > +START_FUNC(opal_atomic_cmpset_acq_64) > + push {r4-r7} > + ldrd r6, r7, [sp, #16] > + LSYM(9) > + ldrexd r4, r5, [r0] > + cmp r4, r2 > + cmpeq r5, r3 > + bne REFLSYM(10) > + strexd r1, r6, r7, [r0] > + cmp r1, #0 > + bne REFLSYM(9) > + dmb > + mov r0, #1 > + LSYM(10) > + movne r0, #0 > + pop {r4-r7} > + bx lr > +END_FUNC(opal_atomic_cmpset_acq_64) > + > + > +START_FUNC(opal_atomic_cmpset_rel_64) > + push {r4-r7} > + ldrd r6, r7, [sp, #16] > + LSYM(11) > + ldrexd r4, r5, [r0] > + cmp r4, r2 > + cmpeq r5, r3 > + bne REFLSYM(12) > + dmb > + strexd r1, r6, r7, [r0] > + cmp r1, #0 > + bne REFLSYM(11) > + mov r0, #1 > + LSYM(12) > + movne r0, #0 > + pop {r4-r7} > + bx lr > +END_FUNC(opal_atomic_cmpset_rel_64) > +#END_64BIT > + > + > +START_FUNC(opal_atomic_add_32) > + LSYM(13) > + ldrex r2, [r0] > + add r2, r2, r1 > + strex r3, r2, [r0] > + cmp r3, #0 > + bne REFLSYM(13) > + mov r0, r2 > + bx lr > +END_FUNC(opal_atomic_add_32) > + > + > +START_FUNC(opal_atomic_sub_32) > + LSYM(14) > + ldrex r2, [r0] > + sub r2, r2, r1 > + strex r3, r2, [r0] > + cmp r3, #0 > + bne REFLSYM(14) > + mov r0, r2 > + bx lr > +END_FUNC(opal_atomic_sub_32) > Index: ompi-trunk/opal/include/opal/sys/arm/atomic.h > =================================================================== > --- ompi-trunk/opal/include/opal/sys/arm/atomic.h (revision 0) > +++ ompi-trunk/opal/include/opal/sys/arm/atomic.h (revision 0) > @@ -0,0 +1,227 @@ > +/* > + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana > + * University Research and Technology > + * Corporation. All rights reserved. > + * Copyright (c) 2004-2005 The University of Tennessee and The University > + * of Tennessee Research Foundation. All rights > + * reserved. > + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, > + * University of Stuttgart. All rights reserved. > + * Copyright (c) 2004-2005 The Regents of the University of California. > + * All rights reserved. > + * Copyright (c) 2010 IBM Corporation. All rights reserved. > + * Copyright (c) 2010 ARM ltd. All rights reserved. > + * $COPYRIGHT$ > + * > + * Additional copyrights may follow > + * > + * $HEADER$ > + */ > + > +#ifndef OMPI_SYS_ARCH_ATOMIC_H > +#define OMPI_SYS_ARCH_ATOMIC_H 1 > + > +#if OPAL_WANT_SMP_LOCKS > + > +#define MB() __asm__ __volatile__ ("dmb" : : : "memory") > +#define RMB() __asm__ __volatile__ ("dmb" : : : "memory") > +#define WMB() __asm__ __volatile__ ("dmb" : : : "memory") > + > +#else > + > +#define MB() > +#define RMB() > +#define WMB() > + > +#endif > + > + > +/********************************************************************** > + * > + * Define constants for ARMv7 > + * > + *********************************************************************/ > +#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 > + > +#define OPAL_HAVE_ATOMIC_CMPSET_32 1 > + > +#define OPAL_HAVE_ATOMIC_CMPSET_64 1 > + > +#define OPAL_HAVE_ATOMIC_MATH_32 1 > +#define OPAL_HAVE_ATOMIC_ADD_32 1 > +#define OPAL_HAVE_ATOMIC_SUB_32 1 > + > + > +/********************************************************************** > + * > + * Memory Barriers > + * > + *********************************************************************/ > +#if OMPI_GCC_INLINE_ASSEMBLY > + > +static inline > +void opal_atomic_mb(void) > +{ > + MB(); > +} > + > + > +static inline > +void opal_atomic_rmb(void) > +{ > + RMB(); > +} > + > + > +static inline > +void opal_atomic_wmb(void) > +{ > + WMB(); > +} > + > + > +/********************************************************************** > + * > + * Atomic math operations > + * > + *********************************************************************/ > + > +static inline int opal_atomic_cmpset_32(volatile int32_t *addr, > + int32_t oldval, int32_t newval) > +{ > + int32_t ret, tmp; > + > + __asm__ __volatile__ ( > + "1: ldrex %0, [%2] \n" > + " cmp %0, %3 \n" > + " bne 2f \n" > + " strex %1, %4, [%2] \n" > + " cmp %1, #0 \n" > + " bne 1b \n" > + "2: \n" > + > + : "=&r" (ret), "=&r" (tmp) > + : "r" (addr), "r" (oldval), "r" (newval) > + : "cc", "memory"); > + > + return (ret == oldval); > +} > + > +/* these two functions aren't inlined in the non-gcc case because then > + there would be two function calls (since neither cmpset_32 nor > + atomic_?mb can be inlined). Instead, we "inline" them by hand in > + the assembly, meaning there is one function call overhead instead > + of two */ > +static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr, > + int32_t oldval, int32_t newval) > +{ > + int rc; > + > + rc = opal_atomic_cmpset_32(addr, oldval, newval); > + opal_atomic_rmb(); > + > + return rc; > +} > + > + > +static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr, > + int32_t oldval, int32_t newval) > +{ > + opal_atomic_wmb(); > + return opal_atomic_cmpset_32(addr, oldval, newval); > +} > + > + > +static inline int opal_atomic_cmpset_64(volatile int64_t *addr, > + int64_t oldval, int64_t newval) > +{ > + int64_t ret; > + int tmp; > + > + > + __asm__ __volatile__ ( > + "1: ldrexd %0, %H0, [%2] \n" > + " cmp %0, %3 \n" > + " cmpeq %H0, %H3 \n" > + " bne 2f \n" > + " strexd %1, %4, %H4, [%2] \n" > + " cmp %1, #0 \n" > + " bne 1b \n" > + "2: \n" > + > + : "=&r" (ret), "=&r" (tmp) > + : "r" (addr), "r" (oldval), "r" (newval) > + : "cc", "memory"); > + > + return (ret == oldval); > +} > + > +/* these two functions aren't inlined in the non-gcc case because then > + there would be two function calls (since neither cmpset_64 nor > + atomic_?mb can be inlined). Instead, we "inline" them by hand in > + the assembly, meaning there is one function call overhead instead > + of two */ > +static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr, > + int64_t oldval, int64_t newval) > +{ > + int rc; > + > + rc = opal_atomic_cmpset_64(addr, oldval, newval); > + opal_atomic_rmb(); > + > + return rc; > +} > + > + > +static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr, > + int64_t oldval, int64_t newval) > +{ > + opal_atomic_wmb(); > + return opal_atomic_cmpset_64(addr, oldval, newval); > +} > + > + > +static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc) > +{ > + int32_t t; > + int tmp; > + > + __asm__ __volatile__( > + "1: ldrex %0, [%2] \n" > + " add %0, %0, %3 \n" > + " strex %1, %0, [%2] \n" > + " cmp %1, #0 \n" > + " bne 1b \n" > + > + : "=&r" (t), "=&r" (tmp) > + : "r" (v), "r" (inc) > + : "cc", "memory"); > + > + > + return t; > +} > + > + > +static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec) > +{ > + int32_t t; > + int tmp; > + > + __asm__ __volatile__( > + "1: ldrex %0, [%2] \n" > + " sub %0, %0, %3 \n" > + " strex %1, %0, [%2] \n" > + " cmp %1, #0 \n" > + " bne 1b \n" > + > + : "=&r" (t), "=&r" (tmp) > + : "r" (v), "r" (dec) > + : "cc", "memory"); > + > + return t; > +} > + > + > +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ > + > +#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */ > Index: ompi-trunk/opal/include/opal/sys/arm/timer.h > =================================================================== > --- ompi-trunk/opal/include/opal/sys/arm/timer.h (revision 0) > +++ ompi-trunk/opal/include/opal/sys/arm/timer.h (revision 0) > @@ -0,0 +1,33 @@ > +/* > + * Copyright (c) 2008 The University of Tennessee and The University > + * of Tennessee Research Foundation. All rights > + * reserved. > + * $COPYRIGHT$ > + * > + * Additional copyrights may follow > + * > + * $HEADER$ > + */ > + > +#ifndef OMPI_SYS_ARCH_TIMER_H > +#define OMPI_SYS_ARCH_TIMER_H 1 > + > +#include <sys/times.h> > + > +typedef uint64_t opal_timer_t; > + > +static inline opal_timer_t > +opal_sys_timer_get_cycles(void) > +{ > + opal_timer_t ret; > + struct tms accurate_clock; > + > + times(&accurate_clock); > + ret = accurate_clock.tms_utime + accurate_clock.tms_stime; > + > + return ret; > +} > + > +#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1 > + > +#endif /* ! OMPI_SYS_ARCH_TIMER_H */ > Index: ompi-trunk/opal/include/opal/sys/arm/Makefile.am > =================================================================== > --- ompi-trunk/opal/include/opal/sys/arm/Makefile.am (revision 0) > +++ ompi-trunk/opal/include/opal/sys/arm/Makefile.am (revision 0) > @@ -0,0 +1,24 @@ > +# > +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana > +# University Research and Technology > +# Corporation. All rights reserved. > +# Copyright (c) 2004-2008 The University of Tennessee and The University > +# of Tennessee Research Foundation. All rights > +# reserved. > +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, > +# University of Stuttgart. All rights reserved. > +# Copyright (c) 2004-2005 The Regents of the University of California. > +# All rights reserved. > +# $COPYRIGHT$ > +# > +# Additional copyrights may follow > +# > +# $HEADER$ > +# > + > +# This makefile.am does not stand on its own - it is included from > opal/include/Makefile.am > + > +headers += \ > + opal/sys/arm/atomic.h \ > + opal/sys/arm/timer.h > + > Index: ompi-trunk/opal/include/opal/sys/arm/update.sh > =================================================================== > --- ompi-trunk/opal/include/opal/sys/arm/update.sh (revision 0) > +++ ompi-trunk/opal/include/opal/sys/arm/update.sh (revision 0) > @@ -0,0 +1,37 @@ > +#!/bin/sh > +# > +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana > +# University Research and Technology > +# Corporation. All rights reserved. > +# Copyright (c) 2004-2005 The University of Tennessee and The University > +# of Tennessee Research Foundation. All rights > +# reserved. > +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, > +# University of Stuttgart. All rights reserved. > +# Copyright (c) 2004-2005 The Regents of the University of California. > +# All rights reserved. > +# $COPYRIGHT$ > +# > +# Additional copyrights may follow > +# > +# $HEADER$ > +# > + > +CFILE=/tmp/opal_atomic_$$.c > + > +trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 > + > +echo Updating atomic.s from atomic.h using gcc > + > +cat > $CFILE<<EOF > +#include <stdlib.h> > +#include <inttypes.h> > +#define static > +#define inline > +#define OMPI_GCC_INLINE_ASSEMBLY 1 > +#define OPAL_WANT_SMP_LOCKS 1 > +#include "../architecture.h" > +#include "atomic.h" > +EOF > + > +gcc -O1 -I. -S $CFILE -o atomic.s > Index: ompi-trunk/opal/include/opal/sys/atomic.h > =================================================================== > --- ompi-trunk/opal/include/opal/sys/atomic.h (revision 24191) > +++ ompi-trunk/opal/include/opal/sys/atomic.h (working copy) > @@ -146,6 +146,8 @@ > #include "opal/sys/alpha/atomic.h" > #elif OPAL_ASSEMBLY_ARCH == OMPI_AMD64 > #include "opal/sys/amd64/atomic.h" > +#elif OPAL_ASSEMBLY_ARCH == OMPI_ARM > +#include "opal/sys/arm/atomic.h" > #elif OPAL_ASSEMBLY_ARCH == OMPI_IA32 > #include "opal/sys/ia32/atomic.h" > #elif OPAL_ASSEMBLY_ARCH == OMPI_IA64 > Index: ompi-trunk/opal/include/opal/sys/timer.h > =================================================================== > --- ompi-trunk/opal/include/opal/sys/timer.h (revision 24191) > +++ ompi-trunk/opal/include/opal/sys/timer.h (working copy) > @@ -79,6 +79,8 @@ > /* don't include system-level gorp when generating doxygen files */ > #elif OPAL_ASSEMBLY_ARCH == OMPI_AMD64 > #include "opal/sys/amd64/timer.h" > +#elif OPAL_ASSEMBLY_ARCH == OMPI_ARM > +#include "opal/sys/arm/timer.h" > #elif OPAL_ASSEMBLY_ARCH == OMPI_IA32 > #include "opal/sys/ia32/timer.h" > #elif OPAL_ASSEMBLY_ARCH == OMPI_IA64 > Index: ompi-trunk/opal/include/opal/sys/architecture.h > =================================================================== > --- ompi-trunk/opal/include/opal/sys/architecture.h (revision 24191) > +++ ompi-trunk/opal/include/opal/sys/architecture.h (working copy) > @@ -36,6 +36,7 @@ > #define OMPI_SPARCV9_32 0061 > #define OMPI_SPARCV9_64 0062 > #define OMPI_MIPS 0070 > +#define OMPI_ARM 0100 > > /* Formats */ > #define OMPI_DEFAULT 1000 /* standard for given architecture */ > Index: ompi-trunk/opal/config/opal_config_asm.m4 > =================================================================== > --- ompi-trunk/opal/config/opal_config_asm.m4 (revision 24191) > +++ ompi-trunk/opal/config/opal_config_asm.m4 (working copy) > @@ -900,6 +900,12 @@ > OMPI_GCC_INLINE_ASSIGN='"bis [$]31,[$]31,%0" : "=&r"(ret)' > ;; > > + armv7*) > + ompi_cv_asm_arch="ARM" > + OPAL_ASM_SUPPORT_64BIT=1 > + OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)' > + ;; > + > mips-*|mips64*) > # Should really find some way to make sure that we are on > # a MIPS III machine (r4000 and later) > > > > > _______________________________________________ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel -- Jeff Squyres jsquy...@cisco.com For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/