Many thanks for this contribution!

A few points:

1. This is a lengthy contribution; it's a bit more than a "trivial" patch that 
we could include in the mainline without worrying about intellectual property.  
:-(  Can you officially release this code under the BSD license, and/or sign 
the Open MPI 3rd party contribution agreement?  (I know that this is a major 
hassle, but we have to do it :-( )

2. Where did you generate this patch from?  Based on your patch filenames, I 
tried to apply it to the SVN trunk@24210 but failed:

[11:07] svbu-mpi:~/svn/ompi4 % svnversion .
24210
[11:07] svbu-mpi:~/svn/ompi4 % patch -p1 < ~/arm.patch
patching file opal/asm/generate-asm.pl
Hunk #1 FAILED at 103.
1 out of 1 hunk FAILED -- saving rejects to file opal/asm/generate-asm.pl.rej
patching file opal/asm/asm-data.txt
Hunk #1 FAILED at 48.
1 out of 1 hunk FAILED -- saving rejects to file opal/asm/asm-data.txt.rej
patching file opal/asm/base/ARM.asm
patching file opal/include/opal/sys/arm/atomic.h
patching file opal/include/opal/sys/arm/timer.h
patching file opal/include/opal/sys/arm/Makefile.am
patching file opal/include/opal/sys/arm/update.sh
patching file opal/include/opal/sys/atomic.h
Hunk #1 FAILED at 146.
1 out of 1 hunk FAILED -- saving rejects to file opal/include/opal/sys/atomic.h.rej
patching file opal/include/opal/sys/timer.h
Hunk #1 FAILED at 79.
1 out of 1 hunk FAILED -- saving rejects to file opal/include/opal/sys/timer.h.rej
patching file opal/include/opal/sys/architecture.h
Hunk #1 FAILED at 36.
1 out of 1 hunk FAILED -- saving rejects to file opal/include/opal/sys/architecture.h.rej
patching file opal/config/opal_config_asm.m4
Hunk #1 FAILED at 900.
1 out of 1 hunk FAILED -- saving rejects to file opal/config/opal_config_asm.m4.rej
[11:07] svbu-mpi:~/svn/ompi4 % 




On Dec 24, 2010, at 9:22 AM, Leif Lindholm wrote:

> Hi,
> 
> The following patch adds support for the ARMv7-A architecture to opal.
> This includes current processors such as Cortex-A8 and Cortex-A9, as
> well as upcoming Cortex-A5 and Cortex-A15.
> 
> It has been validated on Ubuntu Lucid (10.04) and Maverick (10.10),
> although the former might require some package updates to build from
> checkout.
> 
> The opal/include/opal/sys/arm directory was cloned from powerpc.
> 
> I apologise for what I had to do to generate-asm.pl to get it to build.
> 
> Signed-off-by: leif.lindh...@arm.com
> 
> Index: ompi-trunk/opal/asm/generate-asm.pl
> ===================================================================
> --- ompi-trunk/opal/asm/generate-asm.pl       (revision 24191)
> +++ ompi-trunk/opal/asm/generate-asm.pl       (working copy)
> @@ -103,7 +103,11 @@
> }
> 
> if ($GNU_STACK == 1) {
> -    print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\@progbits\n";
> +    if ($asmarch eq "ARM") {
> +        print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\%progbits\n";
> +    } else {
> +        print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\@progbits\n";
> +    }
> }
> 
> close(INPUT);
> Index: ompi-trunk/opal/asm/asm-data.txt
> ===================================================================
> --- ompi-trunk/opal/asm/asm-data.txt  (revision 24191)
> +++ ompi-trunk/opal/asm/asm-data.txt  (working copy)
> @@ -48,6 +48,15 @@
> 
> ######################################################################
> #
> +# ARM (ARMv7 and later)
> +#
> +######################################################################
> +
> +ARM  default-.text-.globl-:--.L-#-1-1-1-1-1  arm-linux
> +
> +
> +######################################################################
> +#
> # Intel Pentium Class
> #
> ######################################################################
> Index: ompi-trunk/opal/asm/base/ARM.asm
> ===================================================================
> --- ompi-trunk/opal/asm/base/ARM.asm  (revision 0)
> +++ ompi-trunk/opal/asm/base/ARM.asm  (revision 0)
> @@ -0,0 +1,150 @@
> +START_FILE
> +     TEXT
> +
> +     ALIGN(4)
> +START_FUNC(opal_atomic_mb)
> +     dmb
> +     bx      lr
> +END_FUNC(opal_atomic_mb)
> +
> +
> +START_FUNC(opal_atomic_rmb)
> +     dmb
> +     bx      lr
> +END_FUNC(opal_atomic_rmb)
> +
> +
> +START_FUNC(opal_atomic_wmb)
> +     dmb
> +     bx      lr
> +END_FUNC(opal_atomic_wmb)
> +
> +
> +START_FUNC(opal_atomic_cmpset_32)
> +     LSYM(1)
> +     ldrex   r3, [r0]
> +     cmp     r1, r3
> +     bne     REFLSYM(2)
> +     strex   r12, r2, [r0]
> +     cmp     r12, #0
> +     bne     REFLSYM(1)
> +     mov     r0, #1
> +     LSYM(2)
> +     movne   r0, #0
> +     bx      lr
> +END_FUNC(opal_atomic_cmpset_32)
> +
> +
> +START_FUNC(opal_atomic_cmpset_acq_32)
> +     LSYM(3)
> +     ldrex   r3, [r0]
> +     cmp     r1, r3
> +     bne     REFLSYM(4)
> +     strex   r12, r2, [r0]
> +     cmp     r12, #0
> +     bne     REFLSYM(3)
> +     dmb
> +     mov     r0, #1
> +     LSYM(4)
> +     movne   r0, #0
> +     bx      lr
> +END_FUNC(opal_atomic_cmpset_acq_32)
> +
> +
> +START_FUNC(opal_atomic_cmpset_rel_32)
> +     LSYM(5)
> +     ldrex   r3, [r0]
> +     cmp     r1, r3
> +     bne     REFLSYM(6)
> +     dmb
> +     strex   r12, r2, [r0]
> +     cmp     r12, #0
> +     bne     REFLSYM(5)
> +     mov     r0, #1
> +     LSYM(6)
> +     movne   r0, #0
> +     bx      lr
> +END_FUNC(opal_atomic_cmpset_rel_32)
> +
> +#START_64BIT
> +START_FUNC(opal_atomic_cmpset_64)
> +     push    {r4-r7}
> +     ldrd    r6, r7, [sp, #16]
> +     LSYM(7)
> +     ldrexd  r4, r5, [r0]
> +     cmp     r4, r2
> +     cmpeq   r5, r3
> +     bne     REFLSYM(8)
> +     strexd  r1, r6, r7, [r0]
> +     cmp     r1, #0
> +     bne     REFLSYM(7)
> +     mov     r0, #1
> +     LSYM(8)
> +     movne   r0, #0
> +     pop     {r4-r7}
> +     bx      lr
> +END_FUNC(opal_atomic_cmpset_64)
> +
> +START_FUNC(opal_atomic_cmpset_acq_64)
> +     push    {r4-r7}
> +     ldrd    r6, r7, [sp, #16]
> +     LSYM(9)
> +     ldrexd  r4, r5, [r0]
> +     cmp     r4, r2
> +     cmpeq   r5, r3
> +     bne     REFLSYM(10)
> +     strexd  r1, r6, r7, [r0]
> +     cmp     r1, #0
> +     bne     REFLSYM(9)
> +     dmb
> +     mov     r0, #1
> +     LSYM(10)
> +     movne   r0, #0
> +     pop     {r4-r7}
> +     bx      lr
> +END_FUNC(opal_atomic_cmpset_acq_64)
> +
> +
> +START_FUNC(opal_atomic_cmpset_rel_64)
> +     push    {r4-r7}
> +     ldrd    r6, r7, [sp, #16]
> +     LSYM(11)
> +     ldrexd  r4, r5, [r0]
> +     cmp     r4, r2
> +     cmpeq   r5, r3
> +     bne     REFLSYM(12)
> +     dmb
> +     strexd  r1, r6, r7, [r0]
> +     cmp     r1, #0
> +     bne     REFLSYM(11)
> +     mov     r0, #1
> +     LSYM(12)
> +     movne   r0, #0
> +     pop     {r4-r7}
> +     bx      lr
> +END_FUNC(opal_atomic_cmpset_rel_64)
> +#END_64BIT
> +
> +
> +START_FUNC(opal_atomic_add_32)
> +     LSYM(13)
> +     ldrex   r2, [r0]
> +     add     r2, r2, r1
> +     strex   r3, r2, [r0]
> +     cmp     r3, #0
> +     bne     REFLSYM(13)
> +     mov     r0, r2
> +     bx      lr
> +END_FUNC(opal_atomic_add_32)
> +
> +
> +START_FUNC(opal_atomic_sub_32)
> +     LSYM(14)
> +     ldrex   r2, [r0]
> +     sub     r2, r2, r1
> +     strex   r3, r2, [r0]
> +     cmp     r3, #0
> +     bne     REFLSYM(14)
> +     mov     r0, r2
> +     bx      lr
> +END_FUNC(opal_atomic_sub_32)
> Index: ompi-trunk/opal/include/opal/sys/arm/atomic.h
> ===================================================================
> --- ompi-trunk/opal/include/opal/sys/arm/atomic.h     (revision 0)
> +++ ompi-trunk/opal/include/opal/sys/arm/atomic.h     (revision 0)
> @@ -0,0 +1,227 @@
> +/*
> + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
> + *                         University Research and Technology
> + *                         Corporation.  All rights reserved.
> + * Copyright (c) 2004-2005 The University of Tennessee and The University
> + *                         of Tennessee Research Foundation.  All rights
> + *                         reserved.
> + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 
> + *                         University of Stuttgart.  All rights reserved.
> + * Copyright (c) 2004-2005 The Regents of the University of California.
> + *                         All rights reserved.
> + * Copyright (c) 2010      IBM Corporation.  All rights reserved.
> + * Copyright (c) 2010      ARM ltd.  All rights reserved.
> + * $COPYRIGHT$
> + * 
> + * Additional copyrights may follow
> + * 
> + * $HEADER$
> + */
> +
> +#ifndef OMPI_SYS_ARCH_ATOMIC_H
> +#define OMPI_SYS_ARCH_ATOMIC_H 1
> +
> +#if OPAL_WANT_SMP_LOCKS
> +
> +#define MB()  __asm__ __volatile__ ("dmb" : : : "memory")
> +#define RMB() __asm__ __volatile__ ("dmb" : : : "memory")
> +#define WMB() __asm__ __volatile__ ("dmb" : : : "memory")
> +
> +#else
> +
> +#define MB()
> +#define RMB()
> +#define WMB()
> +
> +#endif
> +
> +
> +/**********************************************************************
> + *
> + * Define constants for ARMv7
> + *
> + *********************************************************************/
> +#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
> +
> +#define OPAL_HAVE_ATOMIC_CMPSET_32 1
> +
> +#define OPAL_HAVE_ATOMIC_CMPSET_64 1
> +
> +#define OPAL_HAVE_ATOMIC_MATH_32 1
> +#define OPAL_HAVE_ATOMIC_ADD_32 1
> +#define OPAL_HAVE_ATOMIC_SUB_32 1
> +
> +
> +/**********************************************************************
> + *
> + * Memory Barriers
> + *
> + *********************************************************************/
> +#if OMPI_GCC_INLINE_ASSEMBLY
> +
> +static inline
> +void opal_atomic_mb(void)
> +{
> +    MB();
> +}
> +
> +
> +static inline
> +void opal_atomic_rmb(void)
> +{
> +    RMB();
> +}
> +
> +
> +static inline
> +void opal_atomic_wmb(void)
> +{
> +    WMB();
> +}
> +
> +
> +/**********************************************************************
> + *
> + * Atomic math operations
> + *
> + *********************************************************************/
> +
> +static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
> +                                        int32_t oldval, int32_t newval)
> +{
> +  int32_t ret, tmp;
> +
> +   __asm__ __volatile__ (
> +                         "1:  ldrex   %0, [%2]        \n"
> +                         "    cmp     %0, %3          \n"
> +                         "    bne     2f              \n"
> +                         "    strex   %1, %4, [%2]    \n"
> +                         "    cmp     %1, #0          \n"
> +                         "    bne     1b              \n"
> +                         "2:                          \n"
> +
> +                         : "=&r" (ret), "=&r" (tmp)
> +                         : "r" (addr), "r" (oldval), "r" (newval)
> +                         : "cc", "memory");
> +
> +   return (ret == oldval);
> +}
> +
> +/* these two functions aren't inlined in the non-gcc case because then
> +   there would be two function calls (since neither cmpset_32 nor
> +   atomic_?mb can be inlined).  Instead, we "inline" them by hand in
> +   the assembly, meaning there is one function call overhead instead
> +   of two */
> +static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr,
> +                                            int32_t oldval, int32_t newval)
> +{
> +    int rc;
> +
> +    rc = opal_atomic_cmpset_32(addr, oldval, newval);
> +    opal_atomic_rmb();
> +
> +    return rc;
> +}
> +
> +
> +static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
> +                                            int32_t oldval, int32_t newval)
> +{
> +    opal_atomic_wmb();
> +    return opal_atomic_cmpset_32(addr, oldval, newval);
> +}
> +
> +
> +static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
> +                                        int64_t oldval, int64_t newval)
> +{
> +  int64_t ret;
> +  int tmp;
> +
> +
> +   __asm__ __volatile__ (
> +                         "1:  ldrexd  %0, %H0, [%2]           \n"
> +                         "    cmp     %0, %3                  \n"
> +                         "    cmpeq   %H0, %H3                \n"
> +                         "    bne     2f                      \n"
> +                         "    strexd  %1, %4, %H4, [%2]       \n"
> +                         "    cmp     %1, #0                  \n"
> +                         "    bne     1b                      \n"
> +                         "2:                                    \n"
> +
> +                         : "=&r" (ret), "=&r" (tmp)
> +                         : "r" (addr), "r" (oldval), "r" (newval)
> +                         : "cc", "memory");
> +
> +   return (ret == oldval);
> +}
> +
> +/* these two functions aren't inlined in the non-gcc case because then
> +   there would be two function calls (since neither cmpset_64 nor
> +   atomic_?mb can be inlined).  Instead, we "inline" them by hand in
> +   the assembly, meaning there is one function call overhead instead
> +   of two */
> +static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr,
> +                                            int64_t oldval, int64_t newval)
> +{
> +    int rc;
> +
> +    rc = opal_atomic_cmpset_64(addr, oldval, newval);
> +    opal_atomic_rmb();
> +
> +    return rc;
> +}
> +
> +
> +static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
> +                                            int64_t oldval, int64_t newval)
> +{
> +    opal_atomic_wmb();
> +    return opal_atomic_cmpset_64(addr, oldval, newval);
> +}
> +
> +
> +static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc)
> +{
> +   int32_t t;
> +   int tmp;
> +
> +   __asm__ __volatile__(
> +                         "1:  ldrex   %0, [%2]        \n"
> +                         "    add     %0, %0, %3      \n"
> +                         "    strex   %1, %0, [%2]    \n"
> +                         "    cmp     %1, #0          \n"
> +                         "    bne     1b              \n"
> +
> +                         : "=&r" (t), "=&r" (tmp)
> +                         : "r" (v), "r" (inc)
> +                         : "cc", "memory");
> +
> +
> +   return t;
> +}
> +
> +
> +static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec)
> +{
> +   int32_t t;
> +   int tmp;
> +
> +   __asm__ __volatile__(
> +                         "1:  ldrex   %0, [%2]        \n"
> +                         "    sub     %0, %0, %3      \n"
> +                         "    strex   %1, %0, [%2]    \n"
> +                         "    cmp     %1, #0          \n"
> +                         "    bne     1b              \n"
> +
> +                         : "=&r" (t), "=&r" (tmp)
> +                         : "r" (v), "r" (dec)
> +                         : "cc", "memory");
> +
> +   return t;
> +}
> +
> +
> +#endif /* OMPI_GCC_INLINE_ASSEMBLY */
> +
> +#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */
> Index: ompi-trunk/opal/include/opal/sys/arm/timer.h
> ===================================================================
> --- ompi-trunk/opal/include/opal/sys/arm/timer.h      (revision 0)
> +++ ompi-trunk/opal/include/opal/sys/arm/timer.h      (revision 0)
> @@ -0,0 +1,33 @@
> +/*
> + * Copyright (c) 2008      The University of Tennessee and The University
> + *                         of Tennessee Research Foundation.  All rights
> + *                         reserved.
> + * $COPYRIGHT$
> + * 
> + * Additional copyrights may follow
> + * 
> + * $HEADER$
> + */
> +
> +#ifndef OMPI_SYS_ARCH_TIMER_H
> +#define OMPI_SYS_ARCH_TIMER_H 1
> +
> +#include <sys/times.h>
> +
> +typedef uint64_t opal_timer_t;
> +
> +static inline opal_timer_t
> +opal_sys_timer_get_cycles(void)
> +{
> +    opal_timer_t ret;
> +    struct tms accurate_clock;
> +
> +    times(&accurate_clock);
> +    ret = accurate_clock.tms_utime + accurate_clock.tms_stime;
> +
> +    return ret;
> +}
> +
> +#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1
> +
> +#endif /* ! OMPI_SYS_ARCH_TIMER_H */
> Index: ompi-trunk/opal/include/opal/sys/arm/Makefile.am
> ===================================================================
> --- ompi-trunk/opal/include/opal/sys/arm/Makefile.am  (revision 0)
> +++ ompi-trunk/opal/include/opal/sys/arm/Makefile.am  (revision 0)
> @@ -0,0 +1,24 @@
> +#
> +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
> +#                         University Research and Technology
> +#                         Corporation.  All rights reserved.
> +# Copyright (c) 2004-2008 The University of Tennessee and The University
> +#                         of Tennessee Research Foundation.  All rights
> +#                         reserved.
> +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 
> +#                         University of Stuttgart.  All rights reserved.
> +# Copyright (c) 2004-2005 The Regents of the University of California.
> +#                         All rights reserved.
> +# $COPYRIGHT$
> +# 
> +# Additional copyrights may follow
> +# 
> +# $HEADER$
> +#
> +
> +# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am
> +
> +headers += \
> +     opal/sys/arm/atomic.h \
> +     opal/sys/arm/timer.h
> +
> Index: ompi-trunk/opal/include/opal/sys/arm/update.sh
> ===================================================================
> --- ompi-trunk/opal/include/opal/sys/arm/update.sh    (revision 0)
> +++ ompi-trunk/opal/include/opal/sys/arm/update.sh    (revision 0)
> @@ -0,0 +1,37 @@
> +#!/bin/sh
> +#
> +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
> +#                         University Research and Technology
> +#                         Corporation.  All rights reserved.
> +# Copyright (c) 2004-2005 The University of Tennessee and The University
> +#                         of Tennessee Research Foundation.  All rights
> +#                         reserved.
> +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 
> +#                         University of Stuttgart.  All rights reserved.
> +# Copyright (c) 2004-2005 The Regents of the University of California.
> +#                         All rights reserved.
> +# $COPYRIGHT$
> +# 
> +# Additional copyrights may follow
> +# 
> +# $HEADER$
> +#
> +
> +CFILE=/tmp/opal_atomic_$$.c
> +
> +trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15
> +
> +echo Updating atomic.s from atomic.h using gcc
> +
> +cat > $CFILE<<EOF
> +#include <stdlib.h>
> +#include <inttypes.h>
> +#define static
> +#define inline
> +#define OMPI_GCC_INLINE_ASSEMBLY 1
> +#define OPAL_WANT_SMP_LOCKS 1
> +#include "../architecture.h"
> +#include "atomic.h"
> +EOF
> +
> +gcc -O1 -I. -S $CFILE -o atomic.s
> Index: ompi-trunk/opal/include/opal/sys/atomic.h
> ===================================================================
> --- ompi-trunk/opal/include/opal/sys/atomic.h (revision 24191)
> +++ ompi-trunk/opal/include/opal/sys/atomic.h (working copy)
> @@ -146,6 +146,8 @@
> #include "opal/sys/alpha/atomic.h"
> #elif OPAL_ASSEMBLY_ARCH == OMPI_AMD64
> #include "opal/sys/amd64/atomic.h"
> +#elif OPAL_ASSEMBLY_ARCH == OMPI_ARM
> +#include "opal/sys/arm/atomic.h"
> #elif OPAL_ASSEMBLY_ARCH == OMPI_IA32
> #include "opal/sys/ia32/atomic.h"
> #elif OPAL_ASSEMBLY_ARCH == OMPI_IA64
> Index: ompi-trunk/opal/include/opal/sys/timer.h
> ===================================================================
> --- ompi-trunk/opal/include/opal/sys/timer.h  (revision 24191)
> +++ ompi-trunk/opal/include/opal/sys/timer.h  (working copy)
> @@ -79,6 +79,8 @@
> /* don't include system-level gorp when generating doxygen files */ 
> #elif OPAL_ASSEMBLY_ARCH == OMPI_AMD64
> #include "opal/sys/amd64/timer.h"
> +#elif OPAL_ASSEMBLY_ARCH == OMPI_ARM
> +#include "opal/sys/arm/timer.h"
> #elif OPAL_ASSEMBLY_ARCH == OMPI_IA32
> #include "opal/sys/ia32/timer.h"
> #elif OPAL_ASSEMBLY_ARCH == OMPI_IA64
> Index: ompi-trunk/opal/include/opal/sys/architecture.h
> ===================================================================
> --- ompi-trunk/opal/include/opal/sys/architecture.h   (revision 24191)
> +++ ompi-trunk/opal/include/opal/sys/architecture.h   (working copy)
> @@ -36,6 +36,7 @@
> #define OMPI_SPARCV9_32     0061
> #define OMPI_SPARCV9_64     0062
> #define OMPI_MIPS           0070
> +#define OMPI_ARM            0100
> 
> /* Formats */
> #define OMPI_DEFAULT        1000  /* standard for given architecture */
> Index: ompi-trunk/opal/config/opal_config_asm.m4
> ===================================================================
> --- ompi-trunk/opal/config/opal_config_asm.m4 (revision 24191)
> +++ ompi-trunk/opal/config/opal_config_asm.m4 (working copy)
> @@ -900,6 +900,12 @@
>             OMPI_GCC_INLINE_ASSIGN='"bis [$]31,[$]31,%0" : "=&r"(ret)'
>             ;;
> 
> +        armv7*)
> +            ompi_cv_asm_arch="ARM"
> +            OPAL_ASM_SUPPORT_64BIT=1
> +            OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
> +            ;;
> +
>         mips-*|mips64*)
>             # Should really find some way to make sure that we are on
>             # a MIPS III machine (r4000 and later)
> 
> 
> 
> 
> _______________________________________________
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel


-- 
Jeff Squyres
jsquy...@cisco.com
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/


Reply via email to