Hi,

The following patch adds support for the ARMv7-A architecture to opal.
This includes current processors such as Cortex-A8 and Cortex-A9, as
well as upcoming Cortex-A5 and Cortex-A15.

It has been validated on Ubuntu Lucid (10.04) and Maverick (10.10),
although the former might require some package updates to build from
checkout.

The opal/include/opal/sys/arm directory was cloned from powerpc.

I apologise for what I had to do to generate-asm.pl to get it to build.

Signed-off-by: leif.lindh...@arm.com

Index: ompi-trunk/opal/asm/generate-asm.pl
===================================================================
--- ompi-trunk/opal/asm/generate-asm.pl (revision 24191)
+++ ompi-trunk/opal/asm/generate-asm.pl (working copy)
@@ -103,7 +103,11 @@
 }

 if ($GNU_STACK == 1) {
-    print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\@progbits\n";
+    # GNU as treats "@" as a comment character on ARM, so the section
+    # type must be written "%progbits" there instead of "@progbits".
+    my $section_type = ($asmarch eq "ARM") ? '%progbits' : '@progbits';
+    print OUTPUT "\n\t.section\t.note.GNU-stack,\"\","
+        . $section_type . "\n";
 }

 close(INPUT);
Index: ompi-trunk/opal/asm/asm-data.txt
===================================================================
--- ompi-trunk/opal/asm/asm-data.txt    (revision 24191)
+++ ompi-trunk/opal/asm/asm-data.txt    (working copy)
@@ -48,6 +48,15 @@

 ######################################################################
 #
+# ARM (ARMv7 and later)
+#
+######################################################################
+
+ARM    default-.text-.globl-:--.L-#-1-1-1-1-1  arm-linux
+
+
+######################################################################
+#
 # Intel Pentium Class
 #
 ######################################################################
Index: ompi-trunk/opal/asm/base/ARM.asm
===================================================================
--- ompi-trunk/opal/asm/base/ARM.asm    (revision 0)
+++ ompi-trunk/opal/asm/base/ARM.asm    (revision 0)
@@ -0,0 +1,150 @@
+START_FILE
+       TEXT
+
+       ALIGN(4)
+START_FUNC(opal_atomic_mb)
+       dmb                             @ data memory barrier
+       bx      lr                      @ return to the caller
+END_FUNC(opal_atomic_mb)
+
+
+START_FUNC(opal_atomic_rmb)
+       dmb                             @ same barrier instruction as opal_atomic_mb
+       bx      lr                      @ return to the caller
+END_FUNC(opal_atomic_rmb)
+
+
+START_FUNC(opal_atomic_wmb)
+       dmb                             @ same barrier instruction as opal_atomic_mb
+       bx      lr                      @ return to the caller
+END_FUNC(opal_atomic_wmb)
+
+
+START_FUNC(opal_atomic_cmpset_32)
+       LSYM(1)
+       ldrex   r3, [r0]                @ r3 = exclusive load of the word at r0
+       cmp     r1, r3                  @ compare with the expected value in r1
+       bne     REFLSYM(2)
+       strex   r12, r2, [r0]           @ try to store r2, r12 is 0 on success
+       cmp     r12, #0
+       bne     REFLSYM(1)
+       mov     r0, #1                  @ stored, result is 1 and flags stay eq
+       LSYM(2)
+       movne   r0, #0                  @ runs only on the mismatch path, result is 0
+       bx      lr
+END_FUNC(opal_atomic_cmpset_32)
+
+
+START_FUNC(opal_atomic_cmpset_acq_32)
+       LSYM(3)
+       ldrex   r3, [r0]                @ r3 = exclusive load of the word at r0
+       cmp     r1, r3                  @ compare with the expected value in r1
+       bne     REFLSYM(4)
+       strex   r12, r2, [r0]           @ try to store r2, r12 is 0 on success
+       cmp     r12, #0
+       bne     REFLSYM(3)
+       dmb                             @ barrier after the successful store, flags untouched
+       mov     r0, #1                  @ stored, result is 1 and flags stay eq
+       LSYM(4)
+       movne   r0, #0                  @ runs only on the mismatch path, result is 0
+       bx      lr
+END_FUNC(opal_atomic_cmpset_acq_32)
+
+
+START_FUNC(opal_atomic_cmpset_rel_32)
+       LSYM(5)
+       ldrex   r3, [r0]                @ r3 = exclusive load of the word at r0
+       cmp     r1, r3                  @ compare with the expected value in r1
+       bne     REFLSYM(6)
+       dmb                             @ barrier ahead of the store
+       strex   r12, r2, [r0]           @ try to store r2, r12 is 0 on success
+       cmp     r12, #0
+       bne     REFLSYM(5)              @ store failed, retry from label 5 of this function
+       mov     r0, #1                  @ stored, result is 1 and flags stay eq
+       LSYM(6)
+       movne   r0, #0                  @ runs only on the mismatch path, result is 0
+       bx      lr
+END_FUNC(opal_atomic_cmpset_rel_32)
+
+#START_64BIT
+START_FUNC(opal_atomic_cmpset_64)
+       push    {r4-r7}                 @ save four work registers, 16 bytes of stack
+       ldrd    r6, r7, [sp, #16]       @ doubleword just above the saved registers, presumably newval
+       LSYM(7)
+       ldrexd  r4, r5, [r0]            @ r4, r5 = exclusive load of the doubleword at r0
+       cmp     r4, r2                  @ first word against r2
+       cmpeq   r5, r3                  @ if equal, second word against r3
+       bne     REFLSYM(8)
+       strexd  r1, r6, r7, [r0]        @ try to store r6, r7 and set r1 to 0 on success
+       cmp     r1, #0
+       bne     REFLSYM(7)
+       mov     r0, #1                  @ stored, result is 1 and flags stay eq
+       LSYM(8)
+       movne   r0, #0                  @ runs only on the mismatch path, result is 0
+       pop     {r4-r7}                 @ restore the saved registers
+       bx      lr
+END_FUNC(opal_atomic_cmpset_64)
+
+START_FUNC(opal_atomic_cmpset_acq_64)
+       push    {r4-r7}                 @ save four work registers, 16 bytes of stack
+       ldrd    r6, r7, [sp, #16]       @ doubleword just above the saved registers, presumably newval
+       LSYM(9)
+       ldrexd  r4, r5, [r0]            @ r4, r5 = exclusive load of the doubleword at r0
+       cmp     r4, r2                  @ first word against r2
+       cmpeq   r5, r3                  @ if equal, second word against r3
+       bne     REFLSYM(10)
+       strexd  r1, r6, r7, [r0]        @ try to store r6, r7 and set r1 to 0 on success
+       cmp     r1, #0
+       bne     REFLSYM(9)
+       dmb                             @ barrier after the successful store, flags untouched
+       mov     r0, #1                  @ stored, result is 1 and flags stay eq
+       LSYM(10)
+       movne   r0, #0                  @ runs only on the mismatch path, result is 0
+       pop     {r4-r7}                 @ restore the saved registers
+       bx      lr
+END_FUNC(opal_atomic_cmpset_acq_64)
+
+
+START_FUNC(opal_atomic_cmpset_rel_64)
+       push    {r4-r7}                 @ save four work registers, 16 bytes of stack
+       ldrd    r6, r7, [sp, #16]       @ doubleword just above the saved registers, presumably newval
+       LSYM(11)
+       ldrexd  r4, r5, [r0]            @ r4, r5 = exclusive load of the doubleword at r0
+       cmp     r4, r2                  @ first word against r2
+       cmpeq   r5, r3                  @ if equal, second word against r3
+       bne     REFLSYM(12)
+       dmb                             @ barrier ahead of the store
+       strexd  r1, r6, r7, [r0]        @ try to store r6, r7 and set r1 to 0 on success
+       cmp     r1, #0
+       bne     REFLSYM(11)
+       mov     r0, #1                  @ stored, result is 1 and flags stay eq
+       LSYM(12)
+       movne   r0, #0                  @ runs only on the mismatch path, result is 0
+       pop     {r4-r7}                 @ restore the saved registers
+       bx      lr
+END_FUNC(opal_atomic_cmpset_rel_64)
+#END_64BIT
+
+
+START_FUNC(opal_atomic_add_32)
+       LSYM(13)
+       ldrex   r2, [r0]                @ r2 = exclusive load of the word at r0
+       add     r2, r2, r1              @ r2 = loaded value plus r1
+       strex   r3, r2, [r0]            @ try to store the sum, r3 is 0 on success
+       cmp     r3, #0
+       bne     REFLSYM(13)
+       mov     r0, r2                  @ result is the value that was stored
+       bx      lr
+END_FUNC(opal_atomic_add_32)
+
+
+START_FUNC(opal_atomic_sub_32)
+       LSYM(14)
+       ldrex   r2, [r0]                @ r2 = exclusive load of the word at r0
+       sub     r2, r2, r1              @ r2 = loaded value minus r1
+       strex   r3, r2, [r0]            @ try to store the difference, r3 is 0 on success
+       cmp     r3, #0
+       bne     REFLSYM(14)
+       mov     r0, r2                  @ result is the value that was stored
+       bx      lr
+END_FUNC(opal_atomic_sub_32)
Index: ompi-trunk/opal/include/opal/sys/arm/atomic.h
===================================================================
--- ompi-trunk/opal/include/opal/sys/arm/atomic.h       (revision 0)
+++ ompi-trunk/opal/include/opal/sys/arm/atomic.h       (revision 0)
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2010      IBM Corporation.  All rights reserved.
+ * Copyright (c) 2010      ARM ltd.  All rights reserved.
+ * $COPYRIGHT$
+ * 
+ * Additional copyrights may follow
+ * 
+ * $HEADER$
+ */
+
+#ifndef OMPI_SYS_ARCH_ATOMIC_H
+#define OMPI_SYS_ARCH_ATOMIC_H 1
+
+#if OPAL_WANT_SMP_LOCKS
+
+#define MB()  __asm__ __volatile__ ("dmb" : : : "memory")
+#define RMB() __asm__ __volatile__ ("dmb" : : : "memory")
+#define WMB() __asm__ __volatile__ ("dmb" : : : "memory")
+
+#else
+
+#define MB()
+#define RMB()
+#define WMB()
+
+#endif
+
+
+/**********************************************************************
+ *
+ * Define constants for ARMv7
+ *
+ *********************************************************************/
+#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
+
+#define OPAL_HAVE_ATOMIC_CMPSET_32 1
+
+#define OPAL_HAVE_ATOMIC_CMPSET_64 1
+
+#define OPAL_HAVE_ATOMIC_MATH_32 1
+#define OPAL_HAVE_ATOMIC_ADD_32 1
+#define OPAL_HAVE_ATOMIC_SUB_32 1
+
+
+/**********************************************************************
+ *
+ * Memory Barriers
+ *
+ *********************************************************************/
+#if OMPI_GCC_INLINE_ASSEMBLY
+
+/* Expands MB(): a "dmb" when OPAL_WANT_SMP_LOCKS is set, otherwise nothing. */
+static inline void opal_atomic_mb(void)
+{
+    MB();
+}
+
+
+/* Expands RMB(): a "dmb" when OPAL_WANT_SMP_LOCKS is set, otherwise nothing. */
+static inline void opal_atomic_rmb(void)
+{
+    RMB();
+}
+
+
+/* Expands WMB(): a "dmb" when OPAL_WANT_SMP_LOCKS is set, otherwise nothing. */
+static inline void opal_atomic_wmb(void)
+{
+    WMB();
+}
+
+
+/**********************************************************************
+ *
+ * Atomic math operations
+ *
+ *********************************************************************/
+
+static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
+                                        int32_t oldval, int32_t newval)
+{
+  int32_t ret, tmp;   /* ret: value loaded from *addr; tmp: strex status */
+  /* Store newval to *addr only if *addr equals oldval; retry while strex fails. */
+   __asm__ __volatile__ (
+                         "1:  ldrex   %0, [%2]        \n"
+                         "    cmp     %0, %3          \n"
+                         "    bne     2f              \n"
+                         "    strex   %1, %4, [%2]    \n"
+                         "    cmp     %1, #0          \n"
+                         "    bne     1b              \n"
+                         "2:                          \n"
+                         /* outputs are early-clobber: written while inputs are still live */
+                         : "=&r" (ret), "=&r" (tmp)
+                         : "r" (addr), "r" (oldval), "r" (newval)
+                         : "cc", "memory");
+   /* Nonzero when the loaded value matched oldval, i.e. the store was performed. */
+   return (ret == oldval);
+}
+
+/* these two functions aren't inlined in the non-gcc case because then
+   there would be two function calls (since neither cmpset_32 nor
+   atomic_?mb can be inlined).  Instead, we "inline" them by hand in
+   the assembly, meaning there is one function call overhead instead
+   of two */
+static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr,
+                                            int32_t oldval, int32_t newval)
+{
+    /* Compare-and-set first, then opal_atomic_rmb(), then hand back the
+       compare-and-set result unchanged. */
+    const int swapped = opal_atomic_cmpset_32(addr, oldval, newval);
+
+    opal_atomic_rmb();
+    return swapped;
+}
+
+
+static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
+                                            int32_t oldval, int32_t newval)
+{
+    opal_atomic_wmb();   /* barrier is issued before the compare-and-set */
+    return opal_atomic_cmpset_32(addr, oldval, newval);   /* result passed through */
+}
+
+
+static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
+                                        int64_t oldval, int64_t newval)
+{
+  int64_t ret;   /* value loaded from *addr; %0 and %H0 name its two registers */
+  int tmp;       /* strexd status: 0 when the store succeeded */
+  /* Store newval to *addr only if *addr equals oldval; retry while strexd
+     fails.  "it eq" is required around cmpeq in Thumb-2, no code in ARM state. */
+   __asm__ __volatile__ (
+                         "1:  ldrexd  %0, %H0, [%2]           \n"
+                         "    cmp     %0, %3                  \n"
+                         "    it      eq                      \n"
+                         "    cmpeq   %H0, %H3                \n"
+                         "    bne     2f                      \n"
+                         "    strexd  %1, %4, %H4, [%2]       \n"
+                         "    cmp     %1, #0                  \n"
+                         "    bne     1b                      \n"
+                         "2:                                    \n"
+
+                         : "=&r" (ret), "=&r" (tmp)
+                         : "r" (addr), "r" (oldval), "r" (newval)
+                         : "cc", "memory");
+   return (ret == oldval);   /* nonzero when the loaded value matched oldval */
+}
+
+/* these two functions aren't inlined in the non-gcc case because then
+   there would be two function calls (since neither cmpset_64 nor
+   atomic_?mb can be inlined).  Instead, we "inline" them by hand in
+   the assembly, meaning there is one function call overhead instead
+   of two */
+static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{
+    /* Compare-and-set first, then opal_atomic_rmb(), then hand back the
+       compare-and-set result unchanged. */
+    const int swapped = opal_atomic_cmpset_64(addr, oldval, newval);
+
+    opal_atomic_rmb();
+    return swapped;
+}
+
+
+static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{
+    opal_atomic_wmb();   /* barrier is issued before the compare-and-set */
+    return opal_atomic_cmpset_64(addr, oldval, newval);   /* result passed through */
+}
+
+
+static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc)
+{
+   int32_t t;    /* holds the loaded value, then the sum that gets stored */
+   int tmp;      /* strex status; the loop repeats while it is nonzero */
+   /* Add inc to *v using an ldrex/strex retry loop. */
+   __asm__ __volatile__(
+                         "1:  ldrex   %0, [%2]        \n"
+                         "    add     %0, %0, %3      \n"
+                         "    strex   %1, %0, [%2]    \n"
+                         "    cmp     %1, #0          \n"
+                         "    bne     1b              \n"
+                         /* outputs are early-clobber: written while inputs are still live */
+                         : "=&r" (t), "=&r" (tmp)
+                         : "r" (v), "r" (inc)
+                         : "cc", "memory");
+
+   /* t is the value that was stored, i.e. the result after the addition */
+   return t;
+}
+
+
+static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec)
+{
+   int32_t t;    /* holds the loaded value, then the difference that gets stored */
+   int tmp;      /* strex status; the loop repeats while it is nonzero */
+   /* Subtract dec from *v using an ldrex/strex retry loop. */
+   __asm__ __volatile__(
+                         "1:  ldrex   %0, [%2]        \n"
+                         "    sub     %0, %0, %3      \n"
+                         "    strex   %1, %0, [%2]    \n"
+                         "    cmp     %1, #0          \n"
+                         "    bne     1b              \n"
+                         /* outputs are early-clobber: written while inputs are still live */
+                         : "=&r" (t), "=&r" (tmp)
+                         : "r" (v), "r" (dec)
+                         : "cc", "memory");
+   /* t is the value that was stored, i.e. the result after the subtraction */
+   return t;
+}
+
+
+#endif /* OMPI_GCC_INLINE_ASSEMBLY */
+
+#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */
Index: ompi-trunk/opal/include/opal/sys/arm/timer.h
===================================================================
--- ompi-trunk/opal/include/opal/sys/arm/timer.h        (revision 0)
+++ ompi-trunk/opal/include/opal/sys/arm/timer.h        (revision 0)
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008      The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * $COPYRIGHT$
+ * 
+ * Additional copyrights may follow
+ * 
+ * $HEADER$
+ */
+
+#ifndef OMPI_SYS_ARCH_TIMER_H
+#define OMPI_SYS_ARCH_TIMER_H 1
+
+#include <sys/times.h>
+
+typedef uint64_t opal_timer_t;
+
+static inline opal_timer_t
+opal_sys_timer_get_cycles(void)
+{
+    /* Returns user plus system CPU time as reported by times(); the unit
+     * is whatever times() counts in, not hardware cycles. */
+    struct tms cpu_times;
+
+    times(&cpu_times);
+
+    return (opal_timer_t) (cpu_times.tms_utime + cpu_times.tms_stime);
+}
+
+#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1
+
+#endif /* ! OMPI_SYS_ARCH_TIMER_H */
Index: ompi-trunk/opal/include/opal/sys/arm/Makefile.am
===================================================================
--- ompi-trunk/opal/include/opal/sys/arm/Makefile.am    (revision 0)
+++ ompi-trunk/opal/include/opal/sys/arm/Makefile.am    (revision 0)
@@ -0,0 +1,24 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2008 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+# 
+# Additional copyrights may follow
+# 
+# $HEADER$
+#
+
+# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am
+
+headers += \
+       opal/sys/arm/atomic.h \
+       opal/sys/arm/timer.h
+
Index: ompi-trunk/opal/include/opal/sys/arm/update.sh
===================================================================
--- ompi-trunk/opal/include/opal/sys/arm/update.sh      (revision 0)
+++ ompi-trunk/opal/include/opal/sys/arm/update.sh      (revision 0)
@@ -0,0 +1,37 @@
+#!/bin/sh
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+# 
+# Additional copyrights may follow
+# 
+# $HEADER$
+#
+
+CFILE=/tmp/opal_atomic_$$.c    # scratch C source, named with the PID of this shell
+
+trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15    # delete the scratch file on exit and on signals 1, 2, 15
+
+echo Updating atomic.s from atomic.h using gcc
+# The wrapper below defines static and inline to nothing before including atomic.h.
+cat > $CFILE<<EOF
+#include <stdlib.h>
+#include <inttypes.h>
+#define static
+#define inline
+#define OMPI_GCC_INLINE_ASSEMBLY 1
+#define OPAL_WANT_SMP_LOCKS 1
+#include "../architecture.h"
+#include "atomic.h"
+EOF
+
+gcc -O1 -I. -S $CFILE -o atomic.s    # -S stops after generating assembly, written to ./atomic.s
Index: ompi-trunk/opal/include/opal/sys/atomic.h
===================================================================
--- ompi-trunk/opal/include/opal/sys/atomic.h   (revision 24191)
+++ ompi-trunk/opal/include/opal/sys/atomic.h   (working copy)
@@ -146,6 +146,8 @@
 #include "opal/sys/alpha/atomic.h"
 #elif OPAL_ASSEMBLY_ARCH == OMPI_AMD64
 #include "opal/sys/amd64/atomic.h"
+#elif OPAL_ASSEMBLY_ARCH == OMPI_ARM
+#include "opal/sys/arm/atomic.h"
 #elif OPAL_ASSEMBLY_ARCH == OMPI_IA32
 #include "opal/sys/ia32/atomic.h"
 #elif OPAL_ASSEMBLY_ARCH == OMPI_IA64
Index: ompi-trunk/opal/include/opal/sys/timer.h
===================================================================
--- ompi-trunk/opal/include/opal/sys/timer.h    (revision 24191)
+++ ompi-trunk/opal/include/opal/sys/timer.h    (working copy)
@@ -79,6 +79,8 @@
 /* don't include system-level gorp when generating doxygen files */ 
 #elif OPAL_ASSEMBLY_ARCH == OMPI_AMD64
 #include "opal/sys/amd64/timer.h"
+#elif OPAL_ASSEMBLY_ARCH == OMPI_ARM
+#include "opal/sys/arm/timer.h"
 #elif OPAL_ASSEMBLY_ARCH == OMPI_IA32
 #include "opal/sys/ia32/timer.h"
 #elif OPAL_ASSEMBLY_ARCH == OMPI_IA64
Index: ompi-trunk/opal/include/opal/sys/architecture.h
===================================================================
--- ompi-trunk/opal/include/opal/sys/architecture.h     (revision 24191)
+++ ompi-trunk/opal/include/opal/sys/architecture.h     (working copy)
@@ -36,6 +36,7 @@
 #define OMPI_SPARCV9_32     0061
 #define OMPI_SPARCV9_64     0062
 #define OMPI_MIPS           0070
+#define OMPI_ARM            0100

 /* Formats */
 #define OMPI_DEFAULT        1000  /* standard for given architecture */
Index: ompi-trunk/opal/config/opal_config_asm.m4
===================================================================
--- ompi-trunk/opal/config/opal_config_asm.m4   (revision 24191)
+++ ompi-trunk/opal/config/opal_config_asm.m4   (working copy)
@@ -900,6 +900,12 @@
             OMPI_GCC_INLINE_ASSIGN='"bis [$]31,[$]31,%0" : "=&r"(ret)'
             ;;

+        armv7*)
+            ompi_cv_asm_arch="ARM"
+            OPAL_ASM_SUPPORT_64BIT=1
+            OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
+            ;;
+
         mips-*|mips64*)
             # Should really find some way to make sure that we are on
             # a MIPS III machine (r4000 and later)




Reply via email to