mgorny updated this revision to Diff 87170.
mgorny added a comment.

CUDA: added the `MaxAtomicPromoteWidth` setting, and moved the host CPU override
a little lower so that it is never invoked on a null `HostTarget`.
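As context for the test changes below, here is a minimal sketch (not part of the
patch; the function and variable names are made up for illustration) of what the
i486/i586 split means for user code. 8-byte atomics need `cmpxchg8b`, which is an
i586 instruction, so they can only be inlined from i586 up; on i486 they are
expected to go through the atomic support library instead, and
`__GCC_ATOMIC_LLONG_LOCK_FREE` drops from 2 to 1, as the updated Sema test checks.

  /* Illustrative sketch only, not part of the patch. */
  #include <stdatomic.h>

  _Atomic long long counter;   /* hypothetical 8-byte atomic object */

  long long bump(void) {
    /* -target-cpu i586 and later: clang can inline this RMW operation,
     * since cmpxchg8b is available.
     * -target-cpu i486: no cmpxchg8b, so this is expected to lower to a
     * call into the atomic support library instead. */
    return atomic_fetch_add(&counter, 1);
  }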


https://reviews.llvm.org/D29542

Files:
  lib/Basic/Targets.cpp
  test/CodeGen/atomic-ops.c
  test/CodeGen/ms-volatile.c
  test/CodeGenCXX/atomicinit.cpp
  test/Sema/atomic-ops.c

Index: test/Sema/atomic-ops.c
===================================================================
--- test/Sema/atomic-ops.c
+++ test/Sema/atomic-ops.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -verify -ffreestanding -fsyntax-only -triple=i686-linux-gnu -std=c11
+// RUN: %clang_cc1 %s -verify -ffreestanding -fsyntax-only -triple=i686-linux-gnu -target-cpu i686 -std=c11
+// RUN: %clang_cc1 %s -verify -ffreestanding -fsyntax-only -triple=i486-linux-gnu -target-cpu i486 -std=c11
 
 // Basic parsing/Sema tests for __c11_atomic_*
 
@@ -14,22 +15,34 @@
 _Static_assert(__GCC_ATOMIC_SHORT_LOCK_FREE == 2, "");
 _Static_assert(__GCC_ATOMIC_INT_LOCK_FREE == 2, "");
 _Static_assert(__GCC_ATOMIC_LONG_LOCK_FREE == 2, "");
+#if defined(__i486__)
+_Static_assert(__GCC_ATOMIC_LLONG_LOCK_FREE == 1, "");
+#else
 _Static_assert(__GCC_ATOMIC_LLONG_LOCK_FREE == 2, "");
+#endif
 _Static_assert(__GCC_ATOMIC_POINTER_LOCK_FREE == 2, "");
 
 _Static_assert(__c11_atomic_is_lock_free(1), "");
 _Static_assert(__c11_atomic_is_lock_free(2), "");
 _Static_assert(__c11_atomic_is_lock_free(3), ""); // expected-error {{not an integral constant expression}}
 _Static_assert(__c11_atomic_is_lock_free(4), "");
+#if defined(__i486__)
+_Static_assert(__c11_atomic_is_lock_free(8), ""); // expected-error {{not an integral constant expression}}
+#else
 _Static_assert(__c11_atomic_is_lock_free(8), "");
+#endif
 _Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-error {{not an integral constant expression}}
 _Static_assert(__c11_atomic_is_lock_free(17), ""); // expected-error {{not an integral constant expression}}
 
 _Static_assert(__atomic_is_lock_free(1, 0), "");
 _Static_assert(__atomic_is_lock_free(2, 0), "");
 _Static_assert(__atomic_is_lock_free(3, 0), ""); // expected-error {{not an integral constant expression}}
 _Static_assert(__atomic_is_lock_free(4, 0), "");
+#if defined(__i486__)
+_Static_assert(__atomic_is_lock_free(8, 0), ""); // expected-error {{not an integral constant expression}}
+#else
 _Static_assert(__atomic_is_lock_free(8, 0), "");
+#endif
 _Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-error {{not an integral constant expression}}
 _Static_assert(__atomic_is_lock_free(17, 0), ""); // expected-error {{not an integral constant expression}}
 
@@ -56,13 +69,21 @@
 _Static_assert(__atomic_is_lock_free(4, &i32), "");
 _Static_assert(__atomic_is_lock_free(4, &i64), "");
 _Static_assert(__atomic_is_lock_free(8, &i32), ""); // expected-error {{not an integral constant expression}}
+#if defined(__i486__)
+_Static_assert(__atomic_is_lock_free(8, &i64), ""); // expected-error {{not an integral constant expression}}
+#else
 _Static_assert(__atomic_is_lock_free(8, &i64), "");
+#endif
 
 _Static_assert(__atomic_always_lock_free(1, 0), "");
 _Static_assert(__atomic_always_lock_free(2, 0), "");
 _Static_assert(!__atomic_always_lock_free(3, 0), "");
 _Static_assert(__atomic_always_lock_free(4, 0), "");
+#if defined(__i486__)
+_Static_assert(!__atomic_always_lock_free(8, 0), "");
+#else
 _Static_assert(__atomic_always_lock_free(8, 0), "");
+#endif
 _Static_assert(!__atomic_always_lock_free(16, 0), "");
 _Static_assert(!__atomic_always_lock_free(17, 0), "");
 
@@ -79,7 +100,11 @@
 _Static_assert(__atomic_always_lock_free(4, &i32), "");
 _Static_assert(__atomic_always_lock_free(4, &i64), "");
 _Static_assert(!__atomic_always_lock_free(8, &i32), "");
+#if defined(__i486__)
+_Static_assert(!__atomic_always_lock_free(8, &i64), "");
+#else
 _Static_assert(__atomic_always_lock_free(8, &i64), "");
+#endif
 
 #define _AS1 __attribute__((address_space(1)))
 #define _AS2 __attribute__((address_space(2)))
Index: test/CodeGenCXX/atomicinit.cpp
===================================================================
--- test/CodeGenCXX/atomicinit.cpp
+++ test/CodeGenCXX/atomicinit.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -emit-llvm -O1 -o - -triple=i686-apple-darwin9 -std=c++11 | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -O1 -o - -triple=i686-apple-darwin9 -target-cpu i686 -std=c++11 | FileCheck %s
 
 // CHECK-DAG: @PR22043 = local_unnamed_addr global i32 0, align 4
 typedef _Atomic(int) AtomicInt;
Index: test/CodeGen/ms-volatile.c
===================================================================
--- test/CodeGen/ms-volatile.c
+++ test/CodeGen/ms-volatile.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple i386-pc-win32 -fms-extensions -emit-llvm -fms-volatile -o - < %s | FileCheck %s
+// RUN: %clang_cc1 -triple i386-pc-win32 -target-cpu i686 -fms-extensions -emit-llvm -fms-volatile -o - < %s | FileCheck %s
 struct foo {
   volatile int x;
 };
Index: test/CodeGen/atomic-ops.c
===================================================================
--- test/CodeGen/atomic-ops.c
+++ test/CodeGen/atomic-ops.c
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -ffreestanding -ffake-address-space-map -triple=i686-apple-darwin9 | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -ffreestanding -ffake-address-space-map -triple=i686-apple-darwin9 -target-cpu i686 | FileCheck %s
 // REQUIRES: x86-registered-target
 
 // Also test serialization of atomic operations here, to avoid duplicating the
 // test.
-// RUN: %clang_cc1 %s -emit-pch -o %t -ffreestanding -ffake-address-space-map -triple=i686-apple-darwin9
-// RUN: %clang_cc1 %s -include-pch %t -ffreestanding -ffake-address-space-map -triple=i686-apple-darwin9 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -emit-pch -o %t -ffreestanding -ffake-address-space-map -triple=i686-apple-darwin9 -target-cpu i686
+// RUN: %clang_cc1 %s -include-pch %t -ffreestanding -ffake-address-space-map -triple=i686-apple-darwin9 -target-cpu i686 -emit-llvm -o - | FileCheck %s
 #ifndef ALREADY_INCLUDED
 #define ALREADY_INCLUDED
 
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -1801,6 +1801,12 @@
       return;
     }
 
+    // If targeting x86-32, set the CPU to i586 to enable all inline
+    // atomics. This matches the defaults for the systems using CUDA.
+    // TODO: pass the host target CPU
+    if (HostTriple.getArch() == llvm::Triple::x86)
+      HostTarget->setCPU("i586");
+
     // Copy properties from host target.
     PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0);
     PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0);
@@ -1845,6 +1851,7 @@
     // other things) they affect which standard library classes are defined, and
     // we need all classes to be defined on both the host and device.
     MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();
+    MaxAtomicPromoteWidth = HostTarget->getMaxAtomicPromoteWidth();
 
     // Properties intentionally not copied from host:
     // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
@@ -2443,6 +2450,7 @@
 // X86 target abstract base class; x86-32 and x86-64 are very close, so
 // most of the implementation can be shared.
 class X86TargetInfo : public TargetInfo {
+protected:
   enum X86SSEEnum {
     NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
   } SSELevel = NoSSE;
@@ -4231,6 +4239,18 @@
 
 // X86-32 generic target
 class X86_32TargetInfo : public X86TargetInfo {
+  void setAtomic() {
+    // Based on the logic from X86TargetInfo::getTargetDefines().
+    if (CPU >= CK_i586) { // cmpxchg8b
+      MaxAtomicPromoteWidth = 64;
+      MaxAtomicInlineWidth = 64;
+    } else if (CPU >= CK_i486) { // cmpxchg
+      MaxAtomicPromoteWidth = 32;
+      MaxAtomicInlineWidth = 32;
+    } else // allow locked atomics up to 4 bytes
+      MaxAtomicPromoteWidth = 32;
+  }
+
 public:
   X86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
       : X86TargetInfo(Triple, Opts) {
@@ -4249,15 +4269,19 @@
                              (1 << TargetInfo::Double) |
                              (1 << TargetInfo::LongDouble));
 
-    // x86-32 has atomics up to 8 bytes
-    // FIXME: Check that we actually have cmpxchg8b before setting
-    // MaxAtomicInlineWidth. (cmpxchg8b is an i586 instruction.)
-    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+    setAtomic();
   }
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::CharPtrBuiltinVaList;
   }
 
+  bool setCPU(const std::string &Name) override {
+    bool ret = X86TargetInfo::setCPU(Name);
+    if (ret)
+      setAtomic();
+    return ret;
+  }
+
   int getEHDataRegisterNumber(unsigned RegNo) const override {
     if (RegNo == 0) return 0;
     if (RegNo == 1) return 2;