Here is the patch against clang,
0002-Add-mhle-option-support-and-populate-pre-defined-mac.patch

Yours
- Michael

On Tue, 2013-02-19 at 14:07 -0800, Michael Liao wrote:
> Hi All,
> 
> I'd like to add HLE support to LLVM/clang, consistent with GCC's style [1].
> HLE, part of Intel TSX [2], is a legacy-compatible instruction set
> extension that specifies transactional regions by adding XACQUIRE and
> XRELEASE prefixes. To support it, GCC extends the memory order flag of
> the __atomic_* builtins with a target-specific memory model in the high
> bits (bits 31-16 for the target-specific memory model, bits 15-0 for the
> general memory model). To follow a similar approach, I propose to
> change LLVM/clang by adding:
> 
> + a metadata 'targetflags' in LLVM atomic IR to pass this
>   target-specific memory model hint
> 
> + one extra target flag in AtomicSDNode & MemIntrinsicSDNode to specify
> XACQUIRE or XRELEASE hints
>   This extra target flag is embedded into the SubclassData fields. The
> following is the rationale for how such target flags are embedded into
> SubclassData in SDNode:
> 
>   here is the current SDNode class hierarchy of memory related nodes
> 
>   SDNode -> MemSDNode -> LSBaseNode -> LoadSDNode
>                     |             + -> StoreSDNode
>                     + -> AtomicSDNode
>                     + -> MemIntrinsicSDNode
> 
>   here is the current SubclassData definitions:
> 
>   bit 0~1 : extension type used in LoadSDNode
>   bit 0   : truncating store in StoreSDNode
>   bit 2~4 : addressing mode in LSBaseNode
>   bit 5   : volatile bit in MemSDNode
>   bit 6   : non-temporal bit in MemSDNode
>   bit 7   : invariant bit in MemSDNode
>   bit 8~11: memory order in AtomicSDNode
>   bit 12  : synch scope in AtomicSDNode
> 
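>   Considering the class hierarchy, we can safely reuse bits 0~1 as the
> target flags in AtomicSDNode/MemIntrinsicSDNode: those bits are otherwise
> used only by LoadSDNode/StoreSDNode, which sit on a separate branch
> (LSBaseNode) of the hierarchy.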
>   
> + X86 backend is modified to generate additional XACQUIRE/XRELEASE
> prefix based on the specified target flag
> 
> 
> The following are details of each patch:
> 
> * 0001-Add-targetflags-in-AtomicSDNode-MemIntrinsicSDNode.patch
> 
> This patch adds 'targetflags' support in AtomicSDNode and
> MemIntrinsicSDNode. It checks the 'targetflags' metadata and embeds
> its value into SubclassData. Currently, only two bits are defined.
> 
> * 0002-Add-HLE-target-feature.patch
> 
> This patch adds the HLE feature and auto-detection support.
> 
> * 0003-Add-XACQ-XREL-prefix-and-encoding-asm-printer-suppor.patch
> 
> This patch adds the XACQUIRE/XRELEASE prefixes and their
> assembler/encoding support.
> 
> * 0004-Enable-HLE-code-generation.patch
> 
> This patch enables HLE code generation by extending the current logic to
> handle 'targetflags'.
> 
> * 0001-Add-target-flags-support-for-atomic-ops.patch
> 
> This patch adds target flags support in the __atomic_* builtins. It
> splits the whole 32-bit order word into high and low 16-bit parts. The
> low 16 bits hold the original memory order; the high 16 bits are
> redefined as target-specific flags and passed through the 'targetflags'
> metadata.
> 
> * 0002-Add-mhle-option-support-and-populate-pre-defined-mac.patch
> 
> It adds the '-mhle' and '-mno-hle' options to turn the HLE feature on
> or off. Once the HLE feature is turned on, two more macros
> (__ATOMIC_HLE_ACQUIRE and __ATOMIC_HLE_RELEASE) are defined for
> developers to mark atomic builtins.
> 
> Thanks for taking the time to review!
> 
> Yours
> - Michael
> ---
> [1] http://gcc.gnu.org/ml/gcc-patches/2012-04/msg01073.html
> [2] http://software.intel.com/sites/default/files/319433-014.pdf
> 

From df54e5f8e988b4fca5cd06ac9e7ea608086efbc0 Mon Sep 17 00:00:00 2001
From: Michael Liao <[email protected]>
Date: Sun, 8 Jul 2012 14:07:19 -0700
Subject: [PATCH 2/2] Add '-mhle' option support and populate pre-defined
 macros

- 3 pre-defined macros are added if HLE is turned on
  * __HLE__
  * __ATOMIC_HLE_ACQUIRE
  * __ATOMIC_HLE_RELEASE
---
 include/clang/Driver/Options.td            |    2 ++
 lib/Basic/Targets.cpp                      |   23 +++++++++++++++++++++--
 test/Preprocessor/predefined-arch-macros.c |    6 ++++++
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index cafd7d7..47fd862 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -885,6 +885,7 @@ def mno_fma : Flag<["-"], "mno-fma">, Group<m_x86_Features_Group>;
 def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>;
 def mno_f16c : Flag<["-"], "mno-f16c">, Group<m_x86_Features_Group>;
 def mno_rtm : Flag<["-"], "mno-rtm">, Group<m_x86_Features_Group>;
+def mno_hle : Flag<["-"], "mno-hle">, Group<m_x86_Features_Group>;
 
 def mno_thumb : Flag<["-"], "mno-thumb">, Group<m_Group>;
 def marm : Flag<["-"], "marm">, Alias<mno_thumb>;
@@ -928,6 +929,7 @@ def mfma : Flag<["-"], "mfma">, Group<m_x86_Features_Group>;
 def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>;
 def mf16c : Flag<["-"], "mf16c">, Group<m_x86_Features_Group>;
 def mrtm : Flag<["-"], "mrtm">, Group<m_x86_Features_Group>;
+def mhle : Flag<["-"], "mhle">, Group<m_x86_Features_Group>;
 def mips16 : Flag<["-"], "mips16">, Group<m_Group>;
 def mno_mips16 : Flag<["-"], "mno-mips16">, Group<m_Group>;
 def mxgot : Flag<["-"], "mxgot">, Group<m_Group>;
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index eaf2e7d..70d10e7 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -1608,6 +1608,7 @@ class X86TargetInfo : public TargetInfo {
   bool HasBMI2;
   bool HasPOPCNT;
   bool HasRTM;
+  bool HasHLE;
   bool HasSSE4a;
   bool HasFMA4;
   bool HasFMA;
@@ -1759,8 +1760,8 @@ public:
     : TargetInfo(triple), SSELevel(NoSSE), MMX3DNowLevel(NoMMX3DNow),
       HasAES(false), HasPCLMUL(false), HasLZCNT(false), HasRDRND(false),
       HasBMI(false), HasBMI2(false), HasPOPCNT(false), HasRTM(false),
-      HasSSE4a(false), HasFMA4(false), HasFMA(false), HasXOP(false),
-      HasF16C(false), CPU(CK_Generic) {
+      HasHLE(false), HasSSE4a(false), HasFMA4(false), HasFMA(false),
+      HasXOP(false), HasF16C(false), CPU(CK_Generic) {
     BigEndian = false;
     LongDoubleFormat = &llvm::APFloat::x87DoubleExtended;
   }
@@ -1966,6 +1967,7 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap<bool> &Features) const {
   Features["bmi2"] = false;
   Features["popcnt"] = false;
   Features["rtm"] = false;
+  Features["hle"] = false;
   Features["fma4"] = false;
   Features["fma"] = false;
   Features["xop"] = false;
@@ -2039,6 +2041,7 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap<bool> &Features) const {
     setFeatureEnabled(Features, "bmi", true);
     setFeatureEnabled(Features, "bmi2", true);
     setFeatureEnabled(Features, "rtm", true);
+    setFeatureEnabled(Features, "hle", true);
     setFeatureEnabled(Features, "fma", true);
     break;
   case CK_K6:
@@ -2188,6 +2191,8 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
       Features["f16c"] = true;
     else if (Name == "rtm")
       Features["rtm"] = true;
+    else if (Name == "hle")
+      Features["hle"] = true;
   } else {
     if (Name == "mmx")
       Features["mmx"] = Features["3dnow"] = Features["3dnowa"] = false;
@@ -2252,6 +2257,8 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
       Features["f16c"] = false;
     else if (Name == "rtm")
       Features["rtm"] = false;
+    else if (Name == "hle")
+      Features["hle"] = false;
   }
 
   return true;
@@ -2308,6 +2315,11 @@ void X86TargetInfo::HandleTargetFeatures(std::vector<std::string> &Features) {
       continue;
     }
 
+    if (Feature == "hle") {
+      HasHLE = true;
+      continue;
+    }
+
     if (Feature == "sse4a") {
       HasSSE4a = true;
       continue;
@@ -2532,6 +2544,12 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasRTM)
     Builder.defineMacro("__RTM__");
 
+  if (HasHLE) {
+    Builder.defineMacro("__HLE__");
+    Builder.defineMacro("__ATOMIC_HLE_ACQUIRE", Twine(1U << 16));
+    Builder.defineMacro("__ATOMIC_HLE_RELEASE", Twine(2U << 16));
+  }
+
   if (HasSSE4a)
     Builder.defineMacro("__SSE4A__");
 
@@ -2620,6 +2638,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("pclmul", HasPCLMUL)
       .Case("popcnt", HasPOPCNT)
       .Case("rtm", HasRTM)
+      .Case("hle", HasHLE)
       .Case("sse", SSELevel >= SSE1)
       .Case("sse2", SSELevel >= SSE2)
       .Case("sse3", SSELevel >= SSE3)
diff --git a/test/Preprocessor/predefined-arch-macros.c b/test/Preprocessor/predefined-arch-macros.c
index 680f39a..4303735 100644
--- a/test/Preprocessor/predefined-arch-macros.c
+++ b/test/Preprocessor/predefined-arch-macros.c
@@ -509,11 +509,14 @@
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck %s -check-prefix=CHECK_CORE_AVX2_M32
 // CHECK_CORE_AVX2_M32: #define __AES__ 1
+// CHECK_CORE_AVX2_M32: #define __ATOMIC_HLE_ACQUIRE 65536
+// CHECK_CORE_AVX2_M32: #define __ATOMIC_HLE_RELEASE 131072
 // CHECK_CORE_AVX2_M32: #define __AVX__ 1
 // CHECK_CORE_AVX2_M32: #define __BMI2__ 1
 // CHECK_CORE_AVX2_M32: #define __BMI__ 1
 // CHECK_CORE_AVX2_M32: #define __F16C__ 1
 // CHECK_CORE_AVX2_M32: #define __FMA__ 1
+// CHECK_CORE_AVX2_M32: #define __HLE__ 1
 // CHECK_CORE_AVX2_M32: #define __LZCNT__ 1
 // CHECK_CORE_AVX2_M32: #define __MMX__ 1
 // CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1
@@ -536,11 +539,14 @@
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck %s -check-prefix=CHECK_CORE_AVX2_M64
 // CHECK_CORE_AVX2_M64: #define __AES__ 1
+// CHECK_CORE_AVX2_M64: #define __ATOMIC_HLE_ACQUIRE 65536
+// CHECK_CORE_AVX2_M64: #define __ATOMIC_HLE_RELEASE 131072
 // CHECK_CORE_AVX2_M64: #define __AVX__ 1
 // CHECK_CORE_AVX2_M64: #define __BMI2__ 1
 // CHECK_CORE_AVX2_M64: #define __BMI__ 1
 // CHECK_CORE_AVX2_M64: #define __F16C__ 1
 // CHECK_CORE_AVX2_M64: #define __FMA__ 1
+// CHECK_CORE_AVX2_M64: #define __HLE__ 1
 // CHECK_CORE_AVX2_M64: #define __LZCNT__ 1
 // CHECK_CORE_AVX2_M64: #define __MMX__ 1
 // CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1
-- 
1.7.9.5

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to