diff --git a/gcc/builtins.c b/gcc/builtins.c
index b47f218..2c90d04 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5338,6 +5338,7 @@ static enum memmodel
 get_memmodel (tree exp)
 {
   rtx op;
+  unsigned memmodel_mask = (1<<16) - 1;
 
   /* If the parameter is not a constant, it's a run time value so we'll just
      convert it to MEMMODEL_SEQ_CST to avoid annoying runtime checking.  */
@@ -5345,12 +5346,22 @@ get_memmodel (tree exp)
     return MEMMODEL_SEQ_CST;
 
   op = expand_normal (exp);
-  if (INTVAL (op) < 0 || INTVAL (op) >= MEMMODEL_LAST)
+
+  if(INTVAL (op) & ~(memmodel_mask | targetm.memmodel_mask))
+    {
+      warning (OPT_Winvalid_memory_model,
+	       "Unknown architecture specifier in memory model to builtin.");
+      return MEMMODEL_SEQ_CST;
+    }
+
+  if ((INTVAL (op) & memmodel_mask) < 0
+      || ((INTVAL(op) & memmodel_mask) >= MEMMODEL_LAST))
     {
       warning (OPT_Winvalid_memory_model,
 	       "invalid memory model argument to builtin");
       return MEMMODEL_SEQ_CST;
     }
+
   return (enum memmodel) INTVAL (op);
 }
 
@@ -5398,11 +5409,14 @@ expand_builtin_atomic_compare_exchange (enum machine_mode mode, tree exp,
   enum memmodel success, failure;
   tree weak;
   bool is_weak;
+  /* Suppose that higher bits are target dependant.  */
+  unsigned memmodel_mask = (1<<16) - 1;
 
   success = get_memmodel (CALL_EXPR_ARG (exp, 4));
   failure = get_memmodel (CALL_EXPR_ARG (exp, 5));
 
-  if (failure == MEMMODEL_RELEASE || failure == MEMMODEL_ACQ_REL)
+  if ( (failure & memmodel_mask) == MEMMODEL_RELEASE
+       || (failure & memmodel_mask) == MEMMODEL_ACQ_REL)
     {
       error ("invalid failure memory model for %<__atomic_compare_exchange%>");
       return NULL_RTX;
diff --git a/gcc/config.in b/gcc/config.in
index 8806012..5321452 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -350,6 +350,11 @@
 #undef HAVE_AS_IX86_SAHF
 #endif
 
+/* Define if your assembler supports HLE prefixes. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_IX86_HLE
+#endif
+
 
 /* Define if your assembler supports the swap suffix. */
 #ifndef USED_FOR_TARGET
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 6696b7a..a9d25c5 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -66,6 +66,7 @@
 /* Extended Features (%eax == 7) */
 #define bit_FSGSBASE	(1 << 0)
 #define bit_BMI		(1 << 3)
+#define bit_HLE		(1 << 4)
 #define bit_AVX2	(1 << 5)
 #define bit_BMI2	(1 << 8)
 #define bit_RTM		(1 << 11)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 09de555..34cd096 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -397,6 +397,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
   unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
   unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
+  unsigned int has_hle = 0;
 
   bool arch;
 
@@ -456,6 +457,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       __cpuid_count (7, 0, eax, ebx, ecx, edx);
 
       has_bmi = ebx & bit_BMI;
+      has_hle = ebx & bit_HLE;
       has_avx2 = ebx & bit_AVX2;
       has_bmi2 = ebx & bit_BMI2;
     }
@@ -726,10 +728,12 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
       const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
       const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
+      const char *hle = has_hle ? " -mhle" : "-mno-hle";
 
       options = concat (options, cx16, sahf, movbe, ase, pclmul,
 			popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
-			tbm, avx, avx2, sse4_2, sse4_1, lzcnt, NULL);
+			tbm, avx, avx2, sse4_2, sse4_1, lzcnt,
+			hle, NULL);
     }
 
 done:
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 49fd4d9..c6551e2 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -54,6 +54,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
   size_t tune_len = strlen (ix86_tune_string);
   int last_arch_char = ix86_arch_string[arch_len - 1];
   int last_tune_char = ix86_tune_string[tune_len - 1];
+  char hle_macro[64];
 
   /* Built-ins based on -march=.  */
   switch (arch)
@@ -293,6 +294,12 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     def_or_undef (parse_in, "__SSE_MATH__");
   if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
     def_or_undef (parse_in, "__SSE2_MATH__");
+
+  sprintf (hle_macro, "__ATOMIC_HLE_ACQUIRE=%d", IX86_HLE_ACQUIRE);
+  def_or_undef (parse_in, hle_macro);
+
+  sprintf (hle_macro, "__ATOMIC_HLE_RELEASE=%d", IX86_HLE_RELEASE);
+  def_or_undef (parse_in, hle_macro);
 }
 
 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index abe3f1b..64397cf 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2679,6 +2679,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
     { "-mbmi",		OPTION_MASK_ISA_BMI },
     { "-mbmi2", 	OPTION_MASK_ISA_BMI2 },
     { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
+    { "-mhle",		OPTION_MASK_ISA_HLE },
     { "-mtbm",		OPTION_MASK_ISA_TBM },
     { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
     { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
@@ -2954,6 +2955,7 @@ ix86_option_override_internal (bool main_args_p)
 #define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
 #define PTA_BMI2	 	(HOST_WIDE_INT_1 << 31)
 #define PTA_RTM		 	(HOST_WIDE_INT_1 << 32)
+#define PTA_HLE	 		(HOST_WIDE_INT_1 << 33)
 /* if this reaches 64, need to widen struct pta flags below */
 
   static struct pta
@@ -3012,7 +3014,7 @@ ix86_option_override_internal (bool main_args_p)
 	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
 	| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
 	| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
-        | PTA_FMA | PTA_MOVBE | PTA_RTM},
+        | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE},
       {"atom", PROCESSOR_ATOM, CPU_ATOM,
 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
 	| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
@@ -3430,6 +3432,9 @@ ix86_option_override_internal (bool main_args_p)
 	if (processor_alias_table[i].flags & PTA_RTM
 	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
 	  ix86_isa_flags |= OPTION_MASK_ISA_RTM;
+	if (processor_alias_table[i].flags & PTA_HLE
+	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
+	  ix86_isa_flags |= OPTION_MASK_ISA_HLE;
 	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
 	  x86_prefetch_sse = true;
 
@@ -4251,6 +4256,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
     IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
     IX86_ATTR_ISA ("f16c",	OPT_mf16c),
     IX86_ATTR_ISA ("rtm",	OPT_mrtm),
+    IX86_ATTR_ISA ("hle",	OPT_mhle),
 
     /* enum options */
     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
@@ -14340,6 +14346,24 @@ ix86_print_operand (FILE *file, rtx x, int code)
 	  x = adjust_address_nv (x, DImode, 8);
 	  break;
 
+	case 'K':
+	  gcc_assert (CONST_INT_P (x));
+
+	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
+#ifdef HAVE_AS_IX86_HLE
+	    fputs ("xacquire ", file);
+#else
+	  fputs ("\n" ASM_BYTE "0xf2\n\t", file);
+#endif
+	  else if (INTVAL (x) & IX86_HLE_RELEASE)
+#ifdef HAVE_AS_IX86_HLE
+	    fputs ("xrelease ", file);
+#else
+	  fputs ("\n" ASM_BYTE "0xf3\n\t");
+#endif
+	  /* We do not want to print value of the operand.  */
+	  return;
+
 	case '+':
 	  {
 	    rtx x;
@@ -39042,6 +39066,9 @@ ix86_autovectorize_vector_sizes (void)
 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
 
+#undef TARGET_MEMMODEL_MASK
+#define TARGET_MEMMODEL_MASK (IX86_HLE_ACQUIRE|IX86_HLE_RELEASE)
+
 #ifdef HAVE_AS_TLS
 #undef TARGET_HAVE_TLS
 #define TARGET_HAVE_TLS true
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8942ea8..0944260 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -75,6 +75,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_RDRND	OPTION_ISA_RDRND
 #define TARGET_F16C	OPTION_ISA_F16C
 #define TARGET_RTM      OPTION_ISA_RTM
+#define TARGET_HLE	OPTION_ISA_HLE
 
 #define TARGET_LP64	OPTION_ABI_64
 #define TARGET_X32	OPTION_ABI_X32
@@ -2344,6 +2345,9 @@ extern void debug_dispatch_window (int);
 #define TARGET_RECIP_VEC_DIV	((recip_mask & RECIP_MASK_VEC_DIV) != 0)
 #define TARGET_RECIP_VEC_SQRT	((recip_mask & RECIP_MASK_VEC_SQRT) != 0)
 
+#define IX86_HLE_ACQUIRE (1 << 16)
+#define IX86_HLE_RELEASE (1 << 17)
+
 /*
 Local variables:
 version-control: t
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 27ed5f6..0ab8c64 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -58,6 +58,7 @@
 ;; X -- don't print any sort of PIC '@' suffix for a symbol.
 ;; & -- print some in-use local-dynamic symbol name.
 ;; H -- print a memory address offset by 8; used for sse high-parts
+;; K -- print HLE lock prefix
 ;; Y -- print condition for XOP pcom* instruction.
 ;; + -- print a branch hint as 'cs' or 'ds' prefix
 ;; ; -- print a semicolon (after prefixes due to bug in older gas).
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index bf50aed..db34e1f 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -528,6 +528,10 @@ mlzcnt
 Target Report Mask(ISA_LZCNT) Var(ix86_isa_flags) Save
 Support LZCNT built-in function and code generation
 
+mhle
+Target Report Mask(ISA_HLE) Var(ix86_isa_flags) Save
+Support Hardware Lock Elision prefixes
+
 mtbm
 Target Report Mask(ISA_TBM) Var(ix86_isa_flags) Save
 Support TBM built-in functions and code generation
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index faf65ba..e02a949 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -315,8 +315,9 @@
    (match_operand:SI 7 "const_int_operand")]	;; failure model
   "TARGET_CMPXCHG"
 {
-  emit_insn (gen_atomic_compare_and_swap_single<mode>
-	     (operands[1], operands[2], operands[3], operands[4]));
+  emit_insn
+   (gen_atomic_compare_and_swap_single<mode>
+    (operands[1], operands[2], operands[3], operands[4], operands[6]));
   ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
 		     const0_rtx);
   DONE;
@@ -344,8 +345,9 @@
 {
   if (<MODE>mode == DImode && TARGET_64BIT)
     {
-      emit_insn (gen_atomic_compare_and_swap_singledi
-		 (operands[1], operands[2], operands[3], operands[4]));
+      emit_insn
+       (gen_atomic_compare_and_swap_singledi
+	(operands[1], operands[2], operands[3], operands[4], operands[6]));
     }
   else
     {
@@ -370,7 +372,7 @@
 	mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
 
       emit_insn (gen_atomic_compare_and_swap_double<mode>
-		 (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n));
+		 (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n, operands[6]));
     }
   ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
 		     const0_rtx);
@@ -382,14 +384,15 @@
 	(unspec_volatile:SWI
 	  [(match_operand:SWI 1 "memory_operand" "+m")
 	   (match_operand:SWI 2 "register_operand" "0")
-	   (match_operand:SWI 3 "register_operand" "<r>")]
+	   (match_operand:SWI 3 "register_operand" "<r>")
+	   (match_operand:SI 4 "const_int_operand")]
 	  UNSPECV_CMPXCHG_1))
    (set (match_dup 1)
 	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG_2))
    (set (reg:CCZ FLAGS_REG)
         (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_3))]
   "TARGET_CMPXCHG"
-  "lock{%;} cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
+  "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
 
 ;; For double-word compare and swap, we are obliged to play tricks with
 ;; the input newval (op5:op6) because the Intel register numbering does
@@ -403,7 +406,8 @@
 	   (match_operand:<DCASHMODE> 3 "register_operand" "0")
 	   (match_operand:<DCASHMODE> 4 "register_operand" "1")
 	   (match_operand:<DCASHMODE> 5 "register_operand" "b")
-	   (match_operand:<DCASHMODE> 6 "register_operand" "c")]
+	   (match_operand:<DCASHMODE> 6 "register_operand" "c")
+	   (match_operand:SI 7 "const_int_operand")]
 	  UNSPECV_CMPXCHG_1))
    (set (match_operand:<DCASHMODE> 1 "register_operand" "=d")
 	(unspec_volatile:<DCASHMODE> [(const_int 0)] UNSPECV_CMPXCHG_2))
@@ -412,7 +416,7 @@
    (set (reg:CCZ FLAGS_REG)
         (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
   ""
-  "lock{%;} cmpxchg<doublemodesuffix>b\t%2")
+  "lock{%;} %K7cmpxchg<doublemodesuffix>b\t%2")
 
 ;; Theoretically we'd like to use constraint "r" (any reg) for op5,
 ;; but that includes ecx.  If op5 and op6 are the same (like when
@@ -430,7 +434,8 @@
 	   (match_operand:SI 3 "register_operand" "0")
 	   (match_operand:SI 4 "register_operand" "1")
 	   (match_operand:SI 5 "register_operand" "SD")
-	   (match_operand:SI 6 "register_operand" "c")]
+	   (match_operand:SI 6 "register_operand" "c")
+	   (match_operand:SI 7 "const_int_operand")]
 	  UNSPECV_CMPXCHG_1))
    (set (match_operand:SI 1 "register_operand" "=d")
 	(unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_2))
@@ -439,7 +444,7 @@
    (set (reg:CCZ FLAGS_REG)
         (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
   "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
-  "xchg{l}\t%%ebx, %5\;lock{%;} cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5")
+  "xchg{l}\t%%ebx, %5\;lock{%;} %K7cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5")
 
 ;; For operand 2 nonmemory_operand predicate is used instead of
 ;; register_operand to allow combiner to better optimize atomic
@@ -455,7 +460,7 @@
 		  (match_operand:SWI 2 "nonmemory_operand" "0")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_XADD"
-  "lock{%;} xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
+  "lock{%;} %K3xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
 
 ;; This peephole2 and following insn optimize
 ;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
@@ -526,7 +531,7 @@
    (set (match_dup 1)
 	(match_operand:SWI 2 "register_operand" "0"))]		;; input
   ""
-  "xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
+  "%K3xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
 
 (define_insn "atomic_add<mode>"
   [(set (match_operand:SWI 0 "memory_operand" "+m")
@@ -541,15 +546,15 @@
   if (TARGET_USE_INCDEC)
     {
       if (operands[1] == const1_rtx)
-	return "lock{%;} inc{<imodesuffix>}\t%0";
+	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
       if (operands[1] == constm1_rtx)
-	return "lock{%;} dec{<imodesuffix>}\t%0";
+	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
     }
 
   if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
-    return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
+    return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
 
-  return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
+  return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
 (define_insn "atomic_sub<mode>"
@@ -565,15 +570,15 @@
   if (TARGET_USE_INCDEC)
     {
       if (operands[1] == const1_rtx)
-	return "lock{%;} dec{<imodesuffix>}\t%0";
+	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
       if (operands[1] == constm1_rtx)
-	return "lock{%;} inc{<imodesuffix>}\t%0";
+	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
     }
 
   if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
-    return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
+    return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
 
-  return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
+  return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
 (define_insn "atomic_<logic><mode>"
@@ -585,4 +590,4 @@
 	  UNSPECV_LOCK))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "lock{%;} <logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
+  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
diff --git a/gcc/configure b/gcc/configure
index c1b0e46..223f218 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -24628,6 +24628,39 @@ $as_echo "#define HAVE_AS_IX86_SAHF 1" >>confdefs.h
 
 fi
 
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for hle prefixes" >&5
+$as_echo_n "checking assembler for hle prefixes... " >&6; }
+if test "${gcc_cv_as_ix86_hle+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_ix86_hle=no
+  if test x$gcc_cv_as != x; then
+    $as_echo '.code64
+       lock xacquire cmpxchg %esi, (%rcx)
+       ' > conftest.s
+    if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+    then
+	gcc_cv_as_ix86_hle=yes
+    else
+      echo "configure: failed program was" >&5
+      cat conftest.s >&5
+    fi
+    rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_ix86_hle" >&5
+$as_echo "$gcc_cv_as_ix86_hle" >&6; }
+if test $gcc_cv_as_ix86_hle = yes; then
+
+$as_echo "#define HAVE_AS_IX86_HLE 1" >>confdefs.h
+
+fi
+
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for swap suffix" >&5
 $as_echo_n "checking assembler for swap suffix... " >&6; }
 if test "${gcc_cv_as_ix86_swap+set}" = set; then :
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 8869121..86b4bea 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -3597,6 +3597,14 @@ foo:	nop
       [AC_DEFINE(HAVE_AS_IX86_SAHF, 1,
         [Define if your assembler supports the sahf mnemonic in 64bit mode.])])
 
+    gcc_GAS_CHECK_FEATURE([hle prefixes],
+      gcc_cv_as_ix86_hle,,,
+      [.code64
+       lock xacquire cmpxchg %esi, (%rcx)
+       ],,
+      [AC_DEFINE(HAVE_AS_IX86_HLE, 1,
+        [Define if your assembler supports HLE prefixes.])])
+
     gcc_GAS_CHECK_FEATURE([swap suffix],
       gcc_cv_as_ix86_swap,,,
       [movl.s %esp, %ebp],,
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 2891bb6..ee0d700 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11362,6 +11362,10 @@ MIPS, where add-immediate takes a 16-bit signed value,
 @code{TARGET_CONST_ANCHOR} is set to @samp{0x8000}.  The default value
 is zero, which disables this optimization.  @end deftypevr
 
+@deftypevr {Target Hook} {unsigned HOST_WIDE_INT} TARGET_MEMMODEL_MASK
+Some architectures may prvide additional memory model bits. This hook
+must mask such a bits. @end deftypevr
+
 @deftypevr {Target Hook} {unsigned char} TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
 This value should be set if the result written by @code{atomic_test_and_set} is not exactly 1, i.e. the @code{bool} @code{true}.
 @end deftypevr
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index a222654..95eef59 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -11242,4 +11242,8 @@ MIPS, where add-immediate takes a 16-bit signed value,
 @code{TARGET_CONST_ANCHOR} is set to @samp{0x8000}.  The default value
 is zero, which disables this optimization.  @end deftypevr
 
+@hook TARGET_MEMMODEL_MASK
+Some architectures may prvide additional memory model bits. This hook
+must mask such a bits. @end deftypevr
+
 @hook TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
diff --git a/gcc/target.def b/gcc/target.def
index d658b11..f06b6bf 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1940,6 +1940,12 @@ DEFHOOKPOD
  "",
  unsigned HOST_WIDE_INT, 0)
 
+/* Defines, which target-dependent bits (upper 16) are used by port  */
+DEFHOOKPOD
+(memmodel_mask,
+ "",
+ unsigned HOST_WIDE_INT, 0)
+
 /* Functions relating to calls - argument passing, returns, etc.  */
 /* Members of struct call have no special macro prefix.  */
 HOOK_VECTOR (TARGET_CALLS, calls)
diff --git a/gcc/testsuite/gcc.target/i386/hle-add-acq-1.c b/gcc/testsuite/gcc.target/i386/hle-add-acq-1.c
new file mode 100644
index 0000000..06d3126
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-add-acq-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+add" } } */
+
+void
+hle_add (int *p, int v)
+{
+  __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-add-rel-1.c b/gcc/testsuite/gcc.target/i386/hle-add-rel-1.c
new file mode 100644
index 0000000..d9f29b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-add-rel-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+add" } } */
+
+void
+hle_add (int *p, int v)
+{
+  __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_RELEASE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-and-acq-1.c b/gcc/testsuite/gcc.target/i386/hle-and-acq-1.c
new file mode 100644
index 0000000..321aa4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-and-acq-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+and" } } */
+
+void
+hle_and (int *p, int v)
+{
+  __atomic_fetch_and (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-and-rel-1.c b/gcc/testsuite/gcc.target/i386/hle-and-rel-1.c
new file mode 100644
index 0000000..eceb200
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-and-rel-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf3\)\[ \t\n\]+and" } } */
+
+void
+hle_and (int *p, int v)
+{
+  __atomic_fetch_and (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_RELEASE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-cmpxchg-acq-1.c b/gcc/testsuite/gcc.target/i386/hle-cmpxchg-acq-1.c
new file mode 100644
index 0000000..8b43e54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-cmpxchg-acq-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+cmpxchg" } } */
+
+int
+hle_cmpxchg (int *p, int oldv, int newv)
+{
+  return __atomic_compare_exchange_n (p, &oldv, newv, 0, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE, __ATOMIC_ACQUIRE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-cmpxchg-rel-1.c b/gcc/testsuite/gcc.target/i386/hle-cmpxchg-rel-1.c
new file mode 100644
index 0000000..8549542
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-cmpxchg-rel-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf3\)\[ \t\n\]+cmpxchg" } } */
+
+int
+hle_cmpxchg (int *p, int oldv, int newv)
+{
+  return __atomic_compare_exchange_n (p, &oldv, newv, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE, __ATOMIC_ACQUIRE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-or-acq-1.c b/gcc/testsuite/gcc.target/i386/hle-or-acq-1.c
new file mode 100644
index 0000000..b742993
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-or-acq-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+or" } } */
+
+void
+hle_or (int *p, int v)
+{
+  __atomic_or_fetch (p, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-or-rel-1.c b/gcc/testsuite/gcc.target/i386/hle-or-rel-1.c
new file mode 100644
index 0000000..93e0017
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-or-rel-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf3\)\[ \t\n\]+or" } } */
+
+void
+hle_xor (int *p, int v)
+{
+  __atomic_fetch_or (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_RELEASE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-sub-acq-1.c b/gcc/testsuite/gcc.target/i386/hle-sub-acq-1.c
new file mode 100644
index 0000000..c9efa4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-sub-acq-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+sub" } } */
+
+void
+hle_sub (int *p, int v)
+{
+  __atomic_fetch_sub (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-sub-rel-1.c b/gcc/testsuite/gcc.target/i386/hle-sub-rel-1.c
new file mode 100644
index 0000000..b9dae1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-sub-rel-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+sub" } } */
+
+void
+hle_sub (int *p, int v)
+{
+  __atomic_fetch_sub (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_RELEASE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-xadd-acq-1.c b/gcc/testsuite/gcc.target/i386/hle-xadd-acq-1.c
new file mode 100644
index 0000000..b4f1e22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-xadd-acq-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+xadd" } } */
+
+int
+hle_xadd (int *p, int v)
+{
+  return __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-xadd-rel-1.c b/gcc/testsuite/gcc.target/i386/hle-xadd-rel-1.c
new file mode 100644
index 0000000..3bbd706
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-xadd-rel-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+xadd" } } */
+
+int
+hle_xadd (int *p, int v)
+{
+  return __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_RELEASE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-xchg-acq-1.c b/gcc/testsuite/gcc.target/i386/hle-xchg-acq-1.c
new file mode 100644
index 0000000..441c454
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-xchg-acq-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+xchg" } } */
+
+int
+hle_xchg (int *p, int v)
+{
+  return __atomic_exchange_n (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-xchg-rel-1.c b/gcc/testsuite/gcc.target/i386/hle-xchg-rel-1.c
new file mode 100644
index 0000000..9993596
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-xchg-rel-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+xchg" } } */
+
+int
+hle_xchg (int *p, int v)
+{
+  return __atomic_exchange_n (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_RELEASE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-xor-acq-1.c b/gcc/testsuite/gcc.target/i386/hle-xor-acq-1.c
new file mode 100644
index 0000000..f219b58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-xor-acq-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xacquire\|\.byte\[ \t\]+0xf2\)\[ \t\n\]+xor" } } */
+
+void
+hle_xor (int *p, int v)
+{
+  __atomic_fetch_xor (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/hle-xor-rel-1.c b/gcc/testsuite/gcc.target/i386/hle-xor-rel-1.c
new file mode 100644
index 0000000..2fb9ec5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/hle-xor-rel-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mhle" } */
+/* { dg-final { scan-assembler "lock\[ \n\t\]+\(xrelease\|\.byte\[ \t\]+0xf3\)\[ \t\n\]+xor" } } */
+
+void
+hle_xor (int *p, int v)
+{
+  __atomic_fetch_xor (p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_RELEASE);
+}
